# Setup


In [1]:
from qiskit_aer import Aer

# Check available devices (should show both CPU and GPU)
print("Available devices:", 
      Aer.get_backend('statevector_simulator').available_devices())


Available devices: ('CPU', 'GPU')


In [2]:
from qiskit_aer import Aer
print(Aer.backends(name='statevector_simulator')[0].configuration().simulator)
# Should show {'cuStateVec': True, 'max_shots': 1}

True


In [3]:
from qiskit import QuantumCircuit, transpile
from qiskit_aer import AerSimulator

# Define a simple 2-qubit circuit
qc = QuantumCircuit(2)
qc.h(0)
qc.cx(0, 1)
qc.save_statevector()

# Setup GPU simulator
simulator = AerSimulator(method='statevector', device='GPU')

# Transpile and run
qc_t = transpile(qc, simulator)
result = simulator.run(qc_t).result()

# Print final statevector
statevector = result.get_statevector()
print(statevector)


Statevector([0.70710678+0.j, 0.        +0.j, 0.        +0.j,
             0.70710678+0.j],
            dims=(2, 2))


In [3]:
from qiskit import QuantumCircuit
from qiskit_aer import AerSimulator

qc = QuantumCircuit(2)
qc.h(0)
qc.cx(0, 1)
qc.measure_all()

sim = AerSimulator(
    method='statevector',
    device='GPU',
    cuStateVec_enable=True,
    max_parallel_experiments=512
)

result = sim.run(qc, shots=1000).result()
print("Counts:", result.get_counts())

Counts: {'00': 508, '11': 492}


# Data prep

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load your dataset
approach_3_df = pd.read_csv(....)
approach_3_df[....] = approach_3_df[....] / 100

# Specify your features here
list_3 = [....]  # Replace this with your actual column names

# Normalize the features if needed (StandardScaler as an example)
scaler = StandardScaler()
approach_3_df[list_3] = scaler.fit_transform(approach_3_df[list_3])

# Extract relevant columns for features and labels
features_3 = approach_3_df[list_3 + [....]]

# Split data (70% train, 15% validation, 15% test)
train_df, temp_df = train_test_split(features_3, test_size=0.4, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

train_df = train_df.loc[:, ~train_df.columns.duplicated()]
val_df = val_df.loc[:, ~val_df.columns.duplicated()]
test_df = test_df.loc[:, ~test_df.columns.duplicated()]

# Extract features and labels for train, validation, and test datasets
train_features = train_df.drop(columns=[....])
train_labels = torch.tensor(train_df[....].values, dtype=torch.float32, device=device).unsqueeze(1) 

val_features = val_df.drop(columns=[....])
val_labels = torch.tensor(val_df[....].values, dtype=torch.float32, device=device).unsqueeze(1)

test_features = test_df.drop(columns=[....])
test_labels = torch.tensor(test_df[....].values, dtype=torch.float32, device=device).unsqueeze(1)

# Convert data into GPU tensors (already done above)
xtrain = torch.tensor(train_features.values, dtype=torch.float32, device=device)
xval = torch.tensor(val_features.values, dtype=torch.float32, device=device)
xtest = torch.tensor(test_features.values, dtype=torch.float32, device=device)

# Create TensorDatasets
train_dataset = TensorDataset(xtrain, train_labels)
val_dataset = TensorDataset(xval, val_labels)
test_dataset = TensorDataset(xtest, test_labels)

print(train_labels)


# DataLoader configuration for efficient GPU-based data loading
gpu_loader_config = {
    "batch_size": 64,
    "pin_memory": False,        # Not necessary as data is already on GPU
    "num_workers": 0,           # No parallel CPU data loading
    "persistent_workers": False,
    "shuffle": True             # Shuffle for training data
}

# Create DataLoader instances for training, validation, and testing
train_loader = DataLoader(train_dataset, **gpu_loader_config)

# Adjust batch_size for validation and testing
val_test_loader_config = {k: v for k, v in gpu_loader_config.items() if k not in ['batch_size', 'shuffle']}
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, **val_test_loader_config)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, **val_test_loader_config)

# Verify that tensors are on the correct device (GPU)
print(f"xtrain device: {xtrain.device}, ytrain device: {train_labels.device}")
print(f"xval device: {xval.device}, yval device: {val_labels.device}")
print(f"xtest device: {xtest.device}, ytest device: {test_labels.device}")


Using device: cuda
tensor([[-0.4928],
        [-0.6126],
        [-0.6606],
        ...,
        [ 0.8730],
        [ 1.2324],
        [-0.4529]], device='cuda:0')
xtrain device: cuda:0, ytrain device: cuda:0
xval device: cuda:0, yval device: cuda:0
xtest device: cuda:0, ytest device: cuda:0


# Model define

In [None]:
import torch
import torch.nn as nn
from qiskit import QuantumCircuit, transpile
from qiskit.circuit import ParameterVector
from qiskit.quantum_info import SparsePauliOp
from qiskit_aer import AerSimulator
from qiskit_aer.primitives import Estimator
from qiskit_machine_learning.neural_networks import EstimatorQNN
from qiskit_machine_learning.connectors import TorchConnector
import torch.cuda.amp as amp
from qiskit.quantum_info import Statevector


import os

# Hyperparameters
input_size = ....
num_qubits = ....
num_layers = ....
shots = ....
batch_size = ....
num_epochs = ....

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Autoencoders ---
class PreAutoencoder(nn.Module):
    def __init__(self):  # Fixed the constructor name (_init_ -> __init__)
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(...., ....),
            nn.ReLU(),
            nn.Linear(...., num_qubits),
            nn.Tanh()
        )

    def forward(self, x):
        return self.encoder(x)

class PostAutoencoder(nn.Module):
    def __init__(self):  # Fixed the constructor name (_init_ -> __init__)
        super().__init__()
        self.decoder = nn.Sequential(
            nn.Linear(num_qubits, ....),
            nn.ReLU(),
            nn.Linear(...., ....)
        )

    def forward(self, x):
        return self.decoder(x)

# --- Quantum Circuit ---
qc = QuantumCircuit(num_qubits)
input_params = ParameterVector('x', num_qubits)
weight_params = ParameterVector('θ', num_layers * num_qubits)

# Input encoding (single layer)
for i in range(num_qubits):
    ....

# Variational layers
for layer in range(num_layers):
    # Parametrized rotations
    for i in range(num_qubits):
        ....
    
    # Circular entanglement
    for i in range(num_qubits):
        ....

# Observables
observables = ....

# --- GPU Configuration ---
backend_options = {
    ....
}

run_options = {
   ....
}

transpile_options = {
    ....
}

# Initialize Estimator with proper options
estimator = Estimator(
    backend_options={
        ....
    },
    run_options={....},
    transpile_options={....}
)

# Enable PyTorch optimizations
torch.backends.cudnn.benchmark = True
torch.set_float32_matmul_precision('high')

# --- Hybrid Model ---
class HybridQNN(nn.Module):
    def __init__(self, pre_encoder, qnn, post_decoder):
        super().__init__()
        self.pre_encoder = pre_encoder.to(device)
        self.qnn = qnn.to(device)  # TorchConnector wraps QNN parameters
        self.post_decoder = post_decoder.to(device)

    def forward(self, x):
        # Ensure inputs are on the correct device (GPU/CPU)
        encoded = self.pre_encoder(x.to(device))
        
        # Mixed Precision Handling
        with torch.autocast(device_type="cuda"):  # Use mixed precision
            # Perform forward pass
            return self.post_decoder(self.qnn(encoded))

# Initialize Autoencoders and QNN
pre_ae = PreAutoencoder().to(device)
post_ae = PostAutoencoder().to(device)

# Quantum Neural Network (QNN) initialization


qnn = EstimatorQNN(
    ....
)


qnn_model = TorchConnector(qnn).to(device)

# Initialize Hybrid QNN Model
model = HybridQNN(pre_ae, qnn_model, post_ae).to(device)

# --- Optimizer & Scaler ---
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # Modify learning rate if necessary
scaler = torch.GradScaler()  # For mixed precision



  qnn = EstimatorQNN(


# Training the model

In [3]:
print(observables)

[SparsePauliOp(['IZ'],
              coeffs=[1.+0.j]), SparsePauliOp(['ZI'],
              coeffs=[1.+0.j])]


In [None]:
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader

# 1. Configure Training Parameters
num_epochs = ....
batch_size = ....
checkpoint_path = "best_hybrid_model.pth"

# GPU-resident DataLoaders (no CPU transfers)
train_loader = DataLoader(
    ....
)

val_loader = DataLoader(
    ....
)

# 2. Initialize Optimizer
optimizer = optim.AdamW([
    {'params': model.pre_encoder.parameters(), 'lr': ........., 'weight_decay': .....},
    {'params': model.qnn.parameters(), 'lr': .....},
    {'params': model.post_decoder.parameters(), 'lr': ....., 'weight_decay': .....}
])

# 3. Learning Rate Scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=....., patience=...., verbose=True)

# 4. Mixed Precision Training (use default device handling)
scaler = torch.cuda.amp.GradScaler()

# 5. Loss Function
criterion = nn.MSELoss()

# 6. Training Loop
best_val_loss = float('inf')
for epoch in range(num_epochs):
    print('starting epoch :', epoch)
    # Initialize single progress bar per epoch
    with tqdm(total=len(train_loader) + len(val_loader), 
             desc=f"Epoch {epoch+1}/{num_epochs}", 
             unit='batch') as pbar:
        
        print('starting model training')
        # Training Phase
        model.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            # Ensure the inputs and labels are on the correct device (GPU/CPU)
            inputs, labels = inputs.to(device), labels.to(device)

            print(f"\n Input shape: {inputs.shape}, Label shape: {labels.shape}")
            
            print('starting forward pass')
            # Mixed Precision Autocast for Forward Pass
            with torch.autocast(device_type="cuda"):  # Use mixed precision for forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            print(f"\n Output shape from model: {outputs.shape}, Loss: {loss.item()}")

            print('starting backward pass')
            # Scaled backward pass (use mixed precision)
            scaler.scale(loss).backward()  # Scale loss and perform backward pass

            # Optional: Gradient clipping if using quantum layers
            # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            scaler.step(optimizer)  # Update weights using scaled gradients
            scaler.update()         # Adjust scaling factor for next iteration

            # Fastest gradient clearing (40% faster than regular zero_grad)
            optimizer.zero_grad(set_to_none=True)         # Zero gradients after optimization
            
            print('calculating training loss')
            # Accumulate training loss
            train_loss += loss.item() * inputs.size(0)
            pbar.set_postfix({'Phase': 'Train', 'Loss': loss.item()})
            pbar.update(1)

        print('starting validation')
        # Validation Phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                # Ensure the inputs and labels are on the correct device (GPU/CPU)
                inputs, labels = inputs.to(device), labels.to(device)
                
                print('starting validation forward pass')
                # Mixed Precision Autocast for Validation Forward Pass
                with torch.autocast(device_type="cuda"):  # Use mixed precision for forward pass
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    
                val_loss += loss.item() * inputs.size(0)
                pbar.set_postfix({'Phase': 'Validate', 'Loss': loss.item()})
                pbar.update(1)

        print('calculating validation loss')
        # Calculate metrics
        train_loss /= len(train_loader.dataset)
        val_loss /= len(val_loader.dataset)
        scheduler.step(val_loss)

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            pbar.write(f"New best model saved with val loss: {val_loss:.4f}")

        print('updating progress bar')

        # Update progress bar with final metrics
        pbar.set_postfix({
            'Train Loss': f"{train_loss:.4f}",
            'Val Loss': f"{val_loss:.4f}",
            'LR': f"{optimizer.param_groups[1]['lr']:.5f}"
        })



  scaler = torch.cuda.amp.GradScaler()


starting epoch : 0


Epoch 1/20:   0%|          | 0/59 [00:00<?, ?batch/s]

starting model training

 Input shape: torch.Size([512, 19]), Label shape: torch.Size([512, 1])
starting forward pass

 Output shape from model: torch.Size([512, 1]), Loss: 1.0616509914398193
starting backward pass


In [None]:
# Final Evaluation (unchanged)
# 7. Final Evaluation
model.load_state_dict(torch.load(checkpoint_path))
model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, labels in test_loader:
        # Ensure inputs are on the correct device (GPU/CPU)
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        test_loss += criterion(outputs, labels).item() * inputs.size(0)

test_loss /= len(test_loader.dataset)
print(f"\nFinal Test Loss: {test_loss:.4f}")