In [1]:
# =============================================================================
# COMPLETE HYPERPARAMETER SEARCH FOR 10-QUBIT QNN WITH VARYING LAYERS
# Effective batch size = 256 (physical batch 64 + 4 accumulation steps)
# =============================================================================

import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import pennylane as qml
from pathlib import Path
import gc
import warnings
warnings.filterwarnings('ignore')

# -------------------- Environment --------------------
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
# Use GPU 0 (you can change to another index if needed)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# -------------------- Hyperparameters for search --------------------
n_qubits = 14
layer_list = [4, 6, 8, 10, 12,14,16,18]          # different layer depths
physical_batch = 64                      # must fit in GPU memory
accumulation_steps = 4                    # 64 * 4 = 256 effective batch
effective_batch = physical_batch * accumulation_steps
learning_rate = 1e-3
weight_decay = 1e-4
epochs = 100
patience = 20

print(f"Physical batch size: {physical_batch}, accumulation steps: {accumulation_steps}")
print(f"Effective batch size: {effective_batch}")

# -------------------- Data loading and preprocessing --------------------
print("\nLoading and preparing data...")
DRIVE_URL = "https://drive.google.com/uc?id=1PS0eB8dx8VMzVvxNUc6wBzsMRkEKJjWI"
df = pd.read_csv(DRIVE_URL)

# Physics feature engineering (copied from your notebook)
M_p = 938.272088
M_n = 939.565420
epsilon = 1e-30
LN10 = np.log(10.0)

def get_nucleon_mass(Z, A):
    return Z * M_p + (A - Z) * M_n

mass1 = df.apply(lambda r: get_nucleon_mass(r["Z1"], r["A1"]), axis=1).values
mass2 = df.apply(lambda r: get_nucleon_mass(r["Z2"], r["A2"]), axis=1).values
mu_MeVc2 = (mass1 * mass2) / (mass1 + mass2 + 1e-12)
Ecm = df["E c.m."].astype(float).values
v_over_c = np.sqrt(np.clip(2 * Ecm / (mu_MeVc2 + epsilon), 0, np.inf))
e2_hbar_c = 1 / 137.035999
df["eta"] = (df["Z1"] * df["Z2"]) / (e2_hbar_c * (v_over_c + 1e-16))

log10_sigma_exp = np.log10(np.clip(df["σ"], 1e-30, np.inf))
log10_sigma_cal = np.log10(np.clip(df["σ cal"], 1e-30, np.inf))
log10_Ecm = np.log10(np.clip(df["E c.m."], 1e-30, np.inf))
log10_exp_term = (2 * np.pi * df["eta"]) / LN10

df["log10_S_exp"] = log10_sigma_exp + log10_Ecm + log10_exp_term
df["log10_S_cal"] = log10_sigma_cal + log10_Ecm + log10_exp_term
df["delta_log10_S"] = df["log10_S_exp"] - df["log10_S_cal"]

df["N1"] = df["A1"] - df["Z1"]
df["N2"] = df["A2"] - df["Z2"]
df["Z1Z2_over_Ecm"] = (df["Z1"] * df["Z2"]) / (df["E c.m."] + epsilon)

MAGIC = np.array([2, 8, 20, 28, 50, 82, 126])
def magic_dist(arr):
    return np.min(np.abs(arr[:, None] - MAGIC[None, :]), axis=1)

df["magic_dist_Z1"] = magic_dist(df["Z1"].values)
df["magic_dist_N1"] = magic_dist(df["N1"].values)
df["magic_dist_Z2"] = magic_dist(df["Z2"].values)
df["magic_dist_N2"] = magic_dist(df["N2"].values)

# 29 features
features_train = [
    'E c.m.', 'Z1', 'N1', 'A1',
    'Z2', 'N2', 'A2', 'Q ( 2 n )',
    'Z1Z2_over_Ecm',
    'magic_dist_Z1', 'magic_dist_N1', 'magic_dist_Z2', 'magic_dist_N2',
    'Z3', 'N3', 'A3', 'β P', 'β T', 'R B', 'ħ ω',
    'Projectile_Mass_Actual', 'Target_Mass_Actual', 'Compound_Nucleus_Mass_Actual',
    'Compound_Nucleus_Sp', 'Compound_Nucleus_Sn',
    'Projectile_Binding_Energy', 'Target_Binding_Energy',
    'Compound_Nucleus_Binding_Energy', 'Compound_Nucleus_S2n'
]

# Reaction split
OUTDIR_BASE = "mdn_70_10_20_optimized"
train_file = Path(OUTDIR_BASE) / "train_reactions.csv"
val_file   = Path(OUTDIR_BASE) / "val_reactions.csv"
test_file  = Path(OUTDIR_BASE) / "test_reactions.csv"

if train_file.exists():
    train_reacts = pd.read_csv(train_file)["Reaction"].values
    val_reacts   = pd.read_csv(val_file)["Reaction"].values
    test_reacts  = pd.read_csv(test_file)["Reaction"].values
else:
    reactions = df["Reaction"].unique()
    np.random.seed(42)
    np.random.shuffle(reactions)
    n = len(reactions)
    train_reacts = reactions[:int(0.7 * n)]
    val_reacts   = reactions[int(0.7 * n):int(0.8 * n)]
    test_reacts  = reactions[int(0.8 * n):]
    os.makedirs(OUTDIR_BASE, exist_ok=True)
    pd.DataFrame({"Reaction": train_reacts}).to_csv(train_file, index=False)
    pd.DataFrame({"Reaction": val_reacts}).to_csv(val_file, index=False)
    pd.DataFrame({"Reaction": test_reacts}).to_csv(test_file, index=False)

train_mask = df["Reaction"].isin(train_reacts)
val_mask   = df["Reaction"].isin(val_reacts)
test_mask  = df["Reaction"].isin(test_reacts)

X = df[features_train].values.astype(np.float32)
y = df["delta_log10_S"].values.astype(np.float32).reshape(-1, 1)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train = X_scaled[train_mask]
y_train = y[train_mask]
X_val   = X_scaled[val_mask]
y_val   = y[val_mask]
X_test  = X_scaled[test_mask]
y_test  = y[test_mask]

# Convert to tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32)
X_val_t   = torch.tensor(X_val, dtype=torch.float32)
y_val_t   = torch.tensor(y_val, dtype=torch.float32)
X_test_t  = torch.tensor(X_test, dtype=torch.float32)
y_test_t  = torch.tensor(y_test, dtype=torch.float32)

# DataLoaders (with physical batch size)
train_loader = DataLoader(TensorDataset(X_train_t, y_train_t),
                          batch_size=physical_batch, shuffle=True,
                          num_workers=0, pin_memory=True)
val_loader   = DataLoader(TensorDataset(X_val_t, y_val_t),
                          batch_size=physical_batch, shuffle=False,
                          num_workers=0, pin_memory=True)
test_loader  = DataLoader(TensorDataset(X_test_t, y_test_t),
                          batch_size=physical_batch, shuffle=False,
                          num_workers=0, pin_memory=True)

print(f"Train samples: {X_train.shape[0]}, Validation: {X_val.shape[0]}, Test: {X_test.shape[0]}")
print(f"Train batches (physical): {len(train_loader)}")
print(f"Accumulation steps: {accumulation_steps} -> effective batches per epoch: {len(train_loader)//accumulation_steps}")

# -------------------- Quantum device and QNode --------------------
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch", diff_method="backprop")
def qnode(weights, x):
    qml.templates.AngleEmbedding(x, wires=range(n_qubits), rotation="X")
    qml.templates.StronglyEntanglingLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

# -------------------- Model definition (flexible qubits & layers) --------------------
class QuantumRegressor(nn.Module):
    def __init__(self, n_qubits, n_layers):
        super().__init__()
        self.n_qubits = n_qubits
        self.encoder = nn.Linear(29, n_qubits)
        self.q_weights = nn.Parameter(0.01 * torch.randn(n_layers, n_qubits, 3))
        self.fc1 = nn.Linear(n_qubits, 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x = x.float()
        x_enc = torch.tanh(self.encoder(x))
        q_out_tuple = qnode(self.q_weights, x_enc)
        q_out = torch.stack(q_out_tuple, dim=1)
        q_out = q_out.to(x.dtype)
        h = torch.relu(self.fc1(q_out))
        return self.fc2(h)

# -------------------- Training function for one configuration --------------------
def train_one_config(layers, seed=42):
    torch.manual_seed(seed)
    np.random.seed(seed)

    model = QuantumRegressor(n_qubits=n_qubits, n_layers=layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    criterion = nn.MSELoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

    best_val_loss = float('inf')
    best_epoch = 0
    patience_counter = 0
    best_state = None

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        optimizer.zero_grad()   # reset at start of epoch

        for batch_idx, (xb, yb) in enumerate(train_loader):
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb)
            loss = criterion(preds, yb) / accumulation_steps
            loss.backward()

            if (batch_idx + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            train_loss += loss.item() * accumulation_steps

        # Handle any leftover batches
        if (batch_idx + 1) % accumulation_steps != 0:
            optimizer.step()
            optimizer.zero_grad()

        train_loss /= len(train_loader)

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                val_loss += criterion(preds, yb).item()
        val_loss /= len(val_loader)

        scheduler.step(val_loss)

        if epoch % 10 == 0:
            print(f"  Epoch {epoch:3d}/{epochs} | Train Loss: {train_loss:.6f} | Val Loss: {val_loss:.6f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            patience_counter = 0
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"    Early stopping at epoch {epoch}")
                break

    # Load best model
    model.load_state_dict(best_state)
    model.to(device)

    # Evaluate on test set
    model.eval()
    preds_list = []
    with torch.no_grad():
        for xb, _ in test_loader:
            xb = xb.to(device)
            preds = model(xb)
            preds_list.append(preds.cpu().numpy())
    preds_arr = np.vstack(preds_list).flatten()
    y_true = y_test.flatten()

    mse = mean_squared_error(y_true, preds_arr)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, preds_arr)

    # Also compute validation metrics on best model
    val_preds = []
    with torch.no_grad():
        for xb, _ in val_loader:
            xb = xb.to(device)
            preds = model(xb)
            val_preds.append(preds.cpu().numpy())
    val_preds = np.vstack(val_preds).flatten()
    val_true = y_val.flatten()
    val_mse = mean_squared_error(val_true, val_preds)
    val_rmse = np.sqrt(val_mse)
    val_r2 = r2_score(val_true, val_preds)

    return {
        'layers': layers,
        'val_mse': val_mse,
        'val_rmse': val_rmse,
        'val_r2': val_r2,
        'test_mse': mse,
        'test_rmse': rmse,
        'test_r2': r2,
        'best_epoch': best_epoch
    }

# -------------------- Run hyperparameter search --------------------
results = []
for layers in layer_list:
    print(f"\n{'='*60}\nTraining with {layers} layers\n{'='*60}")
    torch.cuda.empty_cache()
    gc.collect()
    res = train_one_config(layers, seed=42)
    results.append(res)
    print(f"  --> Test R² = {res['test_r2']:.4f}")

# -------------------- Display summary --------------------
results_df = pd.DataFrame(results)
print("\n\n" + "="*70)
print("HYPERPARAMETER SEARCH SUMMARY (10 qubits, effective batch 256)")
print("="*70)
print(results_df.to_string(index=False))
print("\nBest test R²: {:.4f} with {} layers".format(
    results_df.loc[results_df['test_r2'].idxmax(), 'test_r2'],
    results_df.loc[results_df['test_r2'].idxmax(), 'layers']
))

Using device: cuda:0
Physical batch size: 64, accumulation steps: 4
Effective batch size: 256

Loading and preparing data...
Train samples: 2493, Validation: 354, Test: 685
Train batches (physical): 39
Accumulation steps: 4 -> effective batches per epoch: 9

Training with 4 layers
  Epoch   0/100 | Train Loss: 0.087380 | Val Loss: 0.101555
  Epoch  10/100 | Train Loss: 0.069700 | Val Loss: 0.102930
  Epoch  20/100 | Train Loss: 0.065974 | Val Loss: 0.105205
    Early stopping at epoch 23
  --> Test R² = -0.0287

Training with 6 layers
  Epoch   0/100 | Train Loss: 0.109002 | Val Loss: 0.115668
  Epoch  10/100 | Train Loss: 0.071582 | Val Loss: 0.105176
  Epoch  20/100 | Train Loss: 0.067062 | Val Loss: 0.106044
    Early stopping at epoch 23
  --> Test R² = -0.0106

Training with 8 layers
  Epoch   0/100 | Train Loss: 0.076948 | Val Loss: 0.106341
  Epoch  10/100 | Train Loss: 0.064716 | Val Loss: 0.113341
  Epoch  20/100 | Train Loss: 0.061401 | Val Loss: 0.114392
    Early stopping a