In [1]:
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import pennylane as qml
import matplotlib.pyplot as plt
import gc
import subprocess
from datetime import datetime

# ---- GPU Selection ----
def get_free_gpu():
    """Return index of GPU with most free memory (simple heuristic)."""
    result = subprocess.run(['nvidia-smi', '--query-gpu=memory.free', '--format=csv,nounits,noheader'], 
                            capture_output=True, text=True)
    free_memory = [int(x) for x in result.stdout.strip().split('\n')]
    best_gpu = np.argmax(free_memory)
    print(f"GPU free memory: {free_memory}")
    print(f"Selected GPU {best_gpu} with {free_memory[best_gpu]} MiB free")
    return best_gpu

# Uncomment to auto-select, or manually set GPU index:
# gpu_index = get_free_gpu()
gpu_index = 0   # <--- CHANGE MANUALLY IF NEEDED
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_index)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device} (GPU {gpu_index})")

Using device: cuda (GPU 0)


In [2]:
# ---- Load data ----
DRIVE_URL = "https://drive.google.com/uc?id=1PS0eB8dx8VMzVvxNUc6wBzsMRkEKJjWI"
df = pd.read_csv(DRIVE_URL)

# ---- Physics constants ----
M_p = 938.272088
M_n = 939.565420
epsilon = 1e-30
LN10 = np.log(10.0)
e2_hbar_c = 1/137.035999

def mass(Z, A):
    return Z*M_p + (A-Z)*M_n

# ---- Feature engineering (copied from your working code) ----
df["N1"] = df["A1"] - df["Z1"]
df["N2"] = df["A2"] - df["Z2"]

df["mass1"] = mass(df["Z1"], df["A1"])
df["mass2"] = mass(df["Z2"], df["A2"])

mu_red = (df["mass1"] * df["mass2"]) / (df["mass1"] + df["mass2"] + 1e-12)
v_over_c = np.sqrt(np.clip(2*df["E c.m."].values/(mu_red+epsilon), 0, np.inf))
df["eta"] = (df["Z1"]*df["Z2"]) / (e2_hbar_c * (v_over_c + 1e-16))

df["Z1Z2_over_Ecm"] = (df["Z1"]*df["Z2"]) / (df["E c.m."] + epsilon)

MAGIC = np.array([2,8,20,28,50,82,126])
df["magic_dist_Z1"] = np.min(np.abs(df["Z1"].values[:,None] - MAGIC), axis=1)
df["magic_dist_N1"] = np.min(np.abs(df["N1"].values[:,None] - MAGIC), axis=1)
df["magic_dist_Z2"] = np.min(np.abs(df["Z2"].values[:,None] - MAGIC), axis=1)
df["magic_dist_N2"] = np.min(np.abs(df["N2"].values[:,None] - MAGIC), axis=1)

# Coulomb barrier
barrier_df = df.groupby("Reaction").first().reset_index()
barrier_df["V_B"] = (barrier_df["Z1"]*barrier_df["Z2"]*1.44) / barrier_df["R B"]
df = df.merge(barrier_df[["Reaction","V_B"]], on="Reaction", how="left")

# S-factor logs
log10_sigma_exp = np.log10(np.clip(df["œÉ"], 1e-30, np.inf))
log10_sigma_cal = np.log10(np.clip(df["œÉ cal"], 1e-30, np.inf))
log10_Ecm = np.log10(np.clip(df["E c.m."], 1e-30, np.inf))

df["log10_S_exp"] = log10_sigma_exp + log10_Ecm + (2*np.pi*df["eta"])/LN10
df["log10_S_cal"] = log10_sigma_cal + log10_Ecm + (2*np.pi*df["eta"])/LN10
df["delta_log10_S"] = df["log10_S_exp"] - df["log10_S_cal"]

print("Data ready. Total rows:", len(df))

Data ready. Total rows: 3532


In [3]:
features_train = [
    'E c.m.', 'Z1', 'N1', 'A1',
    'Z2', 'N2', 'A2', 'Q ( 2 n )',
    'Z1Z2_over_Ecm',
    'magic_dist_Z1','magic_dist_N1','magic_dist_Z2','magic_dist_N2',
    'Z3','N3','A3','Œ≤ P','Œ≤ T','R B','ƒß œâ',
    'Projectile_Mass_Actual','Target_Mass_Actual','Compound_Nucleus_Mass_Actual',
    'Compound_Nucleus_Sp','Compound_Nucleus_Sn',
    'Projectile_Binding_Energy','Target_Binding_Energy',
    'Compound_Nucleus_Binding_Energy','Compound_Nucleus_S2n'
]

OUTDIR_BASE = "mdn_70_10_20_optimized"
train_reacts = pd.read_csv(f"{OUTDIR_BASE}/train_reactions.csv")["Reaction"].values
val_reacts   = pd.read_csv(f"{OUTDIR_BASE}/val_reactions.csv")["Reaction"].values
test_reacts  = pd.read_csv(f"{OUTDIR_BASE}/test_reactions.csv")["Reaction"].values

train_mask = df["Reaction"].isin(train_reacts)
val_mask   = df["Reaction"].isin(val_reacts)
test_mask  = df["Reaction"].isin(test_reacts)

X = df[features_train].values.astype(np.float32)
y = df["delta_log10_S"].values.astype(np.float32).reshape(-1,1)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train = X_scaled[train_mask]
y_train = y[train_mask]
X_val   = X_scaled[val_mask]
y_val   = y[val_mask]
X_test  = X_scaled[test_mask]
y_test  = y[test_mask]

print(f"Train: {X_train.shape[0]}, Val: {X_val.shape[0]}, Test: {X_test.shape[0]}")

Train: 2493, Val: 354, Test: 685


In [4]:
# ==================================================
# ADJUST THESE PARAMETERS FOR EACH EXPERIMENT
# ==================================================
n_qubits = 15
n_layers = 14
batch_size = 32              # physical batch size (use smaller if OOM)
accumulation_steps = 2        # effective batch = batch_size * accumulation_steps
learning_rate = 1e-3
weight_decay = 1e-4
epochs = 100
patience = 10
seed = 42                     # random seed for reproducibility
dropout_rate = 0.2

# Effective batch size for logging
effective_batch = batch_size * accumulation_steps
print(f"Qubits: {n_qubits}, Layers: {n_layers}, Batch: {batch_size} (eff. {effective_batch})")

# ---- Set seeds ----
torch.manual_seed(seed)
np.random.seed(seed)

# ---- Automatic naming ----
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
model_name = f"qnn_q{n_qubits}_l{n_layers}_b{batch_size}_acc{accumulation_steps}_s{seed}_{timestamp}"
print("Model name:", model_name)

Qubits: 15, Layers: 14, Batch: 32 (eff. 64)
Model name: qnn_q15_l14_b32_acc2_s42_20260228_121144


In [5]:
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch", diff_method="backprop")
def qnode(weights, x):
    qml.templates.AngleEmbedding(x, wires=range(n_qubits), rotation="X")
    qml.templates.StronglyEntanglingLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

In [6]:
class QuantumRegressor(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.encoder = nn.Linear(in_dim, n_qubits)
        self.q_weights = nn.Parameter(0.01 * torch.randn(n_layers, n_qubits, 3))
        self.fc1 = nn.Linear(n_qubits, 16)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x):
        x = x.float()
        x_enc = torch.tanh(self.encoder(x))
        q_out_tuple = qnode(self.q_weights, x_enc)
        q_out = torch.stack(q_out_tuple, dim=1)
        q_out = q_out.to(x.dtype)
        h = torch.relu(self.fc1(q_out))
        h = self.dropout(h)
        return self.fc2(h)

In [7]:
def make_loader(X, y, batch, shuffle=True):
    return DataLoader(
        TensorDataset(torch.tensor(X, dtype=torch.float32),
                      torch.tensor(y, dtype=torch.float32)),
        batch_size=batch,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=True
    )

train_loader = make_loader(X_train, y_train, batch_size, shuffle=True)
val_loader   = make_loader(X_val, y_val, batch_size, shuffle=False)
test_loader  = make_loader(X_test, y_test, batch_size, shuffle=False)

print(f"Train batches: {len(train_loader)}, Val batches: {len(val_loader)}")

Train batches: 78, Val batches: 12


In [8]:
model = QuantumRegressor(in_dim=29).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
criterion = nn.MSELoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Tracking variables
train_losses = []
val_losses = []
best_val_loss = float('inf')
patience_counter = 0
best_epoch = 0

In [None]:
import time   # make sure time is imported

print(f"\nüöÄ Starting training: {model_name}\n")
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    optimizer.zero_grad()
    start_time = time.time()   # <-- start timer

    for batch_idx, (xb, yb) in enumerate(train_loader):
        xb, yb = xb.to(device), yb.to(device)
        preds = model(xb)
        loss = criterion(preds, yb) / accumulation_steps
        loss.backward()

        if (batch_idx + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

        epoch_loss += loss.item() * xb.size(0) * accumulation_steps

    if (len(train_loader) % accumulation_steps) != 0:
        optimizer.step()
        optimizer.zero_grad()

    epoch_loss /= len(train_loader.dataset)
    train_losses.append(epoch_loss)

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb)
            val_loss += criterion(preds, yb).item() * xb.size(0)
    val_loss /= len(val_loader.dataset)
    val_losses.append(val_loss)

    scheduler.step(val_loss)
    epoch_time = time.time() - start_time   # <-- compute elapsed time
    current_lr = optimizer.param_groups[0]['lr']

    # Logging with time and LR
    print(f"Epoch {epoch+1:3d}/{epochs} | Time: {epoch_time:.2f}s | "
          f"Train Loss: {epoch_loss:.6f} | Val Loss: {val_loss:.6f} | LR: {current_lr:.2e}")

    # Early stopping and model saving
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        patience_counter = 0
        torch.save(model.state_dict(), f"{model_name}_best.pt")
        print(f"  *** New best model saved (epoch {epoch+1}) ***")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

print("\n‚úÖ Training finished.")


üöÄ Starting training: qnn_q15_l14_b32_acc2_s42_20260228_121144

Epoch   1/100 | Time: 146.63s | Train Loss: 0.093895 | Val Loss: 0.106357 | LR: 1.00e-03
  *** New best model saved (epoch 1) ***
Epoch   2/100 | Time: 150.83s | Train Loss: 0.083180 | Val Loss: 0.106518 | LR: 1.00e-03
Epoch   3/100 | Time: 150.81s | Train Loss: 0.077519 | Val Loss: 0.105585 | LR: 1.00e-03
  *** New best model saved (epoch 3) ***
Epoch   4/100 | Time: 150.69s | Train Loss: 0.075077 | Val Loss: 0.105927 | LR: 1.00e-03
Epoch   5/100 | Time: 150.58s | Train Loss: 0.074387 | Val Loss: 0.104597 | LR: 1.00e-03
  *** New best model saved (epoch 5) ***
Epoch   6/100 | Time: 148.10s | Train Loss: 0.072368 | Val Loss: 0.107300 | LR: 1.00e-03
Epoch   7/100 | Time: 148.25s | Train Loss: 0.071133 | Val Loss: 0.104925 | LR: 1.00e-03


In [None]:
# Save losses
np.save(f"{model_name}_train_losses.npy", np.array(train_losses))
np.save(f"{model_name}_val_losses.npy", np.array(val_losses))

# Save hyperparameters as text
with open(f"{model_name}_params.txt", "w") as f:
    f.write(f"n_qubits: {n_qubits}\n")
    f.write(f"n_layers: {n_layers}\n")
    f.write(f"batch_size: {batch_size}\n")
    f.write(f"accumulation_steps: {accumulation_steps}\n")
    f.write(f"learning_rate: {learning_rate}\n")
    f.write(f"weight_decay: {weight_decay}\n")
    f.write(f"seed: {seed}\n")
    f.write(f"dropout_rate: {dropout_rate}\n")
    f.write(f"best_epoch: {best_epoch+1}\n")
    f.write(f"best_val_loss: {best_val_loss:.6f}\n")

print("Training history saved.")

In [None]:
# Load best model
model.load_state_dict(torch.load(f"{model_name}_best.pt", map_location=device))
model.eval()

def evaluate(loader, name):
    preds, truth = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device)
            out = model(xb)
            preds.append(out.cpu().numpy())
            truth.append(yb.numpy())
    preds = np.vstack(preds)
    truth = np.vstack(truth)
    mse = mean_squared_error(truth, preds)
    rmse = np.sqrt(mse)
    r2 = r2_score(truth, preds)
    print(f"{name} -> MSE: {mse:.6f}, RMSE: {rmse:.6f}, R¬≤: {r2:.6f}")
    return mse, rmse, r2, preds, truth

# Evaluate
train_mse, train_rmse, train_r2, train_pred, train_true = evaluate(train_loader, "TRAIN")
val_mse, val_rmse, val_r2, val_pred, val_true = evaluate(val_loader, "VALIDATION")
test_mse, test_rmse, test_r2, test_pred, test_true = evaluate(test_loader, "TEST")

# Save predictions
np.savez(f"{model_name}_predictions.npz",
         train_pred=train_pred, train_true=train_true,
         val_pred=val_pred, val_true=val_true,
         test_pred=test_pred, test_true=test_true)

In [None]:
# This cell extracts the 14-dimensional quantum feature vector (‚ü®Z_i‚ü©) for all samples
# Useful for PCA, regime separation, etc.

def get_embeddings(loader):
    embeddings = []
    with torch.no_grad():
        for xb, _ in loader:
            xb = xb.to(device)
            x_enc = torch.tanh(model.encoder(xb))
            q_out_tuple = qnode(model.q_weights, x_enc)
            q_out = torch.stack(q_out_tuple, dim=1).cpu().numpy()
            embeddings.append(q_out)
    return np.vstack(embeddings)

train_emb = get_embeddings(train_loader)
val_emb   = get_embeddings(val_loader)
test_emb  = get_embeddings(test_loader)

np.savez(f"{model_name}_embeddings.npz",
         train_emb=train_emb, val_emb=val_emb, test_emb=test_emb)
print("Embeddings saved.")

In [None]:
plt.figure(figsize=(10,5))
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.axvline(x=best_epoch, color='r', linestyle='--', label='Best Model')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.title(f'Training History: {model_name}')
plt.legend()
plt.grid(alpha=0.3)
plt.savefig(f"{model_name}_loss_curve.png", dpi=150)
plt.show()

In [None]:
fig, axes = plt.subplots(1,3, figsize=(15,4))
datasets = [('Train', train_true, train_pred),
            ('Validation', val_true, val_pred),
            ('Test', test_true, test_pred)]
for ax, (name, y_true, y_pred) in zip(axes, datasets):
    ax.scatter(y_true, y_pred, alpha=0.5)
    ax.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--')
    ax.set_xlabel('True Œîlog‚ÇÅ‚ÇÄS')
    ax.set_ylabel('Predicted Œîlog‚ÇÅ‚ÇÄS')
    ax.set_title(f'{name} (R¬≤ = {r2_score(y_true, y_pred):.3f})')
    ax.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f"{model_name}_scatter.png", dpi=150)
plt.show()