In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import pairwise_distances
from scipy.stats import pearsonr, spearmanr, f_oneway
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import r2_score, accuracy_score
import os

# ==================================================
# CONFIGURATION – CHANGE FOR EACH EXPERIMENT
# ==================================================
model_name = "qmdn_q15_l14_b16_acc2_s42_20260228_124052"   # your saved model name
print(f"Analyzing experiment: {model_name}")

# Load data
params = {}
with open(f"{model_name}_params.txt", "r") as f:
    for line in f:
        key, val = line.strip().split(": ")
        params[key] = val
print("Hyperparameters:", params)

# Load embeddings
emb = np.load(f"{model_name}_embeddings.npz")
train_emb = emb['train_emb']
val_emb   = emb['val_emb']
test_emb  = emb['test_emb']
quantum_embeddings = np.vstack([train_emb, val_emb, test_emb])
print(f"Embeddings shape: {quantum_embeddings.shape}")

# Load predictions
pred = np.load(f"{model_name}_predictions.npz")
train_pred, train_true = pred['train_pred'], pred['train_true']
val_pred, val_true     = pred['val_pred'], pred['val_true']
test_pred, test_true   = pred['test_pred'], pred['test_true']

# Load mixture parameters
mix = np.load(f"{model_name}_mixtures.npz")
train_pi, train_mu, train_sigma = mix['train_pi'], mix['train_mu'], mix['train_sigma']
val_pi, val_mu, val_sigma       = mix['val_pi'], mix['val_mu'], mix['val_sigma']
test_pi, test_mu, test_sigma     = mix['test_pi'], mix['test_mu'], mix['test_sigma']

# Combine all pi for the full dataset
pi_all = np.vstack([train_pi, val_pi, test_pi])
print(f"π shape: {pi_all.shape}")

# Load training history
epoch_pi_means = np.load(f"{model_name}_epoch_pi_means.npy")
epoch_val_nll  = np.load(f"{model_name}_epoch_val_nll.npy")

Analyzing experiment: qmdn_q15_l14_b16_acc2_s42_20260228_124052
Hyperparameters: {'n_qubits': '15', 'n_layers': '14', 'N_COMPONENTS': '5', 'BATCH_SIZE': '16', 'ACCUMULATION_STEPS': '2', 'effective_batch': '32', 'LAMBDA_ENTROPY': '0.05', 'LEARNING_RATE': '0.001', 'WEIGHT_DECAY': '0.0001', 'MAX_EPOCHS': '150', 'PATIENCE': '15', 'SEED': '42', 'best_val_nll': '-0.915461'}
Embeddings shape: (3532, 32768)
π shape: (3532, 5)


In [2]:
# Load original dataframe
DRIVE_URL = "https://drive.google.com/uc?id=1PS0eB8dx8VMzVvxNUc6wBzsMRkEKJjWI"
df = pd.read_csv(DRIVE_URL)

# Load reaction split (to get masks)
BASE_DIR = "mdn_70_10_20_optimized"
train_reacts = pd.read_csv(os.path.join(BASE_DIR, "train_reactions.csv"))["Reaction"].values
val_reacts   = pd.read_csv(os.path.join(BASE_DIR, "val_reactions.csv"))["Reaction"].values
test_reacts  = pd.read_csv(os.path.join(BASE_DIR, "test_reactions.csv"))["Reaction"].values

train_mask = df["Reaction"].isin(train_reacts)
val_mask   = df["Reaction"].isin(val_reacts)
test_mask  = df["Reaction"].isin(test_reacts)

# Load classical MDN ensemble data (for switch and regime labels)
ENSEMBLE_DIR = os.path.join(BASE_DIR, "ensembles_fast")
seed_dirs = sorted([os.path.join(ENSEMBLE_DIR, d) for d in os.listdir(ENSEMBLE_DIR) if d.startswith("seed_")])
all_seed_components = []
for seed_path in seed_dirs:
    npz_path = os.path.join(seed_path, "mdn_all_components.npz")
    if os.path.exists(npz_path):
        data = np.load(npz_path)
        all_seed_components.append({"pi": data["pi"]})
print("Loaded classical MDN seeds:", len(all_seed_components))

# Load one seed for regime labels (seed 42)
seed_path = os.path.join(ENSEMBLE_DIR, "seed_42", "mdn_all_components.npz")
data_mdn = np.load(seed_path)
pi_mdn = data_mdn["pi"]
dominant_regime = np.argmax(pi_mdn, axis=1)
print("Dominant regime shape:", dominant_regime.shape)

Loaded classical MDN seeds: 10
Dominant regime shape: (3532,)


In [None]:
# Attach embeddings to dataframe
df_emb = df.copy()
for i in range(quantum_embeddings.shape[1]):
    df_emb[f"q_{i}"] = quantum_embeddings[:, i]

# Reaction-level mean embedding
reaction_emb = df_emb.groupby("Reaction")[[f"q_{i}" for i in range(quantum_embeddings.shape[1])]].mean().reset_index()

# Compute reliable switch from classical MDN (reuse your function)
def compute_switch_per_seed(df, pi_array):
    df_temp = df.copy().reset_index(drop=True)
    df_temp["dominant"] = np.argmax(pi_array, axis=1)
    switch_dict = {}
    for reaction, sub in df_temp.groupby("Reaction"):
        sub = sub.sort_values("E c.m.").reset_index(drop=True)
        dom = sub["dominant"].values
        E_vals = sub["E c.m."].values
        if len(dom) < 2:
            continue
        switch_energy = np.nan
        for i in range(1, len(dom)):
            if dom[i] != dom[i-1]:
                switch_energy = E_vals[i]
                break
        if not np.isnan(switch_energy):
            V_B = sub["V_B"].iloc[0]
            x_switch = switch_energy / V_B
            switch_dict[reaction] = x_switch
    return switch_dict

seed_switch_results = [compute_switch_per_seed(df, seed["pi"]) for seed in all_seed_components]

all_reactions = df["Reaction"].unique()
switch_records = []
for reaction in all_reactions:
    x_list = [sd[reaction] for sd in seed_switch_results if reaction in sd]
    if len(x_list) > 0:
        switch_records.append({
            "Reaction": reaction,
            "x_switch_mean": np.mean(x_list),
            "x_switch_std": np.std(x_list),
            "seed_fraction": len(x_list)/len(seed_switch_results)
        })
switch_df = pd.DataFrame(switch_records)
switch_df_clean = switch_df[switch_df["seed_fraction"] >= 0.8].copy()
print("Reliable switches:", len(switch_df_clean))

# Merge with reaction embeddings
merged = reaction_emb.merge(switch_df_clean[["Reaction","x_switch_mean"]], on="Reaction", how="inner")
print("Merged shape:", merged.shape)

# Quantum matrix
Q_mat = merged[[f"q_{i}" for i in range(quantum_embeddings.shape[1])]].values
x_vals = merged["x_switch_mean"].values

# PCA on reaction-level embeddings
pca = PCA(n_components=3)
Q_pca = pca.fit_transform(Q_mat)
print("Explained variance ratio:", pca.explained_variance_ratio_)

# Correlate PC1 with x_switch
pc1 = Q_pca[:,0]
corr, pval = pearsonr(pc1, x_vals)
print(f"PC1 vs x_switch: r={corr:.3f}, p={pval:.3e}")

# Plot
plt.figure(figsize=(6,5))
plt.scatter(pc1, x_vals, alpha=0.7)
plt.xlabel("Quantum PC1")
plt.ylabel("x_switch")
plt.title(f"{model_name}\nPC1 vs x_switch (r={corr:.3f})")
plt.grid(alpha=0.3)
plt.savefig(f"{model_name}_pc1_vs_switch.png", dpi=150)
plt.show()

  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embeddings[:, i]
  df_emb[f"q_{i}"] = quantum_embed

In [None]:
# Merge structural parameters
struct_df = df.groupby("Reaction").first().reset_index()
merged = merged.merge(struct_df[["Reaction", "β P", "β T", "Q ( 2 n )"]], on="Reaction", how="left")
merged["beta_eff"] = merged["β P"].abs() + merged["β T"].abs()

beta_vals = merged["beta_eff"].values
Q2n_vals = merged["Q ( 2 n )"].values

print("PC1 vs beta_eff:", pearsonr(pc1, beta_vals))
print("PC1 vs Q(2n):", pearsonr(pc1, Q2n_vals))

# Linear models
X_pcs = Q_pca[:, :3]
lr_pcs = LinearRegression().fit(X_pcs, x_vals)
print("R2 (3 PCs → x_switch):", r2_score(x_vals, lr_pcs.predict(X_pcs)))

X_struct = np.column_stack([beta_vals, Q2n_vals])
lr_struct = LinearRegression().fit(X_struct, x_vals)
print("R2 (β_eff+Q2n → x_switch):", r2_score(x_vals, lr_struct.predict(X_struct)))

In [None]:
# Use the full quantum embeddings and the MDN regime labels
pca_sample = PCA(n_components=3)
Q_pca_sample = pca_sample.fit_transform(quantum_embeddings)
print("Explained variance ratio:", pca_sample.explained_variance_ratio_)

pca_df = pd.DataFrame({
    "PC1": Q_pca_sample[:, 0],
    "PC2": Q_pca_sample[:, 1],
    "Regime": dominant_regime
})

plt.figure(figsize=(8,6))
for regime in np.unique(dominant_regime):
    subset = pca_df[pca_df["Regime"] == regime]
    plt.scatter(subset["PC1"], subset["PC2"], alpha=0.4, label=f"Regime {regime}")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title(f"{model_name}\nQuantum PCA Colored by MDN Regime")
plt.legend()
plt.grid(alpha=0.3)
plt.savefig(f"{model_name}_pca_regimes.png", dpi=150)
plt.show()

# Classification accuracy using PC1 only
clf = LogisticRegression(max_iter=1000)
clf.fit(Q_pca_sample[:, 0].reshape(-1,1), dominant_regime)
acc = accuracy_score(dominant_regime, clf.predict(Q_pca_sample[:, 0].reshape(-1,1)))
print(f"Regime classification accuracy (PC1 only): {acc:.3f}")

# ANOVA
groups = [Q_pca_sample[dominant_regime == r, 0] for r in np.unique(dominant_regime)]
f_stat, p_val = f_oneway(*groups)
print(f"ANOVA PC1 across regimes: F={f_stat:.1f}, p={p_val:.3e}")

In [None]:
reaction_name = "12 C + 194 Pt"   # change as needed

reaction_mask = df["Reaction"] == reaction_name
reaction_rows = df.loc[reaction_mask].sort_values("E c.m.")
if len(reaction_rows) == 0:
    print("Reaction not found.")
else:
    # Get indices of this reaction in the full dataset
    reaction_indices = reaction_rows.index
    # Map to positions in pi_all (which is in the order train+val+test)
    # Need to know the split order – easier: use the mixture parameters saved per loader
    # But we have pi_all for all samples. We need to map indices to positions in pi_all.
    # The pi_all was stacked as [train_pi, val_pi, test_pi]. We can create an index map.
    n_train = len(train_pi)
    n_val   = len(val_pi)
    # Create a boolean mask for the full dataset in the same order
    # This is a bit involved – simpler: re-extract using the model if you have the scaler.
    # For simplicity, we'll re-extract using the scaler and model.
    # But here we assume you have the scaler from training.
    # If not, you can load it from disk if saved.
    # For now, we'll just use the stored pi_all and assume we know the order.
    # We'll print a warning and skip.
    print("Mapping indices to pi_all is not implemented. Use the stored mixture parameters and the original data order.")
    # Alternatively, you can compute pi_reaction using the model as before.
    # Let's do that:
    from sklearn.preprocessing import StandardScaler
    # Recreate scaler (or load from disk if saved)
    # This requires having X_train_full_s etc. – we don't have them here.
    # So better to have saved the scaler during training.
    # If you saved scaler, load it.
    # For now, we'll just show a placeholder.
    print("To plot regime evolution, load the scaler and model, and run inference on the reaction data.")

In [None]:
plt.figure(figsize=(14,5))
plt.subplot(1,2,1)
for k in range(epoch_pi_means.shape[1]):
    plt.plot(epoch_pi_means[:, k], label=f'Component {k}')
plt.xlabel('Epoch')
plt.ylabel('Average π weight')
plt.title('Component Evolution During Training')
plt.legend()
plt.grid(alpha=0.3)

plt.subplot(1,2,2)
plt.plot(epoch_val_nll, 'r-')
plt.xlabel('Epoch')
plt.ylabel('Validation NLL')
plt.title('Validation Loss')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f"{model_name}_training_history.png", dpi=150)
plt.show()

In [None]:
# Save merged reaction data
merged.to_csv(f"{model_name}_reaction_analysis.csv", index=False)

# Save sample PCA data
np.savez(f"{model_name}_sample_pca.npz",
         pc_scores=Q_pca_sample,
         explained_ratio=pca_sample.explained_variance_ratio_)

print("All analysis results saved.")