In [9]:
import os
import numpy as np
import pandas as pd
from pathlib import Path



# ---------------------------------------
# Load original dataframe
# ---------------------------------------
DRIVE_URL = "https://drive.google.com/uc?id=1PS0eB8dx8VMzVvxNUc6wBzsMRkEKJjWI"
df = pd.read_csv(DRIVE_URL)

# ---------------------------------------
# Output locations of the trained model artefacts
# ---------------------------------------
OUTDIR_BASE = "mdn_70_10_20_optimized"
ENSEMBLE_DIR = os.path.join(OUTDIR_BASE, "ensembles_fast")

# ---------------------------------------
# Load reaction splits
# ---------------------------------------
# Each split file is read in train / val / test order and reduced to the
# array of names stored in its "Reaction" column.
train_reacts, val_reacts, test_reacts = (
    pd.read_csv(split_csv)["Reaction"].values
    for split_csv in (
        f"{OUTDIR_BASE}/train_reactions.csv",
        f"{OUTDIR_BASE}/val_reactions.csv",
        f"{OUTDIR_BASE}/test_reactions.csv",
    )
)

print("Train reactions:", len(train_reacts))
print("Val reactions:", len(val_reacts))
print("Test reactions:", len(test_reacts))
# ---------------------------------------
# Compute Coulomb barrier height V_B
# ---------------------------------------

# One row per reaction (first non-null value of every column), then
# V_B = 1.44 * Z1 * Z2 / "R B".
# NOTE(review): 1.44 is presumably e^2 in MeV*fm with "R B" in fm — confirm.
barrier_df = (
    df.groupby("Reaction")
    .first()
    .reset_index()
    .assign(Z1Z2=lambda per_reaction: per_reaction["Z1"] * per_reaction["Z2"])
    .assign(V_B=lambda per_reaction: (per_reaction["Z1Z2"] * 1.44) / per_reaction["R B"])
    .loc[:, ["Reaction", "V_B"]]
)

# Broadcast the per-reaction barrier back onto every measurement row.
df = df.merge(barrier_df, how="left", on="Reaction")

print("Barrier heights computed and merged.")
print(barrier_df.head(5))
print("Total reactions in dataset:", df["Reaction"].nunique())

# ---------------- Physics feature engineering ----------------
# Proton / neutron rest masses (the reduced mass built from them is named
# mu_MeVc2 further down, i.e. MeV/c^2), a tiny divide-by-zero guard, and ln(10).
M_p = 938.272088
M_n = 939.565420
epsilon = 1e-30
LN10 = np.log(10.0)


def get_nucleon_mass(Z, A):
    """Return the summed free-nucleon mass of a nucleus: Z protons + (A - Z) neutrons.

    Pure arithmetic, so scalars and array-likes are both accepted.
    """
    proton_part = Z * M_p
    neutron_part = (A - Z) * M_n
    return proton_part + neutron_part

# Summed nucleon masses of projectile (1) and target (2). get_nucleon_mass is
# plain arithmetic, so it is applied to whole columns at once instead of a
# slow row-by-row df.apply; the resulting values are the same.
mass1 = get_nucleon_mass(df["Z1"], df["A1"]).to_numpy(dtype=float)
mass2 = get_nucleon_mass(df["Z2"], df["A2"]).to_numpy(dtype=float)

# Reduced mass and non-relativistic relative velocity v/c = sqrt(2*E/mu).
# The small additive constants only guard against division by zero.
mu_MeVc2 = (mass1 * mass2) / (mass1 + mass2 + 1e-12)
Ecm = df["E c.m."].astype(float).values
v_over_c = np.sqrt(np.clip(2*Ecm/(mu_MeVc2+epsilon),0,np.inf))
e2_hbar_c = 1/137.035999  # fine-structure constant alpha = e^2/(hbar*c)

# Sommerfeld parameter: eta = Z1*Z2*e^2/(hbar*v) = Z1*Z2*alpha/(v/c).
# alpha belongs in the NUMERATOR; dividing by it (as this cell used to do)
# inflates eta by a factor of ~137^2.
df["eta"] = (df["Z1"]*df["Z2"]*e2_hbar_c) / (v_over_c+1e-16)

# log10 of measured / calculated cross sections and of the energy; inputs are
# clipped at 1e-30 so that zeros do not produce -inf.
log10_sigma_exp = np.log10(np.clip(df["σ"],1e-30,np.inf))
log10_sigma_cal = np.log10(np.clip(df["σ cal"],1e-30,np.inf))
log10_Ecm = np.log10(np.clip(df["E c.m."],1e-30,np.inf))

# log10(exp(2*pi*eta)) = 2*pi*eta / ln(10)
log10_exp_term = (2*np.pi*df["eta"])/LN10

# log10 S = log10(sigma) + log10(E) + log10(exp(2*pi*eta)).
# The eta term is common to both S columns and cancels in delta_log10_S.
df["log10_S_exp"] = log10_sigma_exp + log10_Ecm + log10_exp_term
df["log10_S_cal"] = log10_sigma_cal + log10_Ecm + log10_exp_term
df["delta_log10_S"] = df["log10_S_exp"] - df["log10_S_cal"]

# Neutron numbers and a simple Coulomb-to-energy ratio.
df["N1"] = df["A1"] - df["Z1"]
df["N2"] = df["A2"] - df["Z2"]
df["Z1Z2_over_Ecm"] = (df["Z1"]*df["Z2"]) / (df["E c.m."] + epsilon)

# Reference values used by magic_dist.
MAGIC = np.array([2,8,20,28,50,82,126])


def magic_dist(arr):
    """For each entry of a 1-D array, return its absolute distance to the nearest MAGIC value."""
    # (n, 1) minus (7,) broadcasts to an (n, 7) table of differences.
    gaps = np.abs(arr[:, np.newaxis] - MAGIC)
    return gaps.min(axis=1)

# Output column -> source column; one magic-number distance per proton /
# neutron count of projectile (1) and target (2).
magic_columns = {
    "magic_dist_Z1": "Z1",
    "magic_dist_N1": "N1",
    "magic_dist_Z2": "Z2",
    "magic_dist_N2": "N2",
}
for target_col, source_col in magic_columns.items():
    df[target_col] = magic_dist(df[source_col].values)

# ---------------- 29 training features ----------------
# The order is kept exactly as originally listed.
features_train = [
    # energy and projectile composition
    "E c.m.", "Z1", "N1", "A1",
    # target composition and Q value
    "Z2", "N2", "A2", "Q ( 2 n )",
    # engineered ratio
    "Z1Z2_over_Ecm",
    # distances to the nearest MAGIC value
    "magic_dist_Z1", "magic_dist_N1", "magic_dist_Z2", "magic_dist_N2",
    # third nucleus and dataset-provided barrier / deformation columns
    "Z3", "N3", "A3", "β P", "β T", "R B", "ħ ω",
    # masses
    "Projectile_Mass_Actual", "Target_Mass_Actual", "Compound_Nucleus_Mass_Actual",
    # compound-nucleus separation energies
    "Compound_Nucleus_Sp", "Compound_Nucleus_Sn",
    # binding energies
    "Projectile_Binding_Energy", "Target_Binding_Energy",
    "Compound_Nucleus_Binding_Energy", "Compound_Nucleus_S2n",
]





Train reactions: 149
Val reactions: 21
Test reactions: 43
Barrier heights computed and merged.
        Reaction        V_B
0  12 C + 144 Sm  48.259459
1  12 C + 152 Sm  43.728980
2  12 C + 154 Sm  43.200000
3  12 C + 181 Ta  49.584906
4  12 C + 194 Pt  57.797599
Total reactions in dataset: 213


In [3]:
# ==========================================================
# QMDN SWITCH ANALYSIS — LOAD COMPONENTS
# ==========================================================

import numpy as np
import pandas as pd
import os

OUTDIR_BASE = "mdn_70_10_20_optimized"

# Load QMDN outputs (full dataset)
qmdn_file = "qmdn_entropy_all_components.npz"

# np.load on an .npz returns a lazily-read archive that keeps the file open
# until it is closed; the context manager releases the handle. Indexing the
# archive reads each array fully into memory, so the arrays stay usable after
# the block (the `data` archive object itself is closed from then on).
with np.load(qmdn_file) as data:
    pi_all = data["pi"]     # (N_rows, N_components)
    mu_all = data["mu"]
    sigma_all = data["sigma"]

print("QMDN loaded.")
print("pi shape:", pi_all.shape)

# Number of rows covered by the QMDN outputs.
N_rows = pi_all.shape[0]

QMDN loaded.
pi shape: (3532, 5)


In [6]:
# Attach the index of the largest-weight QMDN component to every data row.
# Row i of pi_all is matched to row i of the dataframe purely by position,
# so the two must at least have the same length.
df_q = df.reset_index(drop=True).copy()

n_frame_rows = len(df_q)
assert n_frame_rows == N_rows, "Mismatch between QMDN outputs and dataframe rows."

df_q["dominant"] = pi_all.argmax(axis=1)

print("Dominant regimes assigned.")
print(df_q["dominant"].value_counts())

Dominant regimes assigned.
dominant
4    2044
0    1003
3     340
1     112
2      33
Name: count, dtype: int64


In [7]:
# ==========================================================
# Compute switch energy and x_switch (QMDN)
# ==========================================================
# For every reaction, walk the points in order of increasing "E c.m." and
# record the energy of the first point whose dominant component differs from
# the previous point's. x_switch is that energy divided by the reaction's V_B.

switch_records_q = []

for reaction, sub in df_q.groupby("Reaction"):

    sub = sub.sort_values("E c.m.").reset_index(drop=True)

    dom = sub["dominant"].values
    E_vals = sub["E c.m."].values

    # changed[k] is True when point k+1 has a different dominant component
    # than point k. A single-point reaction yields an empty array here.
    changed = dom[1:] != dom[:-1]
    if not changed.any():
        continue

    # argmax of a boolean array is the position of its first True.
    switch_energy = E_vals[int(np.argmax(changed)) + 1]

    # A missing energy at the switch point cannot be used.
    if np.isnan(switch_energy):
        continue

    V_B = sub["V_B"].iloc[0]

    switch_records_q.append({
        "Reaction": reaction,
        "E_switch": switch_energy,
        "V_B": V_B,
        "x_switch": switch_energy / V_B
    })

# Explicit columns keep the frame usable (empty, but with the expected
# columns) when no reaction switches; without them later column lookups
# would raise KeyError.
switch_df_q = pd.DataFrame(
    switch_records_q,
    columns=["Reaction", "E_switch", "V_B", "x_switch"],
)

print("Total reactions with switch:", len(switch_df_q))
print(switch_df_q.head())

Total reactions with switch: 193
        Reaction  E_switch        V_B  x_switch
0  12 C + 144 Sm     44.21  48.259459  0.916090
1  12 C + 152 Sm     46.53  43.728980  1.064054
2  12 C + 154 Sm     45.18  43.200000  1.045833
3  12 C + 181 Ta     53.74  49.584906  1.083798
4  12 C + 194 Pt     53.21  57.797599  0.920626


In [10]:
# Tag each reaction with its data split. Test membership is checked first,
# then validation; anything else is labelled "train".
in_test = switch_df_q["Reaction"].isin(test_reacts)
in_val = switch_df_q["Reaction"].isin(val_reacts)

switch_df_q["set"] = np.where(in_test, "test", np.where(in_val, "val", "train"))

In [11]:
# Summary statistics of x_switch over all reactions that show a switch.
x_switch_col = switch_df_q["x_switch"]

print("\nGlobal x_switch statistics (QMDN):")
print(x_switch_col.describe())

mean_x, std_x = x_switch_col.mean(), x_switch_col.std()

print("\nMean x_switch:", round(mean_x,4))
print("Std x_switch:", round(std_x,4))


Global x_switch statistics (QMDN):
count    193.000000
mean       0.953411
std        0.065503
min        0.837973
25%        0.902982
50%        0.935674
75%        0.995076
max        1.209695
Name: x_switch, dtype: float64

Mean x_switch: 0.9534
Std x_switch: 0.0655


In [12]:
# ==========================================================
# Bootstrap confidence interval
# ==========================================================
# Percentile bootstrap for the mean of x_switch: resample the reactions with
# replacement N_BOOT times and take the 2.5th / 97.5th percentiles of the
# resampled means.

N_BOOT = 2000
BOOT_SEED = 42  # fixed seed so the printed interval is reproducible on re-run

values = switch_df_q["x_switch"].values

# Draw every resample in one call: each row of the (N_BOOT, n) matrix is one
# bootstrap sample of the same size as the data.
rng = np.random.default_rng(BOOT_SEED)
boot_means = rng.choice(values, size=(N_BOOT, len(values)), replace=True).mean(axis=1)

ci_low, ci_high = np.percentile(boot_means, [2.5, 97.5])

print("\nBootstrap 95% CI:")
print("Lower:", round(ci_low,4))
print("Upper:", round(ci_high,4))


Bootstrap 95% CI:
Lower: 0.9435
Upper: 0.963


In [13]:
# Count, mean and standard deviation of x_switch within each data split.
per_set_stats = switch_df_q.groupby("set")["x_switch"].agg(["count","mean","std"])

print("\nTrain/Val/Test statistics:")
print(per_set_stats)


Train/Val/Test statistics:
       count      mean       std
set                             
test      39  0.948043  0.066382
train    135  0.957617  0.064895
val       19  0.934543  0.067398


In [14]:
# The five reactions with the largest and the five with the smallest x_switch.
report_cols = ["Reaction","x_switch"]

high_outliers = switch_df_q.sort_values(by="x_switch", ascending=False).iloc[:5]
low_outliers = switch_df_q.sort_values(by="x_switch", ascending=True).iloc[:5]

print("\nTop 5 highest x_switch:")
print(high_outliers[report_cols])

print("\nTop 5 lowest x_switch:")
print(low_outliers[report_cols])


Top 5 highest x_switch:
          Reaction  x_switch
24   16 O + 112 Cd  1.209695
72   28 Si + 28 Si  1.158580
78   28 Si + 94 Zr  1.129960
121  35 Cl + 27 Al  1.110090
73   28 Si + 30 Si  1.099133

Top 5 lowest x_switch:
           Reaction  x_switch
187  58 Ni + 124 Sn  0.837973
51    18 O + 112 Sn  0.853982
154  40 Ar + 116 Sn  0.858025
155  40 Ar + 122 Sn  0.859478
13   124 Sn + 40 Ca  0.860323


In [15]:
# Correlate x_switch with per-reaction quantities: the "Q ( 2 n )" column and
# the summed absolute values of the "β P" and "β T" columns.
reaction_level_cols = ["Q ( 2 n )", "β P", "β T"]

# One row per reaction (first non-null value of every column).
barrier_df_local = df.groupby("Reaction").first().reset_index()

# Drop any copies left by a previous run of this cell before merging.
# Without this, re-running the cell makes pandas suffix the duplicated
# columns with _x/_y and the "β P" lookup below fails with KeyError.
switch_df_q = (
    switch_df_q
    .drop(columns=reaction_level_cols + ["beta_eff"], errors="ignore")
    .merge(
        barrier_df_local[["Reaction"] + reaction_level_cols],
        on="Reaction",
        how="left"
    )
)

switch_df_q["beta_eff"] = switch_df_q["β P"].abs() + switch_df_q["β T"].abs()

print(switch_df_q[["x_switch","Q ( 2 n )","beta_eff"]].corr())

           x_switch  Q ( 2 n )  beta_eff
x_switch   1.000000  -0.288814  0.535492
Q ( 2 n ) -0.288814   1.000000 -0.075636
beta_eff   0.535492  -0.075636  1.000000
