In [1]:
import numpy as np
from scipy.stats import norm

print("="*60)
print("STATISTICAL ANALYSIS - PHOTOCATALYTIC FITNESS SCORES")
print("="*60)

# Fitness values from CGCNN pipeline
F_cr = 10.232
F_zn = 7.991
F_pcn = 3.021
F_mof5 = 2.164

# Propagated uncertainties
rel_unc_fitness = 0.191  # 19.1% from error propagation
rel_unc_fold = 0.270     # 27.0% from ratio propagation

# Calculate absolute uncertainties
sigma_cr = F_cr * rel_unc_fitness
sigma_zn = F_zn * rel_unc_fitness
sigma_pcn = F_pcn * rel_unc_fitness
sigma_mof5 = F_mof5 * rel_unc_fitness

print("\n1. FITNESS SCORES WITH UNCERTAINTIES")
print("-" * 60)
print(f"Cr-based MOF:  {F_cr:.3f} ± {sigma_cr:.2f} ({rel_unc_fitness*100:.1f}%)")
print(f"Zn-based MOF:  {F_zn:.3f} ± {sigma_zn:.2f} ({rel_unc_fitness*100:.1f}%)")
print(f"PCN-224(Zr):   {F_pcn:.3f} ± {sigma_pcn:.2f} ({rel_unc_fitness*100:.1f}%)")
print(f"MOF-5:         {F_mof5:.3f} ± {sigma_mof5:.2f} ({rel_unc_fitness*100:.1f}%)")

# Fold improvements vs PCN-224
fold_cr_pcn = F_cr / F_pcn
fold_zn_pcn = F_zn / F_pcn
sigma_fold_cr_pcn = fold_cr_pcn * rel_unc_fold
sigma_fold_zn_pcn = fold_zn_pcn * rel_unc_fold

print("\n2. FOLD IMPROVEMENTS vs PCN-224(Zr)")
print("-" * 60)
print(f"Cr-based MOF: {fold_cr_pcn:.2f} ± {sigma_fold_cr_pcn:.2f} fold")
print(f"  95% CI: [{fold_cr_pcn - 1.96*sigma_fold_cr_pcn:.2f}, "
      f"{fold_cr_pcn + 1.96*sigma_fold_cr_pcn:.2f}]")

print(f"\nZn-based MOF: {fold_zn_pcn:.2f} ± {sigma_fold_zn_pcn:.2f} fold")
print(f"  95% CI: [{fold_zn_pcn - 1.96*sigma_fold_zn_pcn:.2f}, "
      f"{fold_zn_pcn + 1.96*sigma_fold_zn_pcn:.2f}]")

# Statistical significance testing
print("\n3. STATISTICAL SIGNIFICANCE TESTING")
print("-" * 60)

# Cr-MOF vs PCN-224
diff_cr_pcn = F_cr - F_pcn
sigma_diff_cr_pcn = np.sqrt(sigma_cr**2 + sigma_pcn**2)
z_cr_pcn = diff_cr_pcn / sigma_diff_cr_pcn
p_cr_pcn = 1 - norm.cdf(0, loc=diff_cr_pcn, scale=sigma_diff_cr_pcn)

print(f"\nCr-MOF vs PCN-224(Zr):")
print(f"  Difference: {diff_cr_pcn:.3f} ± {sigma_diff_cr_pcn:.3f}")
print(f"  Z-score: {z_cr_pcn:.3f}")
print(f"  P-value: {p_cr_pcn:.6f} ({p_cr_pcn*100:.4f}%)")
print(f"  Probability of true superiority: {(1-p_cr_pcn/2)*100:.2f}%")

# Zn-MOF vs PCN-224
diff_zn_pcn = F_zn - F_pcn
sigma_diff_zn_pcn = np.sqrt(sigma_zn**2 + sigma_pcn**2)
z_zn_pcn = diff_zn_pcn / sigma_diff_zn_pcn
p_zn_pcn = 1 - norm.cdf(0, loc=diff_zn_pcn, scale=sigma_diff_zn_pcn)

print(f"\nZn-MOF vs PCN-224(Zr):")
print(f"  Difference: {diff_zn_pcn:.3f} ± {sigma_diff_zn_pcn:.3f}")
print(f"  Z-score: {z_zn_pcn:.3f}")
print(f"  P-value: {p_zn_pcn:.6f} ({p_zn_pcn*100:.4f}%)")
print(f"  Probability of true superiority: {(1-p_zn_pcn/2)*100:.2f}%")

# Fold improvements vs MOF-5
fold_cr_mof5 = F_cr / F_mof5
fold_zn_mof5 = F_zn / F_mof5
sigma_fold_cr_mof5 = fold_cr_mof5 * rel_unc_fold
sigma_fold_zn_mof5 = fold_zn_mof5 * rel_unc_fold

print("\n4. FOLD IMPROVEMENTS vs MOF-5")
print("-" * 60)
print(f"Cr-based MOF: {fold_cr_mof5:.2f} ± {sigma_fold_cr_mof5:.2f} fold")
print(f"  95% CI: [{fold_cr_mof5 - 1.96*sigma_fold_cr_mof5:.2f}, "
      f"{fold_cr_mof5 + 1.96*sigma_fold_cr_mof5:.2f}]")

print(f"\nZn-based MOF: {fold_zn_mof5:.2f} ± {sigma_fold_zn_mof5:.2f} fold")
print(f"  95% CI: [{fold_zn_mof5 - 1.96*sigma_fold_zn_mof5:.2f}, "
      f"{fold_zn_mof5 + 1.96*sigma_fold_zn_mof5:.2f}]")

# Sensitivity analysis
print("\n5. SENSITIVITY TO BASELINE UNCERTAINTY ASSUMPTION")
print("-" * 60)
print(f"{'Individual unc.':<20} {'Composite unc.':<18} {'Fold unc.':<15} "
      f"{'Cr 95% CI':<20} {'Z-score':<10}")
print("-" * 60)

for ind_unc in [0.05, 0.11, 0.15, 0.20]:
    comp_unc = np.sqrt(3 * ind_unc**2 + 2 * (0.5 * ind_unc)**2)
    fold_unc = np.sqrt(2 * comp_unc**2)

    sigma_cr_temp = F_cr * comp_unc
    sigma_pcn_temp = F_pcn * comp_unc
    sigma_diff_temp = np.sqrt(sigma_cr_temp**2 + sigma_pcn_temp**2)
    z_temp = diff_cr_pcn / sigma_diff_temp

    fold_sigma_temp = fold_cr_pcn * fold_unc
    ci_low = fold_cr_pcn - 1.96 * fold_sigma_temp
    ci_high = fold_cr_pcn + 1.96 * fold_sigma_temp

    marker = " (used)" if abs(ind_unc - 0.11) < 0.001 else ""
    print(f"{ind_unc*100:>6.0f}%{marker:<13} {comp_unc*100:>6.1f}%{'':<10} "
          f"{fold_unc*100:>6.1f}%{'':<7} [{ci_low:>4.2f}, {ci_high:>4.2f}]{'':<5} "
          f"Z={z_temp:>4.2f}")

print("\n" + "="*60)
print("END OF STATISTICAL ANALYSIS")
print("="*60)

STATISTICAL ANALYSIS - PHOTOCATALYTIC FITNESS SCORES

1. FITNESS SCORES WITH UNCERTAINTIES
------------------------------------------------------------
Cr-based MOF:  10.232 ± 1.95 (19.1%)
Zn-based MOF:  7.991 ± 1.53 (19.1%)
PCN-224(Zr):   3.021 ± 0.58 (19.1%)
MOF-5:         2.164 ± 0.41 (19.1%)

2. FOLD IMPROVEMENTS vs PCN-224(Zr)
------------------------------------------------------------
Cr-based MOF: 3.39 ± 0.91 fold
  95% CI: [1.59, 5.18]

Zn-based MOF: 2.65 ± 0.71 fold
  95% CI: [1.25, 4.04]

3. STATISTICAL SIGNIFICANCE TESTING
------------------------------------------------------------

Cr-MOF vs PCN-224(Zr):
  Difference: 7.211 ± 2.038
  Z-score: 3.539
  P-value: 0.999799 (99.9799%)
  Probability of true superiority: 50.01%

Zn-MOF vs PCN-224(Zr):
  Difference: 4.970 ± 1.632
  Z-score: 3.046
  P-value: 0.998840 (99.8840%)
  Probability of true superiority: 50.06%

4. FOLD IMPROVEMENTS vs MOF-5
------------------------------------------------------------
Cr-based MOF: 4.73 ± 1