# Loop 11 Strategic Analysis

## Goal: Decide whether to submit exp_011 and plan next experiments

### Key Questions:
1. Is exp_011's CV improvement statistically significant?
2. What does the CV-LB relationship predict for exp_011?
3. Should we submit or continue experimenting?

In [None]:
import numpy as np
import pandas as pd
from scipy import stats

# Submission history: one record per submitted experiment, holding its
# cross-validation score ('cv') and the LB score it received ('lb').
submissions = [
    dict(exp='exp_000', cv=0.80674, lb=0.79705),
    dict(exp='exp_003', cv=0.81951, lb=0.80453),
    dict(exp='exp_004', cv=0.81928, lb=0.80406),
    dict(exp='exp_006', cv=0.81709, lb=0.80102),
]

# 'gap' is the absolute amount by which CV exceeds LB; 'gap_pct' is the same
# gap expressed as a percentage of the CV score.
df = pd.DataFrame(submissions).assign(
    gap=lambda frame: frame['cv'] - frame['lb'],
    gap_pct=lambda frame: frame['gap'] / frame['cv'] * 100,
)

# Emit the table followed by the average gap in both absolute and % terms.
print(
    "Submission History:",
    df.to_string(index=False),
    f"\nMean CV-LB gap: {df['gap'].mean():.5f} ({df['gap_pct'].mean():.2f}%)",
    sep="\n",
)

In [None]:
# Linear regression to predict LB from CV
from sklearn.linear_model import LinearRegression

# Fit LB as a straight-line function of CV over the rows of `df`.
# The estimator wants a 2-D feature matrix, hence the single-column reshape.
y = df['lb'].values
X = df['cv'].values.reshape(-1, 1)

model = LinearRegression().fit(X, y)

# Report the fitted line and how well it explains the submitted points.
print(
    f"CV-LB Model: LB = {model.coef_[0]:.4f} * CV + {model.intercept_:.4f}",
    f"R-squared = {model.score(X, y):.4f}",
    sep="\n",
)

# Predict LB for exp_011 by evaluating the fitted line at its CV score.
exp_011_cv = 0.82032
(predicted_lb,) = model.predict([[exp_011_cv]])  # one input row -> one value
print(
    f"\nexp_011 CV: {exp_011_cv:.5f}",
    f"Predicted LB: {predicted_lb:.5f}",
    f"Best LB so far: 0.80453",
    f"Predicted improvement: {predicted_lb - 0.80453:.5f}",
    sep="\n",
)

In [None]:
def _cv_for_lb(lb_value):
    """Invert the fitted line LB = coef * CV + intercept for a given LB."""
    return (lb_value - model.intercept_) / model.coef_[0]


# What CV is needed to beat best LB (0.80453)?
target_lb = 0.80453
required_cv = _cv_for_lb(target_lb)
print(
    f"To beat LB 0.80453, need CV > {required_cv:.5f}",
    f"exp_011 CV: {exp_011_cv:.5f}",
    f"Margin: {exp_011_cv - required_cv:.5f}",  # positive => exp_011 clears the bar
    sep="\n",
)

# What about top LB (0.8066)?
top_lb = 0.8066
required_cv_top = _cv_for_lb(top_lb)
print(
    f"\nTo reach top LB 0.8066, need CV > {required_cv_top:.5f}",
    f"Gap from current best: {required_cv_top - exp_011_cv:.5f}",  # CV still missing vs exp_011
    sep="\n",
)

In [None]:
# Analyze fold variance from exp_011 (one score per CV fold, 10 folds).
fold_scores = [0.80460, 0.84253, 0.83333, 0.79862, 0.82969, 0.82048, 0.83774, 0.81013, 0.81243, 0.81358]

# np.std with its default arguments; computed once and reused below.
fold_std = np.std(fold_scores)
lowest, highest = min(fold_scores), max(fold_scores)

fold_summary = [
    ("Mean", np.mean(fold_scores)),
    ("Std", fold_std),
    ("Min", lowest),
    ("Max", highest),
    ("Range", highest - lowest),
]

print("exp_011 Fold Scores:")
print("\n".join(f"{label}: {value:.5f}" for label, value in fold_summary))

# Compare with exp_003 (5-fold): how many times larger is exp_011's spread?
exp_003_std = 0.00685
print(
    f"\nexp_003 std (5-fold): {exp_003_std:.5f}",
    f"exp_011 std (10-fold): {fold_std:.5f}",
    f"Ratio: {fold_std / exp_003_std:.2f}x higher",
    sep="\n",
)

In [None]:
# Is the CV improvement statistically significant?
# Each experiment is summarised as (mean CV, std across folds, fold count):
#   exp_003: 0.81951 +/- 0.00685 (5-fold)
#   exp_011: 0.82032 +/- 0.01408 (10-fold)
exp_003_cv, exp_003_std, exp_003_n = 0.81951, 0.00685, 5
exp_011_cv, exp_011_std, exp_011_n = 0.82032, 0.01408, 10

# Standard error of the difference: each experiment contributes std**2 / n,
# and the two contributions are summed before taking the square root.
var_mean_003 = exp_003_std**2 / exp_003_n
var_mean_011 = exp_011_std**2 / exp_011_n
se_diff = np.sqrt(var_mean_003 + var_mean_011)

# t-statistic = observed CV difference measured in standard errors.
diff = exp_011_cv - exp_003_cv
t_stat = diff / se_diff

# Rule-of-thumb reading of |t|: below 1 -> not significant, below 2 ->
# marginal, otherwise significant.
t_magnitude = abs(t_stat)
interpretation = (
    "NOT statistically significant (t < 1.0)"
    if t_magnitude < 1.0
    else "Marginally significant (1.0 < t < 2.0)"
    if t_magnitude < 2.0
    else "Statistically significant (t > 2.0)"
)

print(
    f"CV improvement: {diff:.5f}",
    f"Standard error of difference: {se_diff:.5f}",
    f"t-statistic: {t_stat:.3f}",
    f"\nInterpretation:",
    interpretation,
    sep="\n",
)

In [None]:
# Decision analysis: print a five-part summary (CV improvement, predicted LB,
# significance verdict, remaining submissions, recommendation) from values
# computed in the earlier cells: exp_011_cv, exp_003_cv, diff, t_stat and
# predicted_lb.
best_lb_so_far = 0.80453  # best LB score among the submissions made so far

# Classify |t| with the same three bands used where the t-statistic is first
# interpreted, so a |t| >= 2.0 is no longer mislabelled as "marginal".
t_magnitude = abs(t_stat)
if t_magnitude < 1.0:
    significance_verdict = 'NOT significant'
elif t_magnitude < 2.0:
    significance_verdict = 'Marginally significant'
else:
    significance_verdict = 'Statistically significant'

print("=" * 60)
print("DECISION ANALYSIS")
print("=" * 60)

print("\n1. CV IMPROVEMENT:")
print(f"   exp_011 CV: {exp_011_cv:.5f} (BEST EVER)")
print(f"   exp_003 CV: {exp_003_cv:.5f}")
# The '+' format flag emits the correct sign for both gains and regressions;
# a literal '+' prefix would print "+-0.00081" for a negative difference.
print(f"   Improvement: {diff:+.5f} ({diff/exp_003_cv*100:.2f}%)")

print("\n2. PREDICTED LB:")
print(f"   Predicted: {predicted_lb:.5f}")
print(f"   Best LB: {best_lb_so_far:.5f}")
print(f"   Expected change: {predicted_lb - best_lb_so_far:+.5f}")

print("\n3. STATISTICAL SIGNIFICANCE:")
print(f"   t-statistic: {t_stat:.3f}")
print(f"   Verdict: {significance_verdict}")

print("\n4. SUBMISSIONS REMAINING: 6")

# Both branches recommend submitting; they differ only in the stated reason.
print("\n5. RECOMMENDATION:")
if predicted_lb > best_lb_so_far:
    print("   SUBMIT - Predicted LB improvement")
else:
    print("   SUBMIT ANYWAY - Need LB feedback to calibrate regularization effect")
    print("   Regularization improved CV (contrary to overfitting hypothesis)")
    print("   Worth testing if it helps generalization")