# Loop 13 Analysis: Strategic Assessment

**Objective**: Analyze current state and identify next steps to beat target 0.0333

**Current Status**:
- Best CV: 0.009004 (exp_013 compliant ensemble)
- Best LB: 0.0932 (exp_007 [32,16] MLP)
- Target: 0.0333
- Submissions: 0 remaining today

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Submission history, hard-coded as (experiment, CV, LB, model) records.
_FIELDS = ('exp', 'cv', 'lb', 'model')
_HISTORY = (
    ('exp_000', 0.0111, 0.0982, 'MLP [128,128,64]'),
    ('exp_001', 0.0123, 0.1065, 'LightGBM'),
    ('exp_003', 0.0105, 0.0972, 'Combined Spange+DRFP'),
    ('exp_005', 0.0104, 0.0969, 'Large Ensemble (15)'),
    ('exp_006', 0.0097, 0.0946, 'MLP [64,32]'),
    ('exp_007', 0.0093, 0.0932, 'MLP [32,16]'),
    ('exp_009', 0.0092, 0.0936, 'MLP [16]'),
)
submissions = [dict(zip(_FIELDS, record)) for record in _HISTORY]

# One row per submission; `ratio` is how many times larger LB is than CV.
df = pd.DataFrame(submissions)
df['ratio'] = df['lb'].div(df['cv'])
print(df.to_string())

In [None]:
# Measure how closely CV tracks LB over the submissions in `df`, fit a
# straight line LB = slope * CV + intercept, and extrapolate it to exp_013.
from scipy import stats

corr, pval = stats.pearsonr(df['cv'], df['lb'])
print(f'CV-LB Correlation: {corr:.4f} (p={pval:.4f})')

# Least-squares line through the (CV, LB) points; `slope` and `intercept`
# are reused by later cells.
slope, intercept, r_value, p_value, std_err = stats.linregress(df['cv'], df['lb'])
print(f'\nLinear fit: LB = {slope:.2f} * CV + {intercept:.4f}')
# The label previously rendered as mis-encoded text instead of "R²".
print(f'R² = {r_value**2:.4f}')

# Predict LB for exp_013 (CV 0.009004)
cv_013 = 0.009004
predicted_lb_013 = slope * cv_013 + intercept

# Take the best (lowest) LB from the table instead of repeating the literal,
# so the comparison stays correct when rows are added to `df`.
best_row = df.loc[df['lb'].idxmin()]
best_lb = best_row['lb']

print(f'\nFor exp_013 (CV={cv_013}):')
print(f'  Predicted LB: {predicted_lb_013:.4f}')
print(f"  Best LB so far: {best_lb:.4f} ({best_row['exp']})")
print(f'  Improvement potential: {(best_lb - predicted_lb_013)/best_lb*100:.1f}%')

In [None]:
# Report the LB/CV ratio for every submission, then its summary statistics
# and how it moved between the first and the last row of `df`.
ratios = df['ratio']

print('CV-LB Ratio Analysis:')
print('=' * 50)
for exp_name, cv_score, lb_score, ratio in zip(df['exp'], df['cv'], df['lb'], ratios):
    print(f"{exp_name}: CV={cv_score:.4f}, LB={lb_score:.4f}, Ratio={ratio:.2f}x")

ratio_summary = {
    'Mean': ratios.mean(),
    'Std': ratios.std(),
    'Min': ratios.min(),
    'Max': ratios.max(),
}
print()
for label, value in ratio_summary.items():
    print(f'{label} ratio: {value:.2f}x')

# Compare the first and last rows to see whether the ratio drifted upward.
first_ratio = ratios.iloc[0]
last_ratio = ratios.iloc[-1]
relative_change = (last_ratio - first_ratio) / first_ratio * 100
print(f'\nRatio trend: {first_ratio:.2f}x -> {last_ratio:.2f}x')
print(f'Ratio increased by: {relative_change:.1f}%')

In [None]:
# Observation recorded here: exp_007 ([32,16]) holds the best LB although
# exp_009 ([16]) has a slightly better CV, i.e. CV gains stop carrying over
# to LB once CV is already low.
insight_lines = (
    'KEY INSIGHT: CV-LB Decorrelation at Low CV Scores',
    '=' * 60,
    '',
    'exp_007 [32,16]: CV=0.0093, LB=0.0932 <- BEST LB',
    'exp_009 [16]:    CV=0.0092, LB=0.0936 <- Better CV, WORSE LB!',
    'exp_013 ensemble: CV=0.0090, LB=??? <- Best CV, unknown LB',
    '',
    'The simplest model ([16]) has the best CV but WORSE LB.',
    'This suggests overfitting to the CV structure.',
    '',
    'RECOMMENDATION: Trust the [32,16] model (exp_007) for LB performance.',
    "The ensemble may or may not improve LB - it's a gamble.",
)
print('\n'.join(insight_lines))

In [None]:
# Estimate the CV needed for the LB to reach the target, using two
# extrapolations: the fitted line (`slope`, `intercept` from the regression
# cell) and the mean LB/CV ratio from `df`.
target = 0.0333
best_cv = 0.009004  # current best CV (exp_013), named once instead of repeated inline

# Using the linear fit: solve target = slope * CV + intercept for CV.
required_cv = (target - intercept) / slope
print(f'To beat target {target}:')
print(f'  Required CV (linear fit): {required_cv:.6f}')
print(f'  Current best CV: {best_cv}')
if required_cv > 0:
    print(f'  Gap: {(best_cv - required_cv)/best_cv*100:.1f}% improvement needed')
else:
    # A non-positive solution (e.g. the fitted intercept is at or above the
    # target while the slope is positive) is not a CV any model can reach,
    # so a percentage gap would be meaningless (it would exceed 100%).
    print(f'  Gap: n/a - the fit gives a non-positive required CV '
          f'(fitted LB at CV=0 is {intercept:.4f})')

# Using the average ratio: assume LB stays a fixed multiple of CV.
avg_ratio = df['ratio'].mean()
required_cv_ratio = target / avg_ratio
print(f'\nUsing average ratio ({avg_ratio:.2f}x):')
print(f'  Required CV: {required_cv_ratio:.6f}')
print(f'  Gap: {(best_cv - required_cv_ratio)/best_cv*100:.1f}% improvement needed')

print('\n' + '=' * 60)
print(f'CONCLUSION: The target {target} is NOT achievable with current approach.')
print('We would need CV < 0.004 to have a chance, which is unrealistic.')
print('The GNN benchmark achieved 0.0039 MSE - we cannot match this with tabular ML.')

In [None]:
# Candidate submissions for the next day, printed as a fixed text report.
option_lines = (
    'STRATEGIC OPTIONS FOR TOMORROW (when submissions reset)',
    '=' * 60,
    '',
    'Option A: Submit exp_013 (Compliant Ensemble)',
    '  - CV: 0.009004 (best)',
    '  - Predicted LB: ~0.091-0.095 (uncertain)',
    '  - Risk: CV-LB decorrelation may hurt',
    '  - Reward: Could beat 0.0932 if ensemble generalizes',
    '',
    'Option B: Submit exp_007 [32,16] in compliant format',
    '  - CV: 0.009262',
    '  - Known LB: 0.0932 (proven best)',
    '  - Risk: Low - we know this works',
    '  - Reward: Reliable baseline',
    '',
    'Option C: Try different ensemble weights',
    '  - Current: MLP 0.6, LightGBM 0.4',
    '  - Try: MLP 0.7, LightGBM 0.3 (more MLP emphasis)',
    '  - Try: MLP 0.5, LightGBM 0.5 (equal weighting)',
    '',
    'RECOMMENDATION: Submit exp_013 first to test ensemble hypothesis.',
    'If LB >= 0.0932, fall back to exp_007 [32,16] alone.',
)
print('\n'.join(option_lines))

In [None]:
# Work that needs no leaderboard submission, printed as a numbered checklist.
# Each entry is (section heading, bullet items); numbering and bullet
# prefixes are added when printing.
experiment_plan = (
    ('Test different ensemble weights:', (
        'MLP 0.7, LightGBM 0.3',
        'MLP 0.5, LightGBM 0.5',
        'MLP 0.55, LightGBM 0.45',
    )),
    ('Try pure MLP ensemble (no LightGBM):', (
        '[32,16] + [64,32]',
        '[32,16] + [32,16] (different seeds)',
    )),
    ('Try different tree models:', (
        'XGBoost instead of LightGBM',
        'CatBoost instead of LightGBM',
    )),
    ('Prepare compliant notebooks for all candidates:', (
        'exp_007 [32,16] alone',
        'exp_013 ensemble (already done)',
        'New ensemble variations',
    )),
    ('Analyze prediction errors:', (
        'Which solvents have highest error?',
        'Are there patterns in the errors?',
        'Can we target specific solvents for improvement?',
    )),
)

print('EXPERIMENTS TO RUN NOW (no submissions needed)')
print('=' * 60)
for number, (heading, bullets) in enumerate(experiment_plan, start=1):
    print()  # blank line before every section
    print(f'{number}. {heading}')
    for bullet in bullets:
        print(f'   - {bullet}')