# Loop 45 Strategic Analysis

## Key Questions:
1. What is the actual CV-LB relationship?
2. What would it take to reach the target LB score of 0.073?
3. What unexplored approaches remain?
4. Should we submit exp_044 or try something else?

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Submission history: one (experiment id, local CV score, leaderboard score)
# tuple per submitted experiment. Lower is better for both scores.
submissions = [
    ('exp_000', 0.011081, 0.09816),
    ('exp_001', 0.012297, 0.10649),
    ('exp_003', 0.010501, 0.09719),
    ('exp_005', 0.01043, 0.09691),
    ('exp_006', 0.009749, 0.09457),
    ('exp_007', 0.009262, 0.09316),
    ('exp_009', 0.009192, 0.09364),
    ('exp_012', 0.009004, 0.09134),
    ('exp_024', 0.008689, 0.08929),
    ('exp_026', 0.008465, 0.08875),
    ('exp_030', 0.008298, 0.08772),
    ('exp_035', 0.009825, 0.09696),
]

df = pd.DataFrame(submissions, columns=['exp', 'cv', 'lb'])
print('=== Submission History ===')
print(df.to_string(index=False))
print()

# Fit LB as a linear function of CV over every submission so far.
X = df['cv'].values.reshape(-1, 1)  # single feature as an (n, 1) column
y = df['lb'].values
reg = LinearRegression().fit(X, y)
print(f'CV-LB Relationship: LB = {reg.coef_[0]:.4f} * CV + {reg.intercept_:.4f}')
# The superscript two is written as an escape so the label cannot be
# corrupted again by an encoding round-trip (it previously printed as mojibake).
print(f'R\u00b2 = {reg.score(X, y):.4f}')
print()

# Invert the fitted line to get the CV score that would map onto the target LB.
target = 0.073
required_cv = (target - reg.intercept_) / reg.coef_[0]
print(f'Target LB: {target}')
print(f'Required CV to hit target: {required_cv:.6f}')
print(f'Current best CV: {df["cv"].min():.6f}')
print(f'Gap: {df["cv"].min() - required_cv:.6f}')
print()

# Best LB so far, together with the CV score of that same submission.
best_lb = df['lb'].min()
best_cv = df.loc[df['lb'].idxmin(), 'cv']
print(f'Best LB: {best_lb:.5f} (from CV {best_cv:.6f})')
print(f'Gap to target: {best_lb - target:.5f} ({(best_lb - target)/target*100:.1f}%)')

In [None]:
# Compare exp_035 with the best-LB run exp_030: how much worse is it on CV
# versus on LB? (Its LB is clearly worse, not similar.)
print('=== Anomaly Analysis ===')
print()
# Read both rows from the submission table rather than retyping the scores,
# so the comparison cannot drift away from the recorded data.
scores_by_exp = df.set_index('exp')
ref_run = scores_by_exp.loc['exp_030']
odd_run = scores_by_exp.loc['exp_035']
ref_tag = ' (BEST LB)' if ref_run['lb'] == df['lb'].min() else ''
print(f"exp_030: CV={ref_run['cv']:.6f}, LB={ref_run['lb']:.5f}{ref_tag}")
print(f"exp_035: CV={odd_run['cv']:.6f}, LB={odd_run['lb']:.5f}")
print()
# Relative degradation of exp_035 against exp_030 on each metric, in percent.
cv_worse_pct = (odd_run['cv'] / ref_run['cv'] - 1) * 100
lb_worse_pct = (odd_run['lb'] / ref_run['lb'] - 1) * 100
print(f'exp_035 had {cv_worse_pct:.1f}% worse CV but only {lb_worse_pct:.1f}% worse LB')
# NOTE(review): a straight line with a positive intercept also yields a smaller
# relative change in LB than in CV, so the percentages alone do not show
# non-linearity; the residuals printed below are the more direct check.
print('This suggests the CV-LB relationship may not be perfectly linear')
print()

# Residual = actual LB minus the LB predicted by the fitted line.
df['predicted_lb'] = reg.predict(df['cv'].values.reshape(-1, 1))
df['residual'] = df['lb'] - df['predicted_lb']
print('Residuals from linear fit:')
print(df[['exp', 'cv', 'lb', 'predicted_lb', 'residual']].to_string(index=False))
print()
print(f'Mean residual: {df["residual"].mean():.6f}')
print(f'Std residual: {df["residual"].std():.6f}')

In [None]:
# What-if: keep the fitted slope but assume a lower intercept, then see which
# CV score would be needed to reach the target LB.
print('=== Intercept Analysis ===')
print()
print(f'Current intercept: {reg.intercept_:.4f}')
print(f'Target: {target}')
print()
new_intercept = 0.04
# Report the fitted intercept itself instead of a retyped constant.
print(f'If intercept were {new_intercept} (instead of {reg.intercept_:.4f}):')
required_cv_new = (target - new_intercept) / reg.coef_[0]
print(f'  Required CV: {required_cv_new:.6f}')
# Lower CV is better, so the requirement is met only when the best CV so far
# is at or below the required value; otherwise report the remaining shortfall
# rather than claiming the target is already reachable.
current_best_cv = df['cv'].min()
if current_best_cv <= required_cv_new:
    print(f'  This is achievable with current best CV {current_best_cv:.6f}')
else:
    shortfall = current_best_cv - required_cv_new
    print(f'  Current best CV {current_best_cv:.6f} would still need to improve by {shortfall:.6f}')
print()
print('Key insight: The intercept is the bottleneck, not the CV')
print('We need to find an approach that reduces the intercept')

In [None]:
# Inventory of experiments already ruled out, followed by ideas that have not
# been attempted yet. The entries are kept as data so the lists are easy to edit.
tried_and_failed = [
    'GNN (exp_040): CV 0.068767 - too high',
    'ChemBERTa (exp_041): CV 0.010288 - no improvement',
    'Calibration (exp_042): CV 0.010008 - no improvement',
    'Non-linear mixture (exp_043): CV 0.073776 - mixture only',
    'Hybrid model (exp_044): CV 0.008597 - slight degradation',
    'Learned embeddings (exp_039): CV 0.080438 - OOD failure',
]
potentially_unexplored = [
    'Importance-weighted CV (address distribution shift)',
    'Adversarial validation (identify drifting features)',
    'Mean reversion (blend predictions toward training mean)',
    'Separate models for single vs mixture (not just features)',
    'Target-specific models (SM, Product 2, Product 3)',
    'Ensemble of diverse model families (GP + MLP + LGBM + Ridge)',
]

print('=== Unexplored Approaches ===')
print()
print('TRIED AND FAILED:')
for entry in tried_and_failed:
    print(f'- {entry}')
print()
print('POTENTIALLY UNEXPLORED:')
# Ideas are numbered from 1 in the order listed.
for rank, idea in enumerate(potentially_unexplored, start=1):
    print(f'{rank}. {idea}')

In [None]:
# Summary of the exp_030 configuration, the submission with the best LB so far.
# Empty strings in the tuple become blank separator lines in the output.
exp_030_summary = (
    '=== exp_030 Analysis ===',
    '',
    'exp_030 achieved best LB (0.08772) with CV 0.008298',
    'This was a GP+MLP+LGBM ensemble with weights (0.15, 0.55, 0.3)',
    '',
    'Key features of exp_030:',
    '- GP weight: 0.15 (Gaussian Process for uncertainty)',
    '- MLP weight: 0.55 (Neural network for non-linear patterns)',
    '- LGBM weight: 0.30 (Gradient boosting for tabular data)',
    '- Combined Spange + DRFP features',
    '- Arrhenius kinetics features',
    '',
    'The ensemble diversity may be key to the good LB performance',
)
# One line per tuple entry, each terminated by a newline.
print(*exp_030_summary, sep='\n')

In [None]:
# Strategic recommendation. The headline figures are taken from the submission
# table (`df`), the fitted line (`reg`) and `target`, so they stay in step with
# the analysis instead of being retyped by hand.
print('=== STRATEGIC RECOMMENDATION ===')
print()
top_row = df['lb'].idxmin()  # row of the lowest (best) LB score
top_exp = df.loc[top_row, 'exp']
top_lb = df.loc[top_row, 'lb']
gap_pct = (top_lb - target) / target * 100
intercept_share_pct = reg.intercept_ / target * 100
print('CURRENT SITUATION:')
print(f'- Best LB: {top_lb:.5f} ({top_exp})')
print(f'- Target: {target}')
print(f'- Gap: {gap_pct:.1f}%')
print('- Remaining submissions: 4')
print()
print('THE INTERCEPT PROBLEM:')
print(f'- CV-LB relationship: LB = {reg.coef_[0]:.2f}*CV + {reg.intercept_:.4f}')
print(f'- Intercept ({reg.intercept_:.4f}) is {intercept_share_pct:.0f}% of target ({target})')
# At CV = 0 the fitted line predicts exactly the intercept.
print(f'- Even CV=0 would give LB={reg.intercept_:.4f}')
print()
print('RECOMMENDED APPROACH:')
print('1. DO NOT submit exp_044 (CV 0.008597 is worse than exp_030)')
print('2. Focus on approaches that could change the CV-LB relationship:')
print('   a. Mean reversion: blend predictions toward training mean')
print('   b. Separate models: train completely different models for single vs mixture')
print('   c. Target-specific tuning: optimize for each target separately')
print('3. The key is to reduce the INTERCEPT, not just improve CV')
print()
print('SUBMISSION STRATEGY:')
print('- Submission 1: Mean reversion on exp_030 (alpha=0.8-0.9)')
print('- Submission 2: Based on results, refine or try separate models')
print('- Save 2 submissions for final refinements')