# Loop 54 Analysis: Strategic Assessment

## Key Questions:
1. What is the current CV-LB relationship?
2. Are there any approaches that might change the intercept?
3. What do the pending submissions (exp_049, exp_050, exp_052) tell us?
4. What should we try next?

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

# Submission history: one (experiment name, CV score, LB score) tuple per
# experiment that already has a leaderboard score.
submissions = [
    ('exp_000', 0.0111, 0.0982),
    ('exp_001', 0.0123, 0.1065),
    ('exp_003', 0.0105, 0.0972),
    ('exp_005', 0.0104, 0.0969),
    ('exp_006', 0.0097, 0.0946),
    ('exp_007', 0.0093, 0.0932),
    ('exp_009', 0.0092, 0.0936),
    ('exp_012', 0.0090, 0.0913),
    ('exp_024', 0.0087, 0.0893),
    ('exp_026', 0.0085, 0.0887),
    ('exp_030', 0.0083, 0.0877),
    ('exp_035', 0.0098, 0.0970),
]

# Leaderboard score to reach; named once so every figure printed below is
# derived from the same value.
TARGET_LB = 0.0347

cv_scores = np.array([s[1] for s in submissions])
lb_scores = np.array([s[2] for s in submissions])

# Ordinary least-squares fit of LB against CV: LB ~= slope * CV + intercept.
slope, intercept, r_value, p_value, std_err = stats.linregress(cv_scores, lb_scores)

# Best (lowest) leaderboard score, read from the data rather than re-typed,
# so it cannot drift out of sync with `submissions`.
best_idx = int(np.argmin(lb_scores))
best_name = submissions[best_idx][0]
best_lb = lb_scores[best_idx]
gap_to_target = best_lb - TARGET_LB

print('CV-LB Relationship Analysis')
print('='*60)
print(f'Linear fit: LB = {slope:.4f} * CV + {intercept:.4f}')
# "\u00b2" is the superscript two; written as an escape so the label survives
# a re-encoding of this file (it had been garbled into two characters).
print(f'R\u00b2 = {r_value**2:.4f}')
print(f'Intercept = {intercept:.4f}')
print(f'Target = {TARGET_LB:.4f}')
print()
print('CRITICAL ANALYSIS:')
print(f'  Intercept ({intercept:.4f}) > Target ({TARGET_LB:.4f})?', 'YES' if intercept > TARGET_LB else 'NO')
# CV at which the fitted line crosses the target; a negative value means the
# target lies below the line's intercept.
print(f'  Required CV to hit target: ({TARGET_LB} - {intercept:.4f}) / {slope:.4f} = {(TARGET_LB - intercept) / slope:.6f}')
print()
print('Gap Analysis:')
print(f'  Best LB so far: {best_lb:.4f} ({best_name})')
print(f'  Target: {TARGET_LB:.4f}')
print(f'  Gap: {gap_to_target:.4f} ({gap_to_target / TARGET_LB * 100:.1f}%)')

In [None]:
# Plot CV vs LB: scatter the scored submissions, then overlay the fitted
# line, the target level and the fit's intercept.
target_lb = 0.0347  # leaderboard score to reach
cv_axis_max = 0.015  # right edge of the CV axis and of the drawn fit line

plt.figure(figsize=(10, 6))
plt.scatter(cv_scores, lb_scores, s=100, alpha=0.7, label='Submissions')

# Fit line, evaluated from CV=0 so the intercept is visible on the plot
cv_range = np.linspace(0, cv_axis_max, 100)
lb_pred = slope * cv_range + intercept
plt.plot(cv_range, lb_pred, 'r--', label=f'Fit: LB = {slope:.2f}*CV + {intercept:.4f}')

# Target line
plt.axhline(y=target_lb, color='g', linestyle='-', linewidth=2, label=f'Target ({target_lb:.4f})')

# Intercept line
plt.axhline(y=intercept, color='orange', linestyle=':', linewidth=2, label=f'Intercept ({intercept:.4f})')

plt.xlabel('CV Score')
plt.ylabel('LB Score')
# The count comes from the data so the title stays right as submissions are added.
plt.title(f'CV vs LB Relationship - All {len(cv_scores)} Submissions')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xlim(0, cv_axis_max)
plt.ylim(0, 0.12)
plt.show()

# Report the fitted intercept itself, and only claim the target is out of
# reach when the fit actually says so.
if intercept > target_lb:
    print(f'\nKey Insight: The intercept ({intercept:.4f}) is ABOVE the target ({target_lb:.4f}).')
    print(f'This means even with CV=0, we would get LB={intercept:.4f}.')
    print('The target is MATHEMATICALLY UNREACHABLE by improving CV alone.')
else:
    print(f'\nKey Insight: The intercept ({intercept:.4f}) is NOT above the target ({target_lb:.4f}).')
    print(f'On the fitted line the target is reached at CV={(target_lb - intercept) / slope:.6f}.')

In [None]:
# Analyze residuals from the fit: observed LB minus the LB the fitted line
# predicts from each submission's CV.
fitted_lb = slope * cv_scores + intercept
residuals = lb_scores - fitted_lb
# The same residuals expressed as a percentage of the observed LB score.
residuals_pct = residuals / lb_scores * 100

print('Residual Analysis')
print('='*60)
# Walk the precomputed arrays alongside the submission tuples instead of
# recomputing each prediction inside the loop.
for (name, cv, lb), pred_lb, residual, pct in zip(submissions, fitted_lb, residuals, residuals_pct):
    print(f'{name}: CV={cv:.4f}, LB={lb:.4f}, Pred={pred_lb:.4f}, Residual={residual:.4f} ({pct:.1f}%)')

print(f'\nMean absolute residual: {np.mean(np.abs(residuals)):.4f}')
print(f'Max absolute residual: {np.max(np.abs(residuals)):.4f}')
# The bound is computed from the data rather than asserted as a fixed number.
print(f'\nAll submissions fall on the same line with residuals up to {np.max(np.abs(residuals_pct)):.1f}%')

In [None]:
# Predictions for pending submissions: apply the fitted CV-LB line to the
# experiments that have a CV score but no leaderboard score yet.
print('Predictions for Pending Submissions')
print('='*60)
pending = [
    ('exp_049', 0.0081),
    ('exp_050', 0.0081),
    ('exp_052', 0.0081),  # Same as exp_050 with clipping
]

for name, cv in pending:
    pred_lb = slope * cv + intercept
    print(f'{name}: CV={cv:.4f} -> Predicted LB={pred_lb:.4f}')

# Best (lowest) scored LB comes from the submission history, and the exp_052
# CV from `pending`, so neither number is typed twice.
best_idx = int(np.argmin(lb_scores))
best_name = submissions[best_idx][0]
best_lb = lb_scores[best_idx]
candidate = 'exp_052'
candidate_pred = slope * dict(pending)[candidate] + intercept

print(f'\nBest LB so far: {best_lb:.4f} ({best_name})')
print(f'Expected improvement from {candidate}: {best_lb - candidate_pred:.4f}')
print('\nNote: exp_052 is exp_050 with clipping. Same CV, should have same LB.')

In [None]:
# What would it take to reach the target?  Solve LB = slope * CV + intercept
# for CV under the current fit and under hypothetical intercepts and slopes.
target_lb = 0.0347  # leaderboard score to reach

print('What Would It Take to Reach the Target?')
print('='*60)
print(f'Target LB: {target_lb:.4f}')
print(f'Current intercept: {intercept:.4f}')
print(f'Current slope: {slope:.4f}')
print()
print('Option 1: Improve CV (keeping same relationship)')
required_cv = (target_lb - intercept) / slope
# Label the result from its actual sign instead of assuming it is negative.
cv_verdict = 'NEGATIVE - IMPOSSIBLE' if required_cv < 0 else 'NON-NEGATIVE - reachable on the current fit'
print(f'  Required CV: {required_cv:.6f} ({cv_verdict})')
print()
print('Option 2: Reduce the intercept (change the relationship)')
for hypothetical_intercept in (0.0300, 0.0200, 0.0100):
    print(f'  If intercept = {hypothetical_intercept:.4f}, required CV = {(target_lb - hypothetical_intercept) / slope:.6f}')
print()
print('Option 3: Reduce the slope (change the relationship)')
for hypothetical_slope in (2.0, 1.0):
    print(f'  If slope = {hypothetical_slope:.1f}, required CV = {(target_lb - intercept) / hypothetical_slope:.6f}')
print()
# Only draw the "must change the relationship" conclusion when the current
# fit really cannot reach the target with a non-negative CV.
if required_cv < 0:
    print('CONCLUSION: We MUST change the CV-LB relationship to reach the target.')
else:
    print(f'CONCLUSION: The current CV-LB relationship reaches the target at CV = {required_cv:.6f}.')

In [None]:
# Candidate approaches that might change the CV-LB relationship.
# The report is kept as data, one entry per output line with '' for a blank
# line, and written with a single print call.
approach_report = (
    'Approaches That Might Change the CV-LB Relationship',
    '=' * 60,
    '',
    '1. UNCERTAINTY-WEIGHTED PREDICTIONS',
    '   - Use ensemble variance to estimate uncertainty',
    '   - For high-uncertainty predictions, blend toward population mean',
    '   - Hypothesis: Reduces extrapolation error, lowering intercept',
    '',
    '2. PER-SOLVENT ERROR ANALYSIS',
    '   - Identify which solvents cause the most error',
    '   - Develop solvent-specific strategies',
    '   - Hypothesis: Some solvents are outliers causing the intercept',
    '',
    '3. CONSERVATIVE PREDICTIONS FOR OUTLIERS',
    '   - Detect when test solvent is far from training distribution',
    '   - Use simpler model or blend toward mean for outliers',
    '   - Hypothesis: Outlier solvents cause the intercept',
    '',
    '4. DIFFERENT CV SCHEME (GroupKFold)',
    '   - The "mixall" kernel uses GroupKFold (5 splits) instead of LOO',
    '   - This might have a DIFFERENT CV-LB relationship',
    '   - Worth testing to see if it changes the intercept',
    '',
    '5. PSEUDO-LABELING',
    '   - Use confident test predictions to augment training',
    '   - Hypothesis: Adapts model to test distribution',
    '',
)
print('\n'.join(approach_report))

In [None]:
# Summary and recommendations.  Every number is derived from the fit and the
# submission history so the summary cannot disagree with the analysis.
target_lb = 0.0347  # leaderboard score to reach
pending_cv = 0.0081  # CV shared by the not-yet-scored exp_049 / exp_050 / exp_052

# Best (lowest) leaderboard score among the scored submissions.
best_idx = int(np.argmin(lb_scores))
best_name = submissions[best_idx][0]
best_lb = lb_scores[best_idx]
gap = best_lb - target_lb
# LB the fitted line predicts for the pending experiments.
pending_pred_lb = slope * pending_cv + intercept
intercept_above_target = intercept > target_lb

print('SUMMARY AND RECOMMENDATIONS')
print('='*60)
print()
print('CURRENT STATUS:')
print(f'  - Best CV: {pending_cv:.4f} (exp_049, exp_050, exp_052)')
print(f'  - Best LB: {best_lb:.4f} ({best_name})')
print(f'  - Target: {target_lb:.4f}')
print(f'  - Gap: {gap:.4f} ({gap / target_lb * 100:.1f}%)')
print()
print('KEY INSIGHT:')
print(f'  - CV-LB relationship: LB = {slope:.2f}*CV + {intercept:.4f}')
# State the comparison the fit actually produced instead of assuming it.
if intercept_above_target:
    print(f'  - Intercept ({intercept:.4f}) > Target ({target_lb:.4f})')
    print('  - Target is UNREACHABLE by improving CV alone')
else:
    print(f'  - Intercept ({intercept:.4f}) <= Target ({target_lb:.4f})')
    print(f'  - Target is reached on the fitted line at CV = {(target_lb - intercept) / slope:.6f}')
print()
print('IMMEDIATE ACTION:')
print('  1. Submit exp_052 (CatBoost+XGBoost with clipping) to verify fix')
print(f'  2. If LB matches prediction (~{pending_pred_lb:.4f}), confirms relationship holds')
print('  3. Then try approaches that change the relationship')
print()
print('NEXT EXPERIMENTS TO TRY:')
print('  1. Per-solvent error analysis to identify outliers')
print('  2. Uncertainty-weighted blending toward mean')
print('  3. Conservative predictions for outlier solvents')
print('  4. GroupKFold CV scheme (from "mixall" kernel)')