# Loop 65 Analysis: CV-LB Relationship After Ens Model Breakthrough

exp_069 achieved CV = 0.005146 (a 35% improvement over the previous best). We need to analyze:
1. CV-LB relationship with all 13 submissions
2. Predicted LB for new CV
3. Whether this approach might have a different CV-LB relationship

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

# One (exp, cv, lb) triple per submission -- 13 in total.
_scored_runs = [
    ('exp_000', 0.0111, 0.0982),
    ('exp_001', 0.0123, 0.1065),
    ('exp_003', 0.0105, 0.0972),
    ('exp_005', 0.0104, 0.0969),
    ('exp_006', 0.0097, 0.0946),
    ('exp_007', 0.0093, 0.0932),
    ('exp_009', 0.0092, 0.0936),
    ('exp_012', 0.0090, 0.0913),
    ('exp_024', 0.0087, 0.0893),
    ('exp_026', 0.0085, 0.0887),
    ('exp_030', 0.0083, 0.0877),
    ('exp_041', 0.0090, 0.0932),
    ('exp_042', 0.0145, 0.1147),
]

# Expand the triples into records keyed by column name.
submissions = [
    {'exp': exp_id, 'cv': cv_score, 'lb': lb_score}
    for exp_id, cv_score, lb_score in _scored_runs
]

# Tabular view with columns exp / cv / lb, used by the later cells.
df = pd.DataFrame(submissions)
print(f'Total submissions: {len(df)}')
print(df)

In [None]:
# Fit linear regression: LB = slope * CV + intercept
# linregress performs an ordinary least-squares fit of df['lb'] on df['cv'];
# r_value is the correlation coefficient, so r_value**2 is the R² reported below.
slope, intercept, r_value, p_value, std_err = stats.linregress(df['cv'], df['lb'])

# Look up the row with the lowest (best) LB so the experiment label printed
# below always matches the data instead of being typed in by hand.
best_idx = df['lb'].idxmin()
best_lb = df.loc[best_idx, 'lb']
best_exp = df.loc[best_idx, 'exp']

print('CV-LB Relationship:')
print(f'  LB = {slope:.3f} * CV + {intercept:.4f}')
print(f'  R² = {r_value**2:.4f}')
print(f'  Intercept = {intercept:.4f}')
print('\nTarget LB: 0.0347')
print(f'Best LB so far: {best_lb:.4f} ({best_exp})')
print(f'Gap to target: {best_lb - 0.0347:.4f}')

In [None]:
# Extrapolate the fitted CV-LB line to the CV score of exp_069.
new_cv = 0.005146
predicted_lb = slope * new_cv + intercept

# Build the whole report first, then emit it one entry per line.
prediction_report = (
    f'\nPrediction for exp_069 (CV = {new_cv}):',
    f'  Predicted LB = {slope:.3f} * {new_cv} + {intercept:.4f} = {predicted_lb:.4f}',
    f'\nIf the CV-LB relationship holds:',
    f'  Expected LB improvement: {df["lb"].min():.4f} -> {predicted_lb:.4f}',
    f'  Expected improvement: {(df["lb"].min() - predicted_lb) / df["lb"].min() * 100:.1f}%',
    f'\nTarget: 0.0347',
    f'  Gap from predicted LB: {predicted_lb - 0.0347:.4f}',
)
print(*prediction_report, sep='\n')

In [None]:
# What CV would be needed to reach target?
# Invert the fitted line LB = slope * CV + intercept at the target LB.
target_lb = 0.0347
required_cv = (target_lb - intercept) / slope
print(f'\nRequired CV to reach target {target_lb}:')
print(f'  CV = ({target_lb} - {intercept:.4f}) / {slope:.3f} = {required_cv:.6f}')
# Reuse new_cv from the prediction cell rather than re-typing the literal,
# so this report cannot drift from the value the prediction was made with.
print(f'\nCurrent best CV: {new_cv}')
print(f'  Gap: {new_cv - required_cv:.6f}')

# NOTE: the explanation printed below (intercept > target) follows from a
# negative required CV only when the fitted slope is positive.
if required_cv < 0:
    print('\n*** CRITICAL: Required CV is NEGATIVE! ***')
    print(f'  This means the intercept ({intercept:.4f}) > target ({target_lb})')
    print('  Target is MATHEMATICALLY UNREACHABLE with current CV-LB relationship!')
    print('  We need to CHANGE the relationship, not just improve CV.')

In [None]:
# Key insight: the Ens Model approach is fundamentally different.
# The banner is static text, so collect it once and print it line by line;
# empty strings stand in for the blank separator lines.
_rule = '='*60
insight_lines = (
    _rule,
    'KEY STRATEGIC INSIGHT',
    _rule,
    '',
    'The Ens Model approach (exp_069) is fundamentally different:',
    '  - CatBoost + XGBoost (gradient boosting) vs GP + MLP + LGBM',
    '  - Feature priority-based correlation filtering (69 features vs 140+)',
    '  - Different ensemble weights for single vs full data',
    '  - Multi-target normalization',
    '',
    'This approach MAY have a different CV-LB relationship!',
    '',
    'CRITICAL QUESTION: Does this approach have a lower intercept?',
    '',
    'If the intercept is lower, the target becomes reachable.',
    'If the intercept is the same, we need to try other approaches.',
    '',
    'RECOMMENDATION: SUBMIT exp_069 to verify the CV-LB relationship!',
)
print(*insight_lines, sep='\n')

In [None]:
# Summary: restate the fitted relationship, the exp_069 prediction and the
# recommended next step. Empty strings stand in for blank separator lines.
summary_lines = (
    '\n' + '='*60,
    'SUMMARY',
    '='*60,
    '',
    f'Current CV-LB relationship: LB = {slope:.3f} * CV + {intercept:.4f} (R² = {r_value**2:.4f})',
    f'Intercept ({intercept:.4f}) > Target (0.0347) means target is unreachable with current relationship',
    '',
    f'exp_069 achieved CV = 0.005146 (35% improvement over previous best)',
    f'Predicted LB (using old relationship) = {predicted_lb:.4f}',
    '',
    'CRITICAL: The Ens Model approach is fundamentally different.',
    'It MAY have a different CV-LB relationship with a lower intercept.',
    '',
    'RECOMMENDATION: SUBMIT exp_069 to verify the CV-LB relationship!',
    'This is the highest-leverage action available.',
)
print(*summary_lines, sep='\n')