# Loop 65 Analysis: Major Breakthrough with Ens Model Approach

**Key Question**: Does the CatBoost + XGBoost ensemble follow a *different* CV-LB relationship (cross-validation score vs. leaderboard score) than our previous submissions?

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

# CV / LB score pairs for every submission, kept as a compact row table.
# Each row is (experiment id, CV score, LB score, model label); the rows are
# expanded into the list-of-dicts form that the DataFrame is built from.
_FIELDS = ('exp', 'cv', 'lb', 'model')
_ROWS = [
    ('exp_000', 0.0111, 0.0982, 'MLP'),
    ('exp_001', 0.0123, 0.1065, 'LGBM'),
    ('exp_003', 0.0105, 0.0972, 'MLP'),
    ('exp_005', 0.0104, 0.0969, 'MLP'),
    ('exp_006', 0.0097, 0.0946, 'MLP'),
    ('exp_007', 0.0093, 0.0932, 'MLP'),
    ('exp_009', 0.0092, 0.0936, 'Ridge'),
    ('exp_012', 0.0090, 0.0913, 'Ensemble'),
    ('exp_024', 0.0087, 0.0893, 'ACS+MLP'),
    ('exp_026', 0.0085, 0.0887, 'Weighted'),
    ('exp_030', 0.0083, 0.0877, 'GP+Ensemble'),
    ('exp_041', 0.0090, 0.0932, 'XGB'),
    ('exp_042', 0.0145, 0.1147, 'GroupKFold'),
]
submissions = [dict(zip(_FIELDS, row)) for row in _ROWS]

# One DataFrame row per submission; column order follows the dict key order.
df = pd.DataFrame(submissions)
print(f'Total submissions: {df.shape[0]}')
print(df)

In [None]:
# Fit an ordinary least-squares line  LB = slope * CV + intercept  through all
# rows of `df`, then compare the fitted intercept with the target LB score.
# The intercept is the LB the line predicts at CV = 0, so if it is above the
# target, the line never reaches the target no matter how far CV is reduced.

# Target LB used in this analysis; named once instead of repeating the literal.
TARGET_LB = 0.0347

cv_vals = df['cv'].values
lb_vals = df['lb'].values

slope, intercept, r_value, p_value, std_err = stats.linregress(cv_vals, lb_vals)

# Positive gap means the fitted intercept sits above the target.
intercept_gap = intercept - TARGET_LB

print('CV-LB Linear Fit:')
print(f'  LB = {slope:.3f} * CV + {intercept:.4f}')
print(f'  R-squared = {r_value**2:.4f}')
print(f'  Intercept = {intercept:.4f}')
print(f'\nTarget: {TARGET_LB:.4f}')
print(f'Intercept ({intercept:.4f}) vs Target ({TARGET_LB:.4f}): Gap = {intercept_gap:.4f}')
print('\nIf intercept > target, target is unreachable by improving CV alone!')
print(f'Intercept > Target? {intercept > TARGET_LB}')

In [None]:
# Predict the LB for exp_069 (CV = 0.005146) from the fitted CV-LB line, and
# compare it with the best LB in the submissions table and with the target.
# Relies on `slope` / `intercept` from the linear fit and on `df`.
# NOTE: for the submissions currently in `df`, new_cv is below every CV used
# in the fit (smallest is 0.0083), so this prediction extrapolates the line.
new_cv = 0.005146
predicted_lb = slope * new_cv + intercept

# Target LB used in this analysis; named once instead of repeating the literal.
TARGET_LB = 0.0347

# Look up the best (lowest) LB and its experiment id from the table instead of
# hard-coding them, so the comparison stays correct when rows are added.
best_idx = df['lb'].idxmin()
best_lb = df.at[best_idx, 'lb']
best_exp = df.at[best_idx, 'exp']

print('\n=== PREDICTION FOR EXP_069 ===')
print(f'New CV: {new_cv:.6f}')
print(f'Predicted LB (using existing relationship): {predicted_lb:.4f}')
print(f'\nBest LB so far: {best_lb:.4f} ({best_exp})')
print(f'Predicted improvement: {(best_lb - predicted_lb) / best_lb * 100:.1f}%')
print(f'\nTarget: {TARGET_LB:.4f}')
print(f'Gap to target: {predicted_lb - TARGET_LB:.4f}')

In [None]:
# Print the two scenarios for how exp_069's CV could translate to LB, followed
# by the recommendation to submit. The report is assembled as a list of lines
# and emitted with a single print; an empty string stands for a blank line.
# Relies on `predicted_lb` computed from the fitted CV-LB line.
rule = '=' * 60
question_report = [
    rule,
    'KEY STRATEGIC QUESTION',
    rule,
    '',
    'The CatBoost + XGBoost ensemble (exp_069) achieved CV = 0.005146',
    'This is 35% better than our previous best CV (0.007938)',
    '',
    'Two scenarios:',
    '',
    'SCENARIO 1: Same CV-LB relationship',
    f'  Predicted LB = {predicted_lb:.4f}',
    '  Still far from target (0.0347)',
    '  But would be best LB achieved (vs 0.0877)',
    '',
    'SCENARIO 2: Different CV-LB relationship',
    '  The intercept might be lower',
    '  This would make the target potentially reachable',
    '',
    'RECOMMENDATION: SUBMIT exp_069 to verify!',
    '  - If LB improves proportionally, we have a new best',
    '  - If LB improves MORE than proportionally, the relationship changed!',
    '  - Either way, this is valuable information',
]
print('\n'.join(question_report))

In [None]:
# Print the exp_069 summary: headline results, what differs from earlier
# approaches, and why the CV-LB relationship might change. The text is held
# as (heading, bullet lines) sections; each section is followed by a blank
# line, and the closing call to action is printed last.
divider = '=' * 60
summary_sections = [
    (
        'Experiment 069 (Ens Model Exact) Results:',
        [
            '  - Single Solvent MSE: 0.009175 (worse than best 0.008216)',
            '  - Full Data MSE: 0.002992 (MUCH better than best 0.007789)',
            '  - Combined MSE: 0.005146 (35% improvement!)',
        ],
    ),
    (
        'Key Differences from Previous Approaches:',
        [
            '  1. CatBoost with MultiRMSE (multi-output regression)',
            '  2. XGBoost with separate models per target',
            '  3. Different ensemble weights: Single (7:6), Full (1:2)',
            '  4. Feature priority-based correlation filtering (4199 -> 69 features)',
            '  5. Multi-target normalization (clip + renormalize)',
        ],
    ),
    (
        'Why This Might Change the CV-LB Relationship:',
        [
            '  - Fundamentally different model family (gradient boosting vs NN/GP)',
            '  - Much smaller feature set (69 vs 140+)',
            '  - Different ensemble strategy for single vs full data',
            '  - The kernel authors likely tuned for LB, not just CV',
        ],
    ),
]

for banner_line in (divider, 'SUMMARY: MAJOR BREAKTHROUGH', divider, ''):
    print(banner_line)

for heading, bullets in summary_sections:
    print(heading)
    for bullet in bullets:
        print(bullet)
    print()

print('NEXT STEP: SUBMIT exp_069 to verify the CV-LB relationship!')