# Loop 60 LB Feedback Analysis

**Latest submission:** exp_032 with CV=0.0082, LB=0.0873

**Target:** 0.0707

**Remaining submissions:** 4

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

# Submission history: one (experiment id, local CV score, leaderboard score)
# tuple per submission made so far, in the order they are listed here.
submissions = [
    ('exp_000', 0.011081, 0.09816),
    ('exp_001', 0.012297, 0.10649),
    ('exp_003', 0.010501, 0.09719),
    ('exp_005', 0.010430, 0.09691),
    ('exp_006', 0.009749, 0.09457),
    ('exp_007', 0.009262, 0.09316),
    ('exp_009', 0.009192, 0.09364),
    ('exp_012', 0.009004, 0.09134),
    ('exp_024', 0.008689, 0.08929),
    ('exp_026', 0.008465, 0.08875),
    ('exp_030', 0.008298, 0.08772),
    ('exp_035', 0.009825, 0.09696),
    ('exp_032', 0.008194, 0.08731),
]

# Tabulate the history and add the per-submission difference between the
# leaderboard score and the CV score as the `gap` column.
column_names = ['exp', 'cv', 'lb']
df = pd.DataFrame.from_records(submissions, columns=column_names)
lb_minus_cv = df['lb'] - df['cv']
df['gap'] = lb_minus_cv

history_table = df.to_string(index=False)
print(history_table)

In [None]:
# Linear regression analysis: least-squares fit of leaderboard score (lb) on
# cross-validation score (cv) across all submissions in `df`.
slope, intercept, r_value, p_value, std_err = stats.linregress(df['cv'], df['lb'])
print('\nCV-LB Relationship:')
print(f'  LB = {slope:.4f} * CV + {intercept:.4f}')
# "\u00b2" is the superscript-two character. It is written as an escape so the
# label survives re-encoding of this file (it had previously been mangled
# into mojibake).
print(f'  R\u00b2 = {r_value**2:.4f}')
# The fifth value returned by linregress is the standard error of the slope.
print(f'  Std Error = {std_err:.4f}')

# What CV is needed to hit target?
# Invert the fitted line LB = slope * CV + intercept at LB = target_lb.
target_lb = 0.0707
required_cv = (target_lb - intercept) / slope
print(f'\nTo hit target LB={target_lb}:')
print(f'  Required CV = {required_cv:.6f}')
print(f'  Current best CV = {df["cv"].min():.6f}')
# Positive value: how much lower the best CV would have to go, according to
# the fitted line, to land on the target.
print(f'  Gap to close = {df["cv"].min() - required_cv:.6f}')

In [None]:
# Summarise the LB-minus-CV gap: mean, standard deviation, and the
# experiments holding the smallest and largest gap.
gap = df['gap']
smallest_gap_idx, largest_gap_idx = gap.idxmin(), gap.idxmax()
print('\nGap Analysis:')
print(f'  Mean gap: {gap.mean():.5f}')
print(f'  Std gap: {gap.std():.5f}')
print(f'  Min gap: {gap.min():.5f} ({df.loc[smallest_gap_idx, "exp"]})')
print(f'  Max gap: {gap.max():.5f} ({df.loc[largest_gap_idx, "exp"]})')

# Store the fitted-line prediction and the residual (actual LB minus
# predicted LB) on the frame so later cells can reuse them.
fitted_lb = slope * df['cv'] + intercept
df['predicted_lb'] = fitted_lb
df['residual'] = df['lb'] - fitted_lb

# List experiments from most negative residual to most positive.
print('\nResiduals from linear fit (negative = better than expected):')
ranked = df.sort_values('residual')
for exp_name, resid in zip(ranked['exp'], ranked['residual']):
    print(f'  {exp_name}: {resid:+.5f}')

In [None]:
# Key insight: the most negative residual marks the experiment whose LB came
# in furthest below what the fitted line predicts from its CV.
residuals = df['residual']
best_residual = residuals.min()
best_residual_exp = df.loc[residuals.idxmin(), 'exp']

# Hypothetical: evaluate the fitted line at the lowest CV seen so far and
# shift it by that most favourable residual.
best_cv = df['cv'].min()
line_at_best_cv = slope * best_cv + intercept
potential_lb = line_at_best_cv + best_residual
shortfall = potential_lb - target_lb

print(f'\nBest generalizing model: {best_residual_exp} (residual: {best_residual:+.5f})')
print(f'\nIf best CV ({best_cv:.6f}) had best residual ({best_residual:+.5f}):')
print(f'  Potential LB = {potential_lb:.5f}')
print(f'  Target = {target_lb}')
print(f'  Still need: {shortfall:.5f}')

In [None]:
# CRITICAL: With 4 submissions left, what's the strategy?
# The conclusions printed in sections 2 and 3 are chosen by comparing the
# fitted intercept with the target, rather than being asserted
# unconditionally, so the text always agrees with the numbers shown.
print('\n' + '='*60)
print('STRATEGIC ANALYSIS - 4 SUBMISSIONS REMAINING')
print('='*60)

# Current best = the submission with the lowest leaderboard score in `df`.
best_row = df.loc[df['lb'].idxmin()]
print('\n1. CURRENT BEST:')
print(f'   {best_row["exp"]}: CV={best_row["cv"]:.6f}, LB={best_row["lb"]:.5f}')

# The intercept is the LB the fitted line predicts at CV=0. Only when it
# exceeds the target does the fit say that no CV improvement can reach it.
intercept_above_target = intercept > target_lb

print('\n2. CV-LB RELATIONSHIP:')
print(f'   LB = {slope:.4f} * CV + {intercept:.4f}')
if intercept_above_target:
    print(f'   Intercept ({intercept:.4f}) > Target ({target_lb}) means:')
    print(f'   Even with CV=0, predicted LB would be {intercept:.4f}')
    print('   This is ABOVE target!')
elif slope > 0:
    # Invert LB = slope * CV + intercept at LB = target_lb.
    cv_at_target = (target_lb - intercept) / slope
    print(f'   Intercept ({intercept:.4f}) <= Target ({target_lb}) means:')
    print(f'   With CV=0, predicted LB would be {intercept:.4f}, which is not above target')
    print(f'   The fitted line reaches the target at CV = {cv_at_target:.6f}')
    print(f'   Current best CV = {df["cv"].min():.6f}')
else:
    print(f'   Intercept ({intercept:.4f}) <= Target ({target_lb}), but the slope is not positive')
    print('   The fitted line gives no meaningful CV value for the target')

print('\n3. WHAT THIS MEANS:')
if intercept_above_target:
    print('   - The systematic gap (intercept) is too large')
    print('   - Improving CV alone cannot reach target')
    print('   - Need to find approach that REDUCES the intercept')
else:
    print('   - The intercept by itself does not put the target out of reach')
    print('   - Compare the required CV above with the current best CV before changing approach')
    print('   - Reducing the intercept would still shrink the CV improvement needed')

print('\n4. APPROACHES THAT MIGHT REDUCE INTERCEPT:')
print('   a) Different model architecture (GNN, attention)')
print('   b) Different features (domain-specific)')
print('   c) Different training strategy (domain adaptation)')
print('   d) Ensemble of diverse models')

print('\n5. REMAINING SUBMISSIONS STRATEGY:')
print('   - Submit 1: Try a fundamentally different approach')
print('   - Submit 2: Based on feedback from Submit 1')
print('   - Submit 3: Based on feedback from Submit 2')
print('   - Submit 4: Final best attempt')

In [None]:
# Split experiments by the sign of their residual from the fitted line and
# list each group with its most extreme member first.
print('\nLooking for outliers in residual pattern...')

is_below_line = df['residual'] < 0
is_above_line = df['residual'] > 0
below_line = df[is_below_line].sort_values('residual')
above_line = df[is_above_line].sort_values('residual', ascending=False)

sections = (
    ('\nExperiments with negative residuals (better than expected):', below_line),
    ('\nExperiments with positive residuals (worse than expected):', above_line),
)
for heading, subset in sections:
    print(heading)
    for rec in subset.itertuples(index=False):
        print(f'  {rec.exp}: CV={rec.cv:.6f}, LB={rec.lb:.5f}, residual={rec.residual:+.5f}')