# Loop 60 LB Feedback Analysis

**Latest submission:** exp_032 with CV=0.0082, LB=0.0873

**Target:** 0.0707

**Remaining submissions:** 4

In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

# Submission history
submissions = [
    ('exp_000', 0.011081, 0.09816),
    ('exp_001', 0.012297, 0.10649),
    ('exp_003', 0.010501, 0.09719),
    ('exp_005', 0.010430, 0.09691),
    ('exp_006', 0.009749, 0.09457),
    ('exp_007', 0.009262, 0.09316),
    ('exp_009', 0.009192, 0.09364),
    ('exp_012', 0.009004, 0.09134),
    ('exp_024', 0.008689, 0.08929),
    ('exp_026', 0.008465, 0.08875),
    ('exp_030', 0.008298, 0.08772),
    ('exp_035', 0.009825, 0.09696),
    ('exp_032', 0.008194, 0.08731),
]

df = pd.DataFrame(submissions, columns=['exp', 'cv', 'lb'])
df['gap'] = df['lb'] - df['cv']
print(df.to_string(index=False))

    exp       cv      lb      gap
exp_000 0.011081 0.09816 0.087079
exp_001 0.012297 0.10649 0.094193
exp_003 0.010501 0.09719 0.086689
exp_005 0.010430 0.09691 0.086480
exp_006 0.009749 0.09457 0.084821
exp_007 0.009262 0.09316 0.083898
exp_009 0.009192 0.09364 0.084448
exp_012 0.009004 0.09134 0.082336
exp_024 0.008689 0.08929 0.080601
exp_026 0.008465 0.08875 0.080285
exp_030 0.008298 0.08772 0.079422
exp_035 0.009825 0.09696 0.087135
exp_032 0.008194 0.08731 0.079116


In [2]:
# Linear regression analysis
slope, intercept, r_value, p_value, std_err = stats.linregress(df['cv'], df['lb'])
print(f'\nCV-LB Relationship:')
print(f'  LB = {slope:.4f} * CV + {intercept:.4f}')
print(f'  R² = {r_value**2:.4f}')
print(f'  Std Error = {std_err:.4f}')

# What CV is needed to hit target?
target_lb = 0.0707
required_cv = (target_lb - intercept) / slope
print(f'\nTo hit target LB={target_lb}:')
print(f'  Required CV = {required_cv:.6f}')
print(f'  Current best CV = {df["cv"].min():.6f}')
print(f'  Gap to close = {df["cv"].min() - required_cv:.6f}')


CV-LB Relationship:
  LB = 4.3370 * CV + 0.0523
  R² = 0.9581
  Std Error = 0.2735

To hit target LB=0.0707:
  Required CV = 0.004251
  Current best CV = 0.008194
  Gap to close = 0.003943


In [3]:
# Analyze the gap pattern
print('\nGap Analysis:')
print(f'  Mean gap: {df["gap"].mean():.5f}')
print(f'  Std gap: {df["gap"].std():.5f}')
print(f'  Min gap: {df["gap"].min():.5f} ({df.loc[df["gap"].idxmin(), "exp"]})')
print(f'  Max gap: {df["gap"].max():.5f} ({df.loc[df["gap"].idxmax(), "exp"]})')

# Residuals from linear fit
df['predicted_lb'] = slope * df['cv'] + intercept
df['residual'] = df['lb'] - df['predicted_lb']
print('\nResiduals from linear fit (negative = better than expected):')
for _, row in df.sort_values('residual').iterrows():
    print(f'  {row["exp"]}: {row["residual"]:+.5f}')


Gap Analysis:
  Mean gap: 0.08435
  Std gap: 0.00417
  Min gap: 0.07912 (exp_032)
  Max gap: 0.09419 (exp_001)

Residuals from linear fit (negative = better than expected):
  exp_000: -0.00216
  exp_024: -0.00066
  exp_003: -0.00062
  exp_005: -0.00059
  exp_030: -0.00053
  exp_032: -0.00049
  exp_026: -0.00023
  exp_006: +0.00003
  exp_012: +0.00003
  exp_007: +0.00073
  exp_001: +0.00089
  exp_009: +0.00151
  exp_035: +0.00209


In [4]:
# Key insight: The best residual tells us which model generalizes best
best_residual_exp = df.loc[df['residual'].idxmin(), 'exp']
best_residual = df['residual'].min()
print(f'\nBest generalizing model: {best_residual_exp} (residual: {best_residual:+.5f})')

# What if we could match that residual with best CV?
best_cv = df['cv'].min()
potential_lb = slope * best_cv + intercept + best_residual
print(f'\nIf best CV ({best_cv:.6f}) had best residual ({best_residual:+.5f}):')
print(f'  Potential LB = {potential_lb:.5f}')
print(f'  Target = {target_lb}')
print(f'  Still need: {potential_lb - target_lb:.5f}')


Best generalizing model: exp_000 (residual: -0.00216)

If best CV (0.008194) had best residual (-0.00216):
  Potential LB = 0.08564
  Target = 0.0707
  Still need: 0.01494


In [5]:
# CRITICAL: With 4 submissions left, what's the strategy?
print('\n' + '='*60)
print('STRATEGIC ANALYSIS - 4 SUBMISSIONS REMAINING')
print('='*60)

print('\n1. CURRENT BEST:')
print(f'   exp_032: CV={0.008194:.6f}, LB={0.08731:.5f}')

print('\n2. CV-LB RELATIONSHIP:')
print(f'   LB = {slope:.4f} * CV + {intercept:.4f}')
print(f'   Intercept ({intercept:.4f}) > Target ({target_lb}) means:')
print(f'   Even with CV=0, predicted LB would be {intercept:.4f}')
print(f'   This is ABOVE target!')

print('\n3. WHAT THIS MEANS:')
print('   - The systematic gap (intercept) is too large')
print('   - Improving CV alone cannot reach target')
print('   - Need to find approach that REDUCES the intercept')

print('\n4. APPROACHES THAT MIGHT REDUCE INTERCEPT:')
print('   a) Different model architecture (GNN, attention)')
print('   b) Different features (domain-specific)')
print('   c) Different training strategy (domain adaptation)')
print('   d) Ensemble of diverse models')

print('\n5. REMAINING SUBMISSIONS STRATEGY:')
print('   - Submit 1: Try a fundamentally different approach')
print('   - Submit 2: Based on feedback from Submit 1')
print('   - Submit 3: Based on feedback from Submit 2')
print('   - Submit 4: Final best attempt')


STRATEGIC ANALYSIS - 4 SUBMISSIONS REMAINING

1. CURRENT BEST:
   exp_032: CV=0.008194, LB=0.08731

2. CV-LB RELATIONSHIP:
   LB = 4.3370 * CV + 0.0523
   Intercept (0.0523) > Target (0.0707) means:
   Even with CV=0, predicted LB would be 0.0523
   This is ABOVE target!

3. WHAT THIS MEANS:
   - The systematic gap (intercept) is too large
   - Improving CV alone cannot reach target
   - Need to find approach that REDUCES the intercept

4. APPROACHES THAT MIGHT REDUCE INTERCEPT:
   a) Different model architecture (GNN, attention)
   b) Different features (domain-specific)
   c) Different training strategy (domain adaptation)
   d) Ensemble of diverse models

5. REMAINING SUBMISSIONS STRATEGY:
   - Submit 1: Try a fundamentally different approach
   - Submit 2: Based on feedback from Submit 1
   - Submit 3: Based on feedback from Submit 2
   - Submit 4: Final best attempt


In [6]:
# Check if any experiment has a notably different residual pattern
print('\nLooking for outliers in residual pattern...')
print('\nExperiments with negative residuals (better than expected):')
for _, row in df[df['residual'] < 0].sort_values('residual').iterrows():
    print(f'  {row["exp"]}: CV={row["cv"]:.6f}, LB={row["lb"]:.5f}, residual={row["residual"]:+.5f}')

print('\nExperiments with positive residuals (worse than expected):')
for _, row in df[df['residual'] > 0].sort_values('residual', ascending=False).iterrows():
    print(f'  {row["exp"]}: CV={row["cv"]:.6f}, LB={row["lb"]:.5f}, residual={row["residual"]:+.5f}')


Looking for outliers in residual pattern...

Experiments with negative residuals (better than expected):
  exp_000: CV=0.011081, LB=0.09816, residual=-0.00216
  exp_024: CV=0.008689, LB=0.08929, residual=-0.00066
  exp_003: CV=0.010501, LB=0.09719, residual=-0.00062
  exp_005: CV=0.010430, LB=0.09691, residual=-0.00059
  exp_030: CV=0.008298, LB=0.08772, residual=-0.00053
  exp_032: CV=0.008194, LB=0.08731, residual=-0.00049
  exp_026: CV=0.008465, LB=0.08875, residual=-0.00023

Experiments with positive residuals (worse than expected):
  exp_035: CV=0.009825, LB=0.09696, residual=+0.00209
  exp_009: CV=0.009192, LB=0.09364, residual=+0.00151
  exp_001: CV=0.012297, LB=0.10649, residual=+0.00089
  exp_007: CV=0.009262, LB=0.09316, residual=+0.00073
  exp_012: CV=0.009004, LB=0.09134, residual=+0.00003
  exp_006: CV=0.009749, LB=0.09457, residual=+0.00003


In [None]:
# CRITICAL INSIGHT: The intercept is 0.0523, which is BELOW target 0.0707
# This means target IS mathematically reachable!
# Required CV = (0.0707 - 0.0523) / 4.337 = 0.00425

print('='*60)
print('CRITICAL MATHEMATICAL ANALYSIS')
print('='*60)

print(f'\nIntercept: {intercept:.4f}')
print(f'Target: {target_lb}')
print(f'Intercept < Target: {intercept < target_lb}')

if intercept < target_lb:
    print('\n*** TARGET IS MATHEMATICALLY REACHABLE ***')
    print(f'Required CV to hit target: {required_cv:.6f}')
    print(f'Current best CV: {best_cv:.6f}')
    improvement_needed = (best_cv - required_cv) / best_cv * 100
    print(f'CV improvement needed: {improvement_needed:.1f}%')
else:
    print('\n*** TARGET UNREACHABLE WITH CURRENT APPROACH ***')

# What's the variance in residuals?
print(f'\nResidual analysis:')
print(f'  Mean residual: {df["residual"].mean():.6f}')
print(f'  Std residual: {df["residual"].std():.6f}')
print(f'  Best residual: {df["residual"].min():.6f}')
print(f'  Worst residual: {df["residual"].max():.6f}')

# If we could get best residual with best CV
best_possible_lb = slope * best_cv + intercept + df['residual'].min()
print(f'\nBest possible LB (best CV + best residual): {best_possible_lb:.5f}')
print(f'Target: {target_lb}')
print(f'Gap: {best_possible_lb - target_lb:.5f}')