# Loop 25 LB Feedback Analysis

**exp_024 submitted**: CV 0.0087 → LB 0.0893 (CV − LB gap: -0.0806)

This is our BEST LB score yet! Let's analyze the trajectory and plan next steps.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

# Leaderboard history: one (experiment id, CV score, LB score) record per submission.
score_fields = ('exp', 'cv', 'lb')
score_rows = (
    ('exp_000', 0.0111, 0.0982),
    ('exp_001', 0.0123, 0.1065),
    ('exp_003', 0.0105, 0.0972),
    ('exp_005', 0.0104, 0.0969),
    ('exp_006', 0.0097, 0.0946),
    ('exp_007', 0.0093, 0.0932),
    ('exp_009', 0.0092, 0.0936),
    ('exp_012', 0.0090, 0.0913),
    ('exp_024', 0.0087, 0.0893),  # NEW!
)
submissions = [dict(zip(score_fields, row)) for row in score_rows]

df = pd.DataFrame(submissions)

# Lower is better for both metrics, so the "best" row is the one holding the minimum.
best_cv_row = df.loc[df['cv'].idxmin()]
best_lb_row = df.loc[df['lb'].idxmin()]

print('=== SUBMISSION HISTORY ===')
print(df.to_string(index=False))
print(f'\nBest CV: {best_cv_row["cv"]:.4f} ({best_cv_row["exp"]})')
print(f'Best LB: {best_lb_row["lb"]:.4f} ({best_lb_row["exp"]})')

=== SUBMISSION HISTORY ===
    exp     cv     lb
exp_000 0.0111 0.0982
exp_001 0.0123 0.1065
exp_003 0.0105 0.0972
exp_005 0.0104 0.0969
exp_006 0.0097 0.0946
exp_007 0.0093 0.0932
exp_009 0.0092 0.0936
exp_012 0.0090 0.0913
exp_024 0.0087 0.0893

Best CV: 0.0087 (exp_024)
Best LB: 0.0893 (exp_024)


In [2]:
# Refit LB as a linear function of CV using every submission recorded in `df`.
cv_scores = df['cv'].to_numpy()
lb_scores = df['lb'].to_numpy()

# Least-squares regression of LB on CV; read the result fields by name.
fit = stats.linregress(cv_scores, lb_scores)
slope, intercept = fit.slope, fit.intercept
r_value, p_value, std_err = fit.rvalue, fit.pvalue, fit.stderr


def predict_lb(cv_value):
    """Return the LB score the fitted line assigns to `cv_value`."""
    return slope * cv_value + intercept


print('=== UPDATED CV-LB LINEAR FIT ===')
print(f'LB = {slope:.2f} * CV + {intercept:.4f}')
print(f'R² = {r_value**2:.4f}')
print(f'Slope std error: {std_err:.4f}')

# Extrapolate the line to a few hypothetical CV scores.
for cv_level in (0.008, 0.007, 0.006, 0.005, 0.004, 0.003):
    print(f'CV {cv_level:.3f} → Predicted LB {predict_lb(cv_level):.4f}')

# Invert the line to find the CV at which the predicted LB equals the target.
# NOTE: with a positive slope, a fitted intercept above the target makes
# required_cv negative, i.e. no non-negative CV reaches the target under this fit.
target = 0.01727
required_cv = (target - intercept) / slope
print(f'\nTo beat target {target}: need CV < {required_cv:.6f}')

=== UPDATED CV-LB LINEAR FIT ===
LB = 4.19 * CV + 0.0537
R² = 0.9551
Slope std error: 0.3431
CV 0.008 → Predicted LB 0.0872
CV 0.007 → Predicted LB 0.0830
CV 0.006 → Predicted LB 0.0788
CV 0.005 → Predicted LB 0.0746
CV 0.004 → Predicted LB 0.0704
CV 0.003 → Predicted LB 0.0662

To beat target 0.01727: need CV < -0.008685


In [3]:
# Uncertainty of the CV-LB fit: residual spread and 95% intervals for both coefficients.
n = len(cv_scores)
dof = n - 2  # two fitted parameters (slope and intercept)

mean_cv = cv_scores.mean()
ss_cv = np.square(cv_scores - mean_cv).sum()  # sum of squared deviations of CV

# Residual standard error of the regression
residuals = lb_scores - (slope * cv_scores + intercept)
mse_residuals = np.square(residuals).sum() / dof
se_estimate = np.sqrt(mse_residuals)

# Two-sided 95% critical value of Student's t with `dof` degrees of freedom
t_crit = stats.t.ppf(0.975, dof)

# Standard errors of the two coefficients
se_intercept = se_estimate * np.sqrt(1/n + mean_cv**2/ss_cv)
se_slope = se_estimate / np.sqrt(ss_cv)


def t_interval(center, std_error):
    """Return (low, high) = center -/+ t_crit * std_error."""
    half_width = t_crit * std_error
    return (center - half_width, center + half_width)


ci_intercept = t_interval(intercept, se_intercept)
ci_slope = t_interval(slope, se_slope)

print('=== CONFIDENCE INTERVALS ===')
print(f'Standard error of estimate: {se_estimate:.6f}')
print(f'Mean CV: {mean_cv:.6f}')
print(f'SS_CV: {ss_cv:.10f}')
print(f'\nIntercept 95% CI: [{ci_intercept[0]:.4f}, {ci_intercept[1]:.4f}]')
print(f'Slope 95% CI: [{ci_slope[0]:.2f}, {ci_slope[1]:.2f}]')

=== CONFIDENCE INTERVALS ===
Standard error of estimate: 0.001128
Mean CV: 0.010022
SS_CV: 0.0000108156

Intercept 95% CI: [0.0455, 0.0618]
Slope 95% CI: [3.38, 5.00]


In [4]:
# Step-by-step relative change in CV and LB between consecutive submissions.
def pct_change(old, new):
    """Return the change from `old` to `new` as a percentage of `old`."""
    return (new - old) / old * 100


print('=== IMPROVEMENT TRAJECTORY ===')
history = list(zip(df['exp'], df['cv'], df['lb']))
# Pair each submission with the one that follows it, in row order.
for (prev_exp, prev_cv, prev_lb), (curr_exp, curr_cv, curr_lb) in zip(history, history[1:]):
    cv_change = pct_change(prev_cv, curr_cv)
    lb_change = pct_change(prev_lb, curr_lb)
    print(f"{prev_exp} → {curr_exp}: CV {cv_change:+.2f}%, LB {lb_change:+.2f}%")

# First row versus last row
first = df.iloc[0]
last = df.iloc[-1]
overall_cv = pct_change(first['cv'], last['cv'])
overall_lb = pct_change(first['lb'], last['lb'])
print(f"\nOverall: CV {overall_cv:+.1f}%, LB {overall_lb:+.1f}%")

=== IMPROVEMENT TRAJECTORY ===
exp_000 → exp_001: CV +10.81%, LB +8.45%
exp_001 → exp_003: CV -14.63%, LB -8.73%
exp_003 → exp_005: CV -0.95%, LB -0.31%
exp_005 → exp_006: CV -6.73%, LB -2.37%
exp_006 → exp_007: CV -4.12%, LB -1.48%
exp_007 → exp_009: CV -1.08%, LB +0.43%
exp_009 → exp_012: CV -2.17%, LB -2.46%
exp_012 → exp_024: CV -3.33%, LB -2.19%

Overall: CV -21.6%, LB -9.1%


In [5]:
# Distance between the best leaderboard score so far and the target score.
target = 0.01727
best_lb = df['lb'].min()

gap = best_lb - target                        # absolute distance to the target
gap_pct = gap / target * 100                  # the same distance, relative to the target
ratio = best_lb / target                      # best LB as a multiple of the target
improvement_needed = gap / best_lb * 100      # relative LB reduction required

report_lines = [
    '=== GAP TO TARGET ===',
    f'Target: {target}',
    f'Best LB: {best_lb:.4f}',
    f'Gap: {gap:.4f} ({gap_pct:.1f}% above target)',
    f'Ratio: {ratio:.2f}x',
    # How much improvement needed?
    f'\nNeed to improve LB by {improvement_needed:.1f}% to reach target',
]
print('\n'.join(report_lines))

=== GAP TO TARGET ===
Target: 0.01727
Best LB: 0.0893
Gap: 0.0720 (417.1% above target)
Ratio: 5.17x

Need to improve LB by 80.7% to reach target


In [6]:
# Checklist of modelling directions that have not been run yet, printed verbatim.
unexplored_approaches = '''
1. 4-Model Ensemble (MLP + XGB + RF + LGBM)
   - Current: MLP + LGBM only
   - Potential: More diversity in ensemble

2. Per-Target Models
   - SM has different characteristics (mean 0.52) vs Products (mean 0.13)
   - Competition explicitly allows different hyperparameters per target

3. Stacking Meta-Learner
   - Current: Fixed weights (0.6 MLP + 0.4 LGBM)
   - Potential: Learn optimal weights from OOF predictions

4. Non-linear Mixture Encoding
   - Current: Linear interpolation A*(1-pct) + B*pct
   - Potential: Add interaction term A*B*pct*(1-pct)

5. Larger MLP Ensemble
   - Current: 5 MLPs
   - Potential: 10-15 MLPs for variance reduction

6. Different Feature Subsets
   - Try DRFP-only, Spange-only, ACS-only models
   - Ensemble models trained on different feature sets
'''

print('=== UNEXPLORED APPROACHES ===')
print(unexplored_approaches)

=== UNEXPLORED APPROACHES ===

1. 4-Model Ensemble (MLP + XGB + RF + LGBM)
   - Current: MLP + LGBM only
   - Potential: More diversity in ensemble

2. Per-Target Models
   - SM has different characteristics (mean 0.52) vs Products (mean 0.13)
   - Competition explicitly allows different hyperparameters per target

3. Stacking Meta-Learner
   - Current: Fixed weights (0.6 MLP + 0.4 LGBM)
   - Potential: Learn optimal weights from OOF predictions

4. Non-linear Mixture Encoding
   - Current: Linear interpolation A*(1-pct) + B*pct
   - Potential: Add interaction term A*B*pct*(1-pct)

5. Larger MLP Ensemble
   - Current: 5 MLPs
   - Potential: 10-15 MLPs for variance reduction

6. Different Feature Subsets
   - Try DRFP-only, Spange-only, ACS-only models
   - Ensemble models trained on different feature sets



In [7]:
# Ranked list of candidate approaches, followed by the plan for the next experiment.
priority_ranking = '''
HIGH IMPACT (try first):
1. 4-Model Ensemble - More model diversity typically helps
2. Per-Target Models - Exploit target-specific patterns
3. Stacking Meta-Learner - Learn optimal combination

MEDIUM IMPACT:
4. Non-linear Mixture Encoding - May capture interaction effects
5. Larger MLP Ensemble - Variance reduction

LOW IMPACT (already tried variations):
6. Different architectures - Already optimized
7. Different features - ACS PCA already added
'''

next_experiment = '''
exp_025: 4-Model Ensemble with ACS PCA Features
- MLP (5 models, [32,16])
- LightGBM
- XGBoost
- Random Forest
- Weights: MLP 0.4, XGB 0.2, RF 0.2, LGBM 0.2
- Expected CV improvement: 2-5%
'''

print('=== PRIORITY RANKING ===')
print(priority_ranking)

print('\n=== RECOMMENDED NEXT EXPERIMENT ===')
print(next_experiment)

=== PRIORITY RANKING ===

HIGH IMPACT (try first):
1. 4-Model Ensemble - More model diversity typically helps
2. Per-Target Models - Exploit target-specific patterns
3. Stacking Meta-Learner - Learn optimal combination

MEDIUM IMPACT:
4. Non-linear Mixture Encoding - May capture interaction effects
5. Larger MLP Ensemble - Variance reduction

LOW IMPACT (already tried variations):
6. Different architectures - Already optimized
7. Different features - ACS PCA already added


=== RECOMMENDED NEXT EXPERIMENT ===

exp_025: 4-Model Ensemble with ACS PCA Features
- MLP (5 models, [32,16])
- LightGBM
- XGBoost
- Random Forest
- Weights: MLP 0.4, XGB 0.2, RF 0.2, LGBM 0.2
- Expected CV improvement: 2-5%

