# Loop 8 Analysis: Simplification Trend Validation

## Key Questions:
1. Is exp_007 (the [32,16] MLP) ready for submission?
2. What LB score does the historical CV-LB ratio predict for it?
3. Should we continue simplifying the architecture or try a different approach?
4. What is the path to beating the target LB score of 0.0333?

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Submission history: one record per scored experiment, holding the
# experiment id, an architecture label, the CV score and the LB score.
_fields = ('exp', 'arch', 'cv', 'lb')
submissions = [
    dict(zip(_fields, row))
    for row in (
        ('exp_000', '[128,128,64]', 0.0111, 0.0982),
        ('exp_001', 'LightGBM', 0.0123, 0.1065),
        ('exp_003', '[256,128,64]', 0.0105, 0.0972),
        ('exp_005', '[256,128,64] 15m', 0.0104, 0.0969),
        ('exp_006', '[64,32]', 0.00975, 0.0946),
    )
]

# Tabulate the history and append the LB/CV ratio as the last column.
df = pd.DataFrame(submissions).assign(
    ratio=lambda frame: frame['lb'] / frame['cv'],
)

print('Submission History:')
print(df.to_string(index=False))
# Summary statistics of the ratio across all submissions.
print('\nAverage CV-LB ratio: {:.2f}'.format(df['ratio'].mean()))
print('Std CV-LB ratio: {:.2f}'.format(df['ratio'].std()))

In [None]:
# Predict LB for exp_007
# Scale the exp_007 CV score by the LB/CV ratio observed on past submissions
# (the `ratio` column of `df`). The reported range is the mean ratio plus or
# minus one standard deviation of that column.
exp_007_cv = 0.009262
avg_ratio = df['ratio'].mean()
std_ratio = df['ratio'].std()

# Best LB so far, read from the history table instead of being typed in twice,
# so it cannot go stale when a new submission row is added. The smallest LB is
# taken as "best" (the analysis treats lower scores as better).
best_lb_so_far = df['lb'].min()

predicted_lb_mean = exp_007_cv * avg_ratio
predicted_lb_low = exp_007_cv * (avg_ratio - std_ratio)
predicted_lb_high = exp_007_cv * (avg_ratio + std_ratio)

# Relative gain of the mean prediction over the best LB so far, in percent.
expected_gain_pct = (best_lb_so_far - predicted_lb_mean) / best_lb_so_far * 100

print(f'exp_007 CV: {exp_007_cv:.6f}')
print('\nPredicted LB for exp_007:')
print(f'  Mean prediction: {predicted_lb_mean:.4f}')
print(f'  Range: {predicted_lb_low:.4f} - {predicted_lb_high:.4f}')
print(f'\nCurrent best LB: {best_lb_so_far:.4f}')
print(f'Expected improvement: {expected_gain_pct:.1f}%')

In [None]:
# Simplification trend analysis
# Three MLP variants, widest to narrowest: architecture label, weight count
# (products of consecutive layer widths, as written below) and CV score.
mlp_experiments = [
    {'arch': label, 'params': n_weights, 'cv': cv_score}
    for label, n_weights, cv_score in (
        ('[256,128,64]', 256*140 + 128*256 + 64*128 + 3*64, 0.0105),
        ('[64,32]', 64*140 + 32*64 + 3*32, 0.00975),
        ('[32,16]', 32*140 + 16*32 + 3*16, 0.00926),
    )
]

print('Simplification Trend:')
for entry in mlp_experiments:
    print("  {arch}: {params:,} params -> CV {cv:.5f}".format(**entry))

# Relative change between each model and the next, smaller one.
print('\nImprovement per step:')
for bigger, smaller in zip(mlp_experiments, mlp_experiments[1:]):
    cv_drop_pct = (bigger['cv'] - smaller['cv']) / bigger['cv'] * 100
    size_drop_pct = (bigger['params'] - smaller['params']) / bigger['params'] * 100
    print(
        f"  {bigger['arch']} -> {smaller['arch']}: "
        f"CV -{cv_drop_pct:.1f}%, params -{size_drop_pct:.1f}%"
    )

In [None]:
# What would even simpler models look like?
# Lists weight counts for progressively smaller networks. Counts are products
# of consecutive layer widths only; no bias terms are included.
print('Potential even simpler architectures:')
feature_dim = 140
# Width of the final layer; presumably the number of model outputs (it is the
# trailing "3*..." factor in every count) -- confirm against the model code.
output_dim = 3


def count_weights(hidden_widths, n_inputs, n_outputs):
    """Return the number of weights in a dense net with the given layer widths.

    Sums the products of consecutive widths along
    n_inputs -> hidden_widths... -> n_outputs. An empty ``hidden_widths``
    describes a purely linear model (n_inputs * n_outputs weights).
    """
    widths = [n_inputs, *hidden_widths, n_outputs]
    return sum(fan_in * fan_out for fan_in, fan_out in zip(widths, widths[1:]))


# (label, hidden layer widths); the first entry is the current best model.
candidate_layouts = [
    ('[32,16]', (32, 16)),  # Current best
    ('[16,8]', (16, 8)),
    ('[16]', (16,)),
    ('[32]', (32,)),
    ('Linear', ()),
]

architectures = [
    (label, count_weights(hidden, feature_dim, output_dim))
    for label, hidden in candidate_layouts
]

for arch, params in architectures:
    print(f"  {arch}: {params:,} params")

# Take the figure from the table so the note cannot drift from the numbers
# printed above if feature_dim or output_dim changes.
linear_params = dict(architectures)['Linear']
print(f'\nNote: Linear model would have only {linear_params:,} parameters!')
print('If the simplification trend continues, linear might be optimal.')

In [None]:
# Gap to target analysis
# Compares the best LB so far and the projected exp_007 LB against the target,
# then works out the CV score that would reach the target at the average ratio.
target = 0.0333
current_best_lb = 0.0946
exp_007_predicted_lb = predicted_lb_mean

# Distance above the target, as a percentage of the target.
gap_best_pct = (current_best_lb - target) / target * 100
gap_predicted_pct = (exp_007_predicted_lb - target) / target * 100

# What CV would we need to beat target?
required_cv = target / avg_ratio
# Further CV reduction needed, as a percentage of the exp_007 CV.
cv_cut_pct = (exp_007_cv - required_cv) / exp_007_cv * 100

gap_report = (
    'Gap to Target Analysis:',
    f'  Target: {target}',
    f'  Current best LB: {current_best_lb}',
    f'  exp_007 predicted LB: {exp_007_predicted_lb:.4f}',
    f'\n  Gap from current best: {gap_best_pct:.1f}%',
    f'  Gap from exp_007 predicted: {gap_predicted_pct:.1f}%',
    f'\n  To beat target with {avg_ratio:.2f}x ratio, need CV < {required_cv:.5f}',
    f'  Current best CV: {exp_007_cv:.5f}',
    f'  Required improvement: {cv_cut_pct:.1f}%',
)
print('\n'.join(gap_report))

In [None]:
# Strategic decision
# Prints a fixed summary: a banner followed by four numbered sections, each a
# heading plus "   - " bullets, with a blank line before every section.
banner = '=' * 60
decision_sections = (
    ('1. SUBMIT exp_007 [32,16]?', (
        'Best CV achieved (0.00926)',
        'Predicted LB: ~0.085-0.090 (vs current best 0.0946)',
        'Expected improvement: ~5-10%',
        'RECOMMENDATION: YES - validate simplification trend on LB',
    )),
    ('2. Continue simplifying after submission?', (
        'Try [16,8] or [16] or linear model',
        'The trend shows no plateau yet',
        'RECOMMENDATION: YES - if LB improves, continue simplifying',
    )),
    ('3. Path to target 0.0333?', (
        'Current gap: ~2.8x (0.0946 -> 0.0333)',
        'Simplification alone unlikely to close this gap',
        'May need fundamentally different approach (GNN, etc.)',
        'BUT: Continue simplifying to find optimal MLP baseline',
    )),
    ('4. Remaining submissions: 2', (
        'Submit exp_007 now (1 remaining after)',
        'Final submission: best approach based on all experiments',
    )),
)

print(banner)
print('STRATEGIC DECISION')
print(banner)
for heading, bullets in decision_sections:
    print()
    print(heading)
    for bullet in bullets:
        print('   - ' + bullet)