# Loop 59 Analysis: Submission Failure Investigation

**Issue:** exp_056 (CV 0.0093) failed with 'Evaluation metric raised an unexpected error'

**Goal:** Understand why the submission failed and identify the best path forward.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Submission history, one (experiment id, CV score, LB score) triple per
# submitted experiment. Kept as compact tuples and expanded into records
# below so the table is easy to scan and extend.
_history_rows = [
    ('exp_000', 0.0111, 0.0982),
    ('exp_001', 0.0123, 0.1065),
    ('exp_003', 0.0105, 0.0972),
    ('exp_005', 0.0104, 0.0969),
    ('exp_006', 0.0097, 0.0946),
    ('exp_007', 0.0093, 0.0932),
    ('exp_009', 0.0092, 0.0936),
    ('exp_012', 0.0090, 0.0913),
    ('exp_024', 0.0087, 0.0893),
    ('exp_026', 0.0085, 0.0887),
    ('exp_030', 0.0083, 0.0877),
    ('exp_035', 0.0098, 0.0970),
]

# Expand into the list-of-dicts form consumed by pandas.
submissions = [
    {'exp': exp_id, 'cv': cv_score, 'lb': lb_score}
    for exp_id, cv_score, lb_score in _history_rows
]

# One row per submission; columns are exp / cv / lb in that order.
df = pd.DataFrame(submissions)
print("Submission History:", df.to_string(index=False), sep="\n")

Submission History:
    exp     cv     lb
exp_000 0.0111 0.0982
exp_001 0.0123 0.1065
exp_003 0.0105 0.0972
exp_005 0.0104 0.0969
exp_006 0.0097 0.0946
exp_007 0.0093 0.0932
exp_009 0.0092 0.0936
exp_012 0.0090 0.0913
exp_024 0.0087 0.0893
exp_026 0.0085 0.0887
exp_030 0.0083 0.0877
exp_035 0.0098 0.0970


In [2]:
# CV-LB Relationship Analysis
# Fit a least-squares line LB = slope * CV + intercept over the submission
# history, then solve that line for the CV that would yield the target LB.
from sklearn.linear_model import LinearRegression

X = df[['cv']].to_numpy()  # 2-D feature matrix: one column (cv), one row per submission
y = df['lb'].to_numpy()

# fit() returns the estimator itself, so construction and fitting can be chained.
reg = LinearRegression().fit(X, y)

slope, intercept = reg.coef_[0], reg.intercept_
r2 = reg.score(X, y)  # R-squared on the same points the line was fit to

print(
    f"\nCV-LB Linear Relationship:",
    f"  LB = {slope:.2f} * CV + {intercept:.4f}",
    f"  R-squared = {r2:.4f}",
    f"\nIntercept Analysis:",
    f"  Intercept: {intercept:.4f}",
    f"  Target: 0.0347",
    f"  Gap: {intercept - 0.0347:.4f}",
    f"\nRequired CV to hit target:",
    sep="\n",
)

# Invert the fitted line: CV = (target - intercept) / slope.
required_cv = (0.0347 - intercept) / slope
print(f"  (0.0347 - {intercept:.4f}) / {slope:.2f} = {required_cv:.6f}")

# A negative required CV means the fitted line never reaches the target.
verdict = (
    f"  IMPOSSIBLE: Required CV is negative!"
    if required_cv < 0
    else f"  Required CV: {required_cv:.6f}"
)
print(verdict)


CV-LB Linear Relationship:
  LB = 4.31 * CV + 0.0525
  R-squared = 0.9505

Intercept Analysis:
  Intercept: 0.0525
  Target: 0.0347
  Gap: 0.0178

Required CV to hit target:
  (0.0347 - 0.0525) / 4.31 = -0.004130
  IMPOSSIBLE: Required CV is negative!


In [3]:
# Check current submission file
# Load the CSV and report its shape, per-task fold coverage, and basic
# sanity statistics for each target column.
submission = pd.read_csv('/home/submission/submission.csv')
print("Current Submission File:")
print(f"  Shape: {submission.shape}")
print(f"  Columns: {submission.columns.tolist()}")

# Split by the 'task' column; both subsets get the same fold report.
task0 = submission[submission['task'] == 0]
task1 = submission[submission['task'] == 1]
for header, task_rows in (
    (f"\nTask 0 (single solvent):", task0),
    (f"\nTask 1 (full data):", task1),
):
    fold_ids = task_rows['fold']
    print(header)
    print(f"  Rows: {len(task_rows)}")
    print(f"  Folds: {fold_ids.nunique()}")
    print(f"  Fold values: {sorted(fold_ids.unique())}")

# Per-target range plus counts of NaN / Inf / out-of-[0, 1] values.
print(f"\nTarget statistics:")
for col in ['target_1', 'target_2', 'target_3']:
    values = submission[col]
    print(f"  {col}: min={values.min():.6f}, max={values.max():.6f}, mean={values.mean():.6f}")
    print(f"    NaN: {values.isna().sum()}, Inf: {np.isinf(values).sum()}")
    print(f"    Values > 1: {(values > 1).sum()}, Values < 0: {(values < 0).sum()}")

Current Submission File:
  Shape: (1883, 8)
  Columns: ['id', 'index', 'task', 'fold', 'row', 'target_1', 'target_2', 'target_3']

Task 0 (single solvent):
  Rows: 656
  Folds: 24
  Fold values: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]

Task 1 (full data):
  Rows: 1227
  Folds: 13
  Fold values: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

Target statistics:
  target_1: min=0.000000, max=0.392628, mean=0.155242
    NaN: 0, Inf: 0
    Values > 1: 0, Values < 0: 0
  target_2: min=0.000000, max=0.379141, mean=0.132814
    NaN: 0, Inf: 0
    Values > 1: 0, Values < 0: 0
  target_3: min=0.014701, max=0.994014, mean=0.518495
    NaN: 0, Inf: 0
    Values > 1: 0, Values < 0: 0


In [4]:
# Summary of key findings
# Prints the fitted CV-LB line, the best results so far, the failure being
# investigated, and the resulting conclusion. Relies on `df`, `slope`,
# `intercept` and `r2` from the earlier cells.

# Target LB score used in every comparison below (single source of truth
# for this cell instead of a repeated bare literal).
TARGET_LB = 0.0347

print("="*60)
print("LOOP 59 ANALYSIS SUMMARY")
print("="*60)

print(f"\n1. CV-LB RELATIONSHIP:")
print(f"   LB = {slope:.2f} * CV + {intercept:.4f} (R-squared = {r2:.4f})")
print(f"   Intercept ({intercept:.4f}) > Target ({TARGET_LB})")
print(f"   This means even CV=0 would give LB={intercept:.4f}")

# Look up the experiment ids from the data rather than hard-coding them, so
# the labels stay correct when new submissions are appended to `df`.
# idxmin() returns the first row on ties, matching the `.values[0]` pick below.
best_lb = df['lb'].min()
best_lb_exp = df.loc[df['lb'].idxmin(), 'exp']
best_cv = df[df['lb'] == best_lb]['cv'].values[0]  # CV of the best-LB run; not printed in this cell
lowest_cv = df['cv'].min()
lowest_cv_exp = df.loc[df['cv'].idxmin(), 'exp']
print(f"\n2. BEST RESULTS:")
print(f"   Best LB: {best_lb} ({best_lb_exp})")
print(f"   Best CV: {lowest_cv} ({lowest_cv_exp})")
print(f"   Gap to target: {best_lb - TARGET_LB:.4f}")

# NOTE: the row/fold counts and range statement below are fixed text in this
# cell, not computed from the submission file here.
print(f"\n3. SUBMISSION FAILURE:")
print(f"   exp_056 (CV 0.0093) failed with 'Evaluation metric raised an unexpected error'")
print(f"   The submission format looks correct (1883 rows, 24+13 folds)")
print(f"   All targets are in [0, 1] range")

print(f"\n4. REMAINING SUBMISSIONS: 5")

print(f"\n5. KEY INSIGHT:")
print(f"   The intercept ({intercept:.4f}) is HIGHER than the target ({TARGET_LB})")
print(f"   This is a STRUCTURAL distribution shift problem")
print(f"   No amount of CV improvement will reach the target with current approaches")
print(f"   We need approaches that CHANGE the CV-LB relationship (reduce intercept)")

LOOP 59 ANALYSIS SUMMARY

1. CV-LB RELATIONSHIP:
   LB = 4.31 * CV + 0.0525 (R-squared = 0.9505)
   Intercept (0.0525) > Target (0.0347)
   This means even CV=0 would give LB=0.0525

2. BEST RESULTS:
   Best LB: 0.0877 (exp_030)
   Best CV: 0.0083 (exp_030)
   Gap to target: 0.0530

3. SUBMISSION FAILURE:
   exp_056 (CV 0.0093) failed with 'Evaluation metric raised an unexpected error'
   The submission format looks correct (1883 rows, 24+13 folds)
   All targets are in [0, 1] range

4. REMAINING SUBMISSIONS: 5

5. KEY INSIGHT:
   The intercept (0.0525) is HIGHER than the target (0.0347)
   This is a STRUCTURAL distribution shift problem
   No amount of CV improvement will reach the target with current approaches
   We need approaches that CHANGE the CV-LB relationship (reduce intercept)
