# Loop 113 LB Feedback Analysis

## New Submission Result
- exp_111: SimilarityAwareModel with CORRECT Submission Format
- CV: 0.0129 | LB: 0.1063
- Gap: -0.0934

## Key Question
Does the chemical similarity approach change the CV-LB relationship?

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Every submission that has a leaderboard (LB) score, one row per run:
# (experiment id, cross-validation score, leaderboard score).
scored_runs = (
    ('exp_000', 0.0111, 0.0982),
    ('exp_001', 0.0123, 0.1065),
    ('exp_003', 0.0105, 0.0972),
    ('exp_005', 0.0104, 0.0969),
    ('exp_006', 0.0097, 0.0946),
    ('exp_007', 0.0093, 0.0932),
    ('exp_009', 0.0092, 0.0936),
    ('exp_012', 0.0090, 0.0913),
    ('exp_024', 0.0087, 0.0893),
    ('exp_026', 0.0085, 0.0887),
    ('exp_030', 0.0083, 0.0877),  # best LB so far
    ('exp_035', 0.0098, 0.0970),
    ('exp_073', 0.0084, 0.1451),  # outlier - likely a bug
    ('exp_111', 0.0129, 0.1063),  # NEW - SimilarityAwareModel
)

# Expand the compact table into the list-of-dicts form used below.
submissions = [
    {'exp': run_id, 'cv': cv_score, 'lb': lb_score}
    for run_id, cv_score, lb_score in scored_runs
]

# Column order is pinned explicitly so the printed table layout is stable.
df = pd.DataFrame(submissions, columns=['exp', 'cv', 'lb'])

print("Total submissions with LB: {}".format(len(df)))
print(df.to_string(index=False))

Total submissions with LB: 14
    exp     cv     lb
exp_000 0.0111 0.0982
exp_001 0.0123 0.1065
exp_003 0.0105 0.0972
exp_005 0.0104 0.0969
exp_006 0.0097 0.0946
exp_007 0.0093 0.0932
exp_009 0.0092 0.0936
exp_012 0.0090 0.0913
exp_024 0.0087 0.0893
exp_026 0.0085 0.0887
exp_030 0.0083 0.0877
exp_035 0.0098 0.0970
exp_073 0.0084 0.1451
exp_111 0.0129 0.1063


In [None]:
# Analyze CV-LB relationship
#
# Fits LB = slope * CV + intercept over all scored submissions except the
# exp_073 outlier, reports what CV the fitted line would need to reach the
# target LB, and then checks how far exp_111 sits from that line.

TARGET_LB = 0.0347    # leaderboard score we are trying to reach
ON_LINE_TOL = 0.005   # |actual - predicted| LB below this counts as "on the line"

# Exclude exp_073 (outlier with LB=0.1451 - likely a bug)
df_valid = df[df['exp'] != 'exp_073'].copy()

# Fit linear regression (sklearn expects a 2-D feature matrix, hence the reshape)
X = df_valid['cv'].values.reshape(-1, 1)
y = df_valid['lb'].values
reg = LinearRegression()
reg.fit(X, y)

slope = reg.coef_[0]
intercept = reg.intercept_
r2 = reg.score(X, y)

print("="*60)
print("CV-LB RELATIONSHIP ANALYSIS (excluding exp_073 outlier)")
print("="*60)
print(f"\nLinear fit: LB = {slope:.4f} × CV + {intercept:.4f}")
print(f"R-squared: {r2:.4f}")
print(f"\nIntercept: {intercept:.4f}")
print(f"Target LB: {TARGET_LB:.4f}")

# The conclusions below are derived from the fitted values rather than
# printed unconditionally, so they stay truthful if the data changes.
if intercept > TARGET_LB:
    print(f"\nCRITICAL: Intercept ({intercept:.4f}) > Target ({TARGET_LB:.4f})")
else:
    print(f"\nIntercept ({intercept:.4f}) <= Target ({TARGET_LB:.4f})")

if slope == 0:
    # A flat line never crosses the target (unless it already sits on it).
    print("Required CV to hit target: undefined (fitted slope is 0)")
else:
    required_cv = (TARGET_LB - intercept) / slope
    print(f"Required CV to hit target: ({TARGET_LB:.4f} - {intercept:.4f}) / {slope:.4f} = {required_cv:.6f}")
    if required_cv < 0:
        print("\n⚠️ NEGATIVE CV REQUIRED - TARGET IS MATHEMATICALLY UNREACHABLE WITH THIS LINE!")
    else:
        print(f"\nTarget is reachable on this line at CV = {required_cv:.6f}")

# Check if exp_111 is on the line or off it
# NOTE(review): exp_111 is part of df_valid, so it also influenced the fit it
# is being compared against; the deviation is an in-sample residual.
exp_111 = df[df['exp'] == 'exp_111'].iloc[0]
expected_lb = slope * exp_111['cv'] + intercept
actual_lb = exp_111['lb']
deviation = actual_lb - expected_lb
print("\n" + "="*60)
print("exp_111 (SimilarityAwareModel) ANALYSIS")
print("="*60)
print(f"CV: {exp_111['cv']:.4f}")
print(f"Expected LB from line: {expected_lb:.4f}")
print(f"Actual LB: {actual_lb:.4f}")
print(f"Deviation from line: {deviation:.4f} ({deviation/expected_lb*100:.1f}%)")
if abs(deviation) < ON_LINE_TOL:
    print("\n❌ exp_111 is ON THE LINE - chemical similarity did NOT change the relationship")
else:
    # Lower LB is better here, so landing below the line is the good outcome.
    print(f"\n{'✅' if deviation < 0 else '❌'} exp_111 is {'BELOW' if deviation < 0 else 'ABOVE'} the line by {abs(deviation):.4f}")