# Loop 63 Analysis - Final Push

## Situation
- Best LB: 0.0873 (exp_032)
- Target: 0.0702
- Gap: 24.3% (best LB is above the target, measured relative to the target; lower is better)
- 4 submissions remaining

## Key Insights from Kernels
1. matthewmaree: CatBoost + XGBoost ensemble with correlation-filtered features
2. mixall: MLP + XGB + RF + LGBM ensemble
3. Both use multiple feature sources with filtering

In [None]:
import pandas as pd
import numpy as np
import json

# Load session state to see all experiments.
# JSON is UTF-8 by specification, so do not depend on the platform default encoding.
with open('/home/code/session_state.json', encoding='utf-8') as f:
    state = json.load(f)


def _fmt_score(value, digits=4):
    """Return ``value`` formatted to ``digits`` decimals, or 'N/A' if it is not a number.

    A submission entry may have no CV or LB score (missing key or null).
    Applying a float format spec to such a value raises instead of printing,
    so non-numeric values are rendered as the placeholder text.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return f'{value:.{digits}f}'
    return 'N/A'


# Get submission history
submissions = state.get('submissions', [])
print('Submission History:')
for s in submissions:
    print(
        f"  {s.get('experiment_id', 'N/A')}: "
        f"CV={_fmt_score(s.get('cv_score'))}, LB={_fmt_score(s.get('lb_score'))}"
    )

# Calculate CV-LB relationship.
# Keep only submissions that carry BOTH scores: filtering the two lists
# independently can leave them with different lengths or pair a CV score with
# the LB score of a different submission.
scored_pairs = [
    (s['cv_score'], s['lb_score'])
    for s in submissions
    if s.get('cv_score') and s.get('lb_score')
]
cv_scores = [cv for cv, _ in scored_pairs]
lb_scores = [lb for _, lb in scored_pairs]

# A line fit needs at least two distinct x values; linregress raises otherwise.
if len(set(cv_scores)) >= 2:
    from scipy import stats
    slope, intercept, r_value, p_value, std_err = stats.linregress(cv_scores, lb_scores)
    # \u00b2 is the superscript two; written as an escape so the source stays ASCII-safe.
    print(f'\nCV-LB Relationship: LB = {slope:.2f}*CV + {intercept:.4f} (R\u00b2={r_value**2:.3f})')
    if slope != 0:
        # Invert the fitted line to find the CV score that maps onto the target LB.
        print(f'To hit target 0.0702: Need CV = {(0.0702 - intercept) / slope:.6f}')
    else:
        print('To hit target 0.0702: fitted slope is 0, so no CV value maps onto the target')

In [None]:
# Analyze what worked and what didn't
experiments = state.get('experiments', [])

# CV threshold below which an experiment is listed as one of the best.
# Lower scores are better (the best score is taken with min()).
cv_threshold = 0.009

# Best (lowest) CV score. An experiment with a missing or null score counts as
# 1.0, matching the fallback used for missing scores; an empty experiment list
# yields None instead of making min() raise.
cv_values = [1.0 if e.get('score') is None else e['score'] for e in experiments]
best_cv = min(cv_values, default=None)
if best_cv is None:
    print('Best CV achieved: N/A (no experiments recorded)')
else:
    print(f'Best CV achieved: {best_cv:.6f}')

# Find experiments with best CV; entries without a numeric score cannot qualify
# and are skipped rather than compared against the threshold.
best_exps = [
    e for e in experiments
    if e.get('score') is not None and e['score'] < cv_threshold
]
print(f'\nExperiments with CV < {cv_threshold}:')
for e in best_exps:
    print(f"  {e.get('id')}: {e.get('name')} - CV={e['score']:.6f}")

In [None]:
# Open question: which approaches have not been tried yet?
#
# Ideas taken from the public kernels (all from matthewmaree):
#   1. CatBoost trained with the MultiRMSE loss
#   2. Feature filtering based on correlation
#   3. Separate ensemble weights for single vs full
#   4. Clipping followed by normalization
#
# Already covered by our experiments:
#   - Models: MLP, LightGBM, XGBoost, CatBoost, RandomForest, GP
#   - A variety of feature combinations
#   - A variety of ensemble weights
#
# Where the matthewmaree kernel differs:
#   1. Draws on ALL feature sources (spange, acs_pca, drfps, fragprints, smiles)
#   2. Correlation filtering at a threshold of 0.90
#   3. CatBoost with specific hyperparameters
#   4. Weights differ for single (7:6) vs full (1:2)

# Print the summary heading, then one line per difference.
kernel_differences = (
    '1. Uses ALL feature sources with correlation filtering',
    '2. CatBoost with MultiRMSE loss',
    '3. Different ensemble weights for single vs full',
    '4. Clipping + normalization of predictions',
)

print('Key differences in matthewmaree kernel:')
for difference in kernel_differences:
    print(difference)

In [None]:
# Inventory the data directory, then load each feature lookup CSV and report its shape.
import os

data_path = '/home/data'

# Show every entry found in the data directory.
print('Available data files:')
for entry in os.listdir(data_path):
    print(f'  {entry}')

# Read the four feature tables; each stays available under its own name.
spange = pd.read_csv(f'{data_path}/spange_descriptors_lookup.csv')
drfp = pd.read_csv(f'{data_path}/drfps_catechol_lookup.csv')
acs_pca = pd.read_csv(f'{data_path}/acs_pca_descriptors_lookup.csv')
fragprints = pd.read_csv(f'{data_path}/fragprints_lookup.csv')

# Report the (rows, columns) shape of each table under its display label.
print('\nFeature dimensions:')
loaded_tables = (
    ('Spange', spange),
    ('DRFP', drfp),
    ('ACS PCA', acs_pca),
    ('Fragprints', fragprints),
)
for label, table in loaded_tables:
    print(f'  {label}: {table.shape}')

In [None]:
# Plan for the final push:
#   1. Try a CatBoost + XGBoost ensemble in the style of matthewmaree
#   2. Use correlation-filtered features
#   3. Use different weights for single vs full
#
# Before that, the CV-LB gap needs to be understood better.
# The gap is structural, so improving CV alone won't help;
# the intercept is what has to come down.
#
# Candidate directions:
#   1. A fundamentally different approach (GNN failed)
#   2. Different feature engineering
#   3. A different model architecture
#   4. A different ensemble strategy

# Emit the heading and the four options in a single print call, one per line.
strategy_lines = [
    'Final strategy options:',
    '1. CatBoost + XGBoost ensemble (matthewmaree style)',
    '2. Try fragprints features (not used in our best model)',
    '3. Try correlation filtering (not used in our best model)',
    '4. Try different ensemble weights for single vs full',
]
print(*strategy_lines, sep='\n')