# Loop 37 Analysis: Critical Assessment

## Key Questions:
1. What is the actual gap to target?
2. What approaches have been tried vs what's left?
3. What do top solutions do differently?

In [None]:
import pandas as pd
import numpy as np
import json

# Read the persisted session state (JSON) from disk
with open('/home/code/session_state.json', 'r') as state_file:
    state = json.load(state_file)

# Summarize the experiment history: total count, then the 15 most recent entries
experiments = state['experiments']
print(f"Total experiments: {len(experiments)}")
print(f"\nScore progression:")
recent_experiments = experiments[-15:]
for entry in recent_experiments:
    # lb_score is optional in the state file; show a placeholder when it is absent
    lb_display = entry.get('lb_score', 'N/A')
    print(f"  {entry['name']}: CV={entry['cv_score']:.6f}, LB={lb_display}")

# Best CV score so far; the code takes the minimum, i.e. lower is better
cv_scores = []
for entry in experiments:
    cv_scores.append(entry['cv_score'])
best_cv = min(cv_scores)
print(f"\nBest CV: {best_cv:.6f}")
print(f"Target: 68.866853")
# Absolute gap to the target and the same gap as a percentage of the target
print(f"Gap: {best_cv - 68.866853:.6f} ({(best_cv - 68.866853)/68.866853*100:.2f}%)")


In [None]:
# Categorize experiments by approach type, based on keywords in the experiment name
import re

approach_types = {
    'baseline': [],
    'local_search': [],  # SA, exhaustive, NFP
    'ensemble': [],
    'constructive': [],  # lattice, BLF, interlock
    'population': [],  # GA
    'extended_opt': []  # long bbox3 runs
}

# 'sa' and 'ga' are two-letter abbreviations. A plain substring test also fires
# on unrelated words ("safe", "again", "gap", "large") and would pre-empt the
# rules further down, so they only count when not embedded in a longer run of
# letters (e.g. "003_sa_v2" and "sa2" match, "ensemble_safe" does not).
sa_abbrev = re.compile(r'(?<![a-z])sa(?![a-z])')
ga_abbrev = re.compile(r'(?<![a-z])ga(?![a-z])')

for exp in experiments:
    # Names are lower-cased once so every keyword test is case-insensitive
    name = exp['name'].lower()

    # The first matching rule wins, so the order of the branches is significant
    if 'baseline' in name:
        approach_types['baseline'].append(exp)
    elif sa_abbrev.search(name) or any(
        x in name for x in ['annealing', 'exhaustive', 'nfp', 'shake', 'numba']
    ):
        approach_types['local_search'].append(exp)
    elif 'ensemble' in name:
        approach_types['ensemble'].append(exp)
    elif any(x in name for x in ['lattice', 'blf', 'interlock', 'jostle', 'constructive']):
        approach_types['constructive'].append(exp)
    elif 'genetic' in name or ga_abbrev.search(name):
        approach_types['population'].append(exp)
    elif '8hr' in name or 'extended' in name:
        approach_types['extended_opt'].append(exp)
    else:
        approach_types['local_search'].append(exp)  # default

# Report only the buckets that received at least one experiment
print("Experiments by approach type:")
for approach, exps in approach_types.items():
    if exps:
        best = min(e['cv_score'] for e in exps)
        print(f"  {approach}: {len(exps)} experiments, best={best:.6f}")


In [None]:
# Per-N score analysis: read the best submission's CSV into a DataFrame
baseline_path = '/home/code/experiments/029_final_ensemble_v2/submission.csv'
df = pd.read_csv(filepath_or_buffer=baseline_path)

# Parse coordinates
def parse_coord(val):
    """Convert a coordinate cell to float, dropping a leading 's' marker.

    String values that begin with 's' (e.g. ``'s1.5'``) have that first
    character removed before conversion; every other value is passed to
    ``float`` unchanged.
    """
    has_marker = isinstance(val, str) and val[:1] == 's'
    numeric_part = val[1:] if has_marker else val
    return float(numeric_part)

# Split each id of the form "<n>_<i>" once, then pull out both integer parts
id_parts = df['id'].apply(lambda raw_id: str(raw_id).split('_'))
df['n'] = id_parts.apply(lambda parts: int(parts[0]))
df['i'] = id_parts.apply(lambda parts: int(parts[1]))
for coord_col in ('x', 'y', 'deg'):
    df[coord_col] = df[coord_col].apply(parse_coord)

# Approximate per-N score: squared bounding-square side divided by N
per_n_scores = {}
for n in range(1, 201):
    group = df.loc[df['n'] == n]
    if group.empty:
        # No rows for this N in the submission; leave it out of the totals
        continue
    x_extent = group['x'].max() - group['x'].min()
    y_extent = group['y'].max() - group['y'].min()
    # Approximation only: the extents come from the x/y columns alone, and a
    # flat 1.0 is added as a rough allowance for the size of a tree
    side = max(x_extent, y_extent) + 1.0
    per_n_scores[n] = side**2 / n

# Largest per-N scores first, i.e. the N values contributing most to the total
print("Top 10 highest score contributors (worst N values):")
sorted_scores = sorted(per_n_scores.items(), key=lambda item: item[1], reverse=True)
for n, score in sorted_scores[:10]:
    print(f"  N={n}: {score:.4f}")

print("\nTotal score:", sum(per_n_scores.values()))


In [None]:
# Where is the theoretical floor?
# The minimum bounding box for N trees is constrained by the tree geometry
# (tree dimensions: width ~0.7, height ~1.0).
#
# Notes:
# - N=1, unrotated: min side = max(0.7, 1.0) = 1.0, so score = 1.0
# - Rotating the tree allows a smaller bounding box
# - The best N=1 rotation gives ~0.813 (from baseline)
#
# The gap to target is 1.45 points (2.1%), spread across all 200 N values,
# so the average improvement needed per N is 1.45/200 = 0.00725.

# Emit the report line by line; the figures are fixed values typed in by hand
gap_report = (
    "Gap analysis:",
    f"  Current best: 70.315537",
    f"  Target: 68.866853",
    f"  Gap: 1.448684 points",
    f"  Average improvement needed per N: {1.448684/200:.6f}",
    f"",
    "  If we improve 50 N values by 0.03 each: 1.5 points",
    "  If we improve 100 N values by 0.015 each: 1.5 points",
    "  If we improve 200 N values by 0.0075 each: 1.5 points",
)
for report_line in gap_report:
    print(report_line)


In [None]:
# List experiments whose notes mention outside data (keywords: external, snapshot, csv)
print("External data sources mentioned in experiments:")
source_keywords = ('external', 'snapshot', 'csv')
for exp in experiments:
    notes = exp.get('notes', '')
    # Lower-case once so the keyword test is case-insensitive
    lowered_notes = notes.lower()
    if any(keyword in lowered_notes for keyword in source_keywords):
        # Only the first 200 characters of the notes are shown
        print(f"  {exp['name']}: {notes[:200]}...")


In [None]:
# Key insight from research:
# Top teams run bbox3 for 24-72 HOURS with 24+ CPUs
# Our longest run was 53 minutes on 1 CPU
# Every derived figure below (CPU-hours, ratio) is computed from these inputs,
# so the printed numbers always agree with each other.

TOP_HOURS_MIN = 24
TOP_HOURS_MAX = 72
TOP_CPUS = 24  # reported as "24+", so the totals below are lower bounds
OUR_MINUTES = 53
OUR_CPUS = 1

# Total compute in CPU-hours for each side
top_cpu_hours_min = TOP_HOURS_MIN * TOP_CPUS
top_cpu_hours_max = TOP_HOURS_MAX * TOP_CPUS
our_cpu_hours = OUR_MINUTES / 60 * OUR_CPUS

# How many times more compute the top competitors used than our best attempt
ratio_min = top_cpu_hours_min / our_cpu_hours
ratio_max = top_cpu_hours_max / our_cpu_hours

print("COMPUTE TIME ANALYSIS:")
print("")
print("Top competitors:")
print(f"  - Run time: {TOP_HOURS_MIN}-{TOP_HOURS_MAX} hours")
print(f"  - CPUs: {TOP_CPUS}+")
print(f"  - Total compute: {top_cpu_hours_min}-{top_cpu_hours_max} CPU-hours")
print("")
print("Our best attempt:")
print(f"  - Run time: {OUR_MINUTES} minutes")
print(f"  - CPUs: {OUR_CPUS}")
print(f"  - Total compute: {our_cpu_hours:.2f} CPU-hours")
print("")
print(f"Ratio: Our compute is 1/{ratio_min:.0f} to 1/{ratio_max:.0f} of top competitors")
print("")
print("CONCLUSION: We have NOT tried extended optimization at scale.")
print("This is the ONLY approach that top teams use that we haven't tried.")
