# Loop 3 Analysis: Understanding the Gap and Planning Ensemble

## Current Status
- Best CV: 87.36 (candidate_002 from local_search)
- Target: 68.95
- Gap: 18.41 points (21% improvement needed)

## Key Questions
1. What's the score breakdown by N range for our best candidate?
2. What would an ensemble built from the available kernels achieve?
3. What's blocking further improvement?

In [None]:
import pandas as pd
import numpy as np
import math
import os
import glob
from numba import njit

# Tree template
@njit
def make_polygon_template():
    """Build the 15-vertex outline used as the tree template.

    The outline is symmetric about x = 0. Vertices start at the tip
    (0, 0.8), run down the right-hand side to the bottom edge at y = -0.2,
    and come back up the left-hand side.

    Returns:
        A pair ``(x, y)`` of float64 arrays of length 15 holding the vertex
        coordinates in order.
    """
    # Full widths; every x coordinate below is a half or a quarter of one.
    # NOTE(review): the names assume a trunk plus three foliage tiers.
    trunk_width = 0.15
    bottom_width = 0.7
    middle_width = 0.4
    top_width = 0.25

    # y levels at which the outline changes width.
    trunk_height = 0.2
    tip_y = 0.8
    upper_y = 0.5
    lower_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_height

    x = np.array(
        [
            0.0,
            top_width / 2, top_width / 4,
            middle_width / 2, middle_width / 4,
            bottom_width / 2,
            trunk_width / 2, trunk_width / 2,
            -trunk_width / 2, -trunk_width / 2,
            -bottom_width / 2,
            -middle_width / 4, -middle_width / 2,
            -top_width / 4, -top_width / 2,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y,
            upper_y, upper_y,
            lower_y, lower_y,
            base_y,
            base_y, trunk_bottom_y,
            trunk_bottom_y, base_y,
            base_y,
            lower_y, lower_y,
            upper_y, upper_y,
        ],
        np.float64,
    )
    return x, y

# Build the template once; TX / TY are what later cells pass to score_group
# as its tx / ty arguments.
TX, TY = make_polygon_template()

@njit
def score_group(xs, ys, degs, tx, ty):
    """Return the bounding-square area per item for one group of templates.

    Item ``i`` is the template ``(tx, ty)`` rotated by ``degs[i]`` degrees
    (standard 2-D rotation matrix) and then translated by
    ``(xs[i], ys[i])``. The axis-aligned bounding box of every transformed
    vertex is taken, and its longer edge is used as the side of a square.

    Args:
        xs, ys: per-item translations; ``xs.size`` is the number of items.
        degs: per-item rotation angles in degrees.
        tx, ty: template vertex coordinates; ``tx.size`` vertices are read.

    Returns:
        ``side * side / xs.size``.
    """
    count = xs.size
    n_vertices = tx.size

    # Sentinels far outside any coordinate, so the first vertex replaces them.
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300

    for i in range(count):
        angle = degs[i] * math.pi / 180.0
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        off_x = xs[i]
        off_y = ys[i]
        for j in range(n_vertices):
            vx = tx[j]
            vy = ty[j]
            px = cos_a * vx - sin_a * vy + off_x
            py = sin_a * vx + cos_a * vy + off_y
            if px < min_x:
                min_x = px
            if px > max_x:
                max_x = px
            if py < min_y:
                min_y = py
            if py > max_y:
                max_y = py

    width = max_x - min_x
    height = max_y - min_y
    side = height if height > width else width
    return side * side / count

def strip(a):
    """Convert 's'-tagged values (e.g. ``'s1.25'``) to a float64 array.

    Each element is turned into text, every ``'s'`` character is removed,
    and the remainder is parsed with ``float``.
    """
    untagged = (str(value).replace('s', '') for value in a)
    return np.array(list(map(float, untagged)), np.float64)

# Marker output showing that the definitions above were executed.
print('Functions defined')

In [None]:
# Analyze our best candidate (candidate_002)
# Target score, kept in one place so the printed target and the gap cannot
# drift apart.
TARGET_SCORE = 68.947559

df_best = pd.read_csv('/home/code/submission_candidates/candidate_002.csv')
# The part of each id before the first '_' is parsed as the group size N.
df_best['N'] = df_best['id'].str.split('_').str[0].astype(int)

scores_by_n = {}
for n in range(1, 201):
    g = df_best[df_best['N'] == n]
    if g.empty:
        # score_group divides by the group size, so an absent group would
        # otherwise surface as an unexplained ZeroDivisionError.
        raise ValueError(f'candidate_002.csv has no rows for N={n}')
    xs = strip(g['x'].values)
    ys = strip(g['y'].values)
    degs = strip(g['deg'].values)
    scores_by_n[n] = score_group(xs, ys, degs, TX, TY)

total = sum(scores_by_n.values())
print(f'Total score: {total:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {total - TARGET_SCORE:.6f}')

# Score breakdown by range (both ends inclusive)
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
print('\nScore breakdown:')
for start, end in ranges:
    range_score = sum(scores_by_n[n] for n in range(start, end + 1))
    print(f'  N={start:3d}-{end:3d}: {range_score:.4f}')

In [None]:
# Check what kernels have submission files we can use
kernel_dirs = [
    '/home/code/research/kernels/zaburo_88-32999-a-well-aligned-initial-solution',
    '/home/code/research/kernels/smartmanoj_santa-claude',
    '/home/code/research/kernels/saspav_santa-submission',
    '/home/code/research/kernels/jazivxt_why-not',
    '/home/code/research/kernels/yongsukprasertsuk_santa-2025-best-keeping-bbox3-runner',
    '/home/code/research/kernels/jonathanchan_santa25-ensemble-sa-fractional-translation',
]

print('Checking for submission files in kernels...')
for kdir in kernel_dirs:
    kernel_name = os.path.basename(kdir)
    if not os.path.isdir(kdir):
        # Distinguish a kernel directory that is absent from one that exists
        # but holds no CSV files.
        print(f'{kernel_name}: directory not found')
        continue
    # glob order depends on the filesystem; sort so the preview below is
    # the same on every run.
    csvs = sorted(glob.glob(f'{kdir}/**/*.csv', recursive=True))
    print(f'{kernel_name}: {len(csvs)} CSV files')
    # Only the first three paths are shown to keep the output short.
    for csv_path in csvs[:3]:
        print(f'  - {csv_path}')

In [None]:
# Load all our candidates and create ensemble
candidates = [
    '/home/code/submission_candidates/candidate_000.csv',
    '/home/code/submission_candidates/candidate_001.csv',
    '/home/code/submission_candidates/candidate_002.csv',
    '/home/code/submission_candidates/candidate_003.csv',
]

# Also check experiments for any additional submissions.
# glob order depends on the filesystem; sorting keeps the source order (and
# therefore which file wins a tie in the ensemble) the same on every run.
experiment_csvs = sorted(
    glob.glob('/home/code/experiments/**/submission.csv', recursive=True)
)
print(f'Found {len(experiment_csvs)} experiment submissions:')
for csv_path in experiment_csvs:
    print(f'  - {csv_path}')

# Candidates come first, then experiment submissions.
all_csvs = candidates + experiment_csvs
print(f'\nTotal CSVs to ensemble: {len(all_csvs)}')

In [None]:
# Create ensemble from all available sources: for every N keep the rows of
# whichever file scores lowest for that N.
REQUIRED_COLUMNS = {'id', 'x', 'y', 'deg'}
best = {n: {'score': 1e300, 'data': None, 'src': None} for n in range(1, 201)}

for fp in all_csvs:
    # Best-effort per file: an unreadable or malformed CSV is reported and
    # skipped so it cannot abort the whole ensemble.
    try:
        df = pd.read_csv(fp)
        if not REQUIRED_COLUMNS.issubset(df.columns):
            absent = sorted(REQUIRED_COLUMNS - set(df.columns))
            print(f'Skipping {fp}: missing columns {absent}')
            continue
        # The part of each id before the first '_' is parsed as N.
        df['N'] = df['id'].str.split('_').str[0].astype(int)

        for n, g in df.groupby('N'):
            if not 1 <= n <= 200:
                continue
            xs = strip(g['x'].values)
            ys = strip(g['y'].values)
            degs = strip(g['deg'].values)
            sc = score_group(xs, ys, degs, TX, TY)
            # Strict '<' means the first file seen keeps a tie.
            if sc < best[n]['score']:
                best[n]['score'] = sc
                best[n]['data'] = g[['id', 'x', 'y', 'deg']].copy()
                best[n]['src'] = os.path.basename(fp)
    except Exception as e:
        print(f'Error loading {fp}: {e}')

# An N that no file covered still holds the 1e300 sentinel; summing it would
# make the total meaningless, so report it and leave it out.
missing = [n for n, entry in best.items() if entry['data'] is None]
if missing:
    print(f'WARNING: no source provides N={missing}; excluded from the ensemble score')

ensemble_score = sum(
    entry['score'] for entry in best.values() if entry['data'] is not None
)
print(f'Ensemble score: {ensemble_score:.6f}')
if not missing:
    # Only comparable with the single-file total when all 200 groups exist.
    print(f'Improvement over best single: {total - ensemble_score:.6f}')

In [None]:
# Rank every N by its score in the best candidate, worst (largest) first,
# and list the first twenty.
print('Top 20 N values with highest scores (most room for improvement):')
sorted_n = sorted(
    scores_by_n.items(),
    key=lambda item: item[1],
    reverse=True,
)
top_twenty = sorted_n[:20]
for n, score in top_twenty:
    weight = 1 / n
    print(f'  N={n:3d}: {score:.6f} (weight: {weight:.4f})')

In [None]:
# Compare our scores to an area-based lower bound.
# Assuming trees may not overlap, a bounding square of side s holding N trees
# needs s^2 >= N * A, where A is the area of one tree polygon. Hence
# score = s^2 / N >= A for every N. Real packings leave gaps, so actual
# scores sit above A.
#
# A is computed from the template itself with the shoelace formula
# (np.roll pairs each vertex with the next one, wrapping at the end) rather
# than guessed; for the template above it is about 0.2456.
tree_area = 0.5 * abs(
    float(np.dot(TX, np.roll(TY, -1)) - np.dot(TY, np.roll(TX, -1)))
)

print('Comparison to theoretical estimates:')
print('N\tOur Score\tTheoretical Min\tGap')
for n in [1, 2, 5, 10, 20, 50, 100, 200]:
    our_score = scores_by_n[n]
    print(f'{n}\t{our_score:.4f}\t\t~{tree_area:.4f}\t\t{our_score - tree_area:.4f}')

In [None]:
# Notes on the jonathanchan kernel and on what our approach lacks by
# comparison. Nothing is computed here: the lines are fixed text, printed in
# order, with an empty entry producing the blank separator line.
for note in (
    'Key insights from jonathanchan kernel:',
    '1. Ensemble from 15+ public sources',
    '2. C++ SA with 15,000-20,000 iterations',
    '3. 80+ restarts per N',
    '4. Fractional translation refinement',
    '',
    'Our current approach is missing:',
    '- Ensemble from public sources (we only have our own candidates)',
    '- High iteration count (we have 300-2000, they have 15,000-20,000)',
    '- Multiple restarts (we have 1-3, they have 80+)',
    '- C++ implementation (we use Python)',
):
    print(note)