# Loop 3 LB Feedback Analysis

## Key Findings:
1. CV-LB gap is exactly 0.0000 - our local scoring reproduces the leaderboard score exactly
2. Tessellation approach FAILED for all N values tested
3. Better snapshot exists at 70.647306 (vs current 70.658891)
4. Gap to target (68.919154) from the current score (70.658891): 1.74 points (2.5%)

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os

# Vertex coordinates of the base polygon: TX holds the x-values and TY the
# y-values, paired by index via zip(TX, TY) in get_score_per_n. Each list has
# 15 entries. NOTE(review): presumably the competition's tree outline — confirm.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    """Return ``s`` as a float, dropping a leading 's' marker from strings.

    Strings that start with 's' have that first character removed before
    conversion; every other input is handed to ``float`` unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

def get_score_per_n(df):
    """Compute the bounding-square score of every complete N-group in ``df``.

    Rows are grouped by the zero-padded ``'NNN_'`` prefix of their ``id`` for
    N = 1..200. A group is scored only when it contains exactly N rows; any
    other group is silently left out of the result.

    Args:
        df: DataFrame with a string ``id`` column and ``x``, ``y`` and ``deg``
            columns whose cells are accepted by ``parse_value``.

    Returns:
        dict mapping N to ``{'side': side, 'score': side**2 / N}``, where
        ``side`` is the longer edge of the axis-aligned bounding box around
        all placed polygons of that group.
    """
    # The base outline is the same for every row; build it once instead of
    # once per row. rotate/translate return new geometries, so sharing is safe.
    base = Polygon(zip(TX, TY))

    scores = {}
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        n_trees = df[df['id'].str.startswith(prefix)]
        if len(n_trees) != n:
            continue

        all_coords = []
        # Walk the three columns directly; iterrows would build a Series per row.
        for raw_x, raw_y, raw_deg in zip(n_trees['x'], n_trees['y'], n_trees['deg']):
            x = parse_value(raw_x)
            y = parse_value(raw_y)
            deg = parse_value(raw_deg)
            # Rotate about the polygon's own origin first, then move into place.
            rotated = affinity.rotate(base, deg, origin=(0, 0))
            poly = affinity.translate(rotated, x, y)
            all_coords.append(np.array(poly.exterior.coords))

        # Stack every vertex of the group to get its overall bounding box.
        all_coords = np.vstack(all_coords)
        x_range = all_coords[:, 0].max() - all_coords[:, 0].min()
        y_range = all_coords[:, 1].max() - all_coords[:, 1].min()
        side = max(x_range, y_range)
        scores[n] = {'side': side, 'score': side**2 / n}
    return scores

# Confirmation message so the cell output shows the definitions above were run.
print('Functions defined')

In [None]:
# Read the two submissions being compared: the current best and the
# snapshot labelled as better.
current_csv = '/home/nonroot/snapshots/santa-2025/21164519357/code/exploration/santa-2025.csv'
better_csv = '/home/nonroot/snapshots/santa-2025/21165874980/code/submission_candidates/candidate_001.csv'
df_current = pd.read_csv(current_csv)
df_better = pd.read_csv(better_csv)

print('Current best:', df_current.shape)
print('Better snapshot:', df_better.shape)

# Per-N score tables for both submissions
scores_current = get_score_per_n(df_current)
scores_better = get_score_per_n(df_better)

# Overall score = sum of the per-N contributions
total_current = sum(entry['score'] for entry in scores_current.values())
total_better = sum(entry['score'] for entry in scores_better.values())

print(f'\nCurrent best total: {total_current:.6f}')
print(f'Better snapshot total: {total_better:.6f}')
print(f'Improvement: {total_current - total_better:.6f}')

In [None]:
# Per-N comparison of the two submissions. A positive delta means the better
# snapshot has the lower (better) score for that N.
print('N values where better snapshot is better:')
print('='*60)

# Only N values scored in both submissions can be compared.
shared_ns = [n for n in range(1, 201) if n in scores_current and n in scores_better]

improvements = []
for n in shared_ns:
    cur_score = scores_current[n]['score']
    alt_score = scores_better[n]['score']
    delta = cur_score - alt_score
    # Ignore differences that are only floating-point noise.
    if abs(delta) > 1e-8:
        improvements.append((n, delta, cur_score, alt_score))

# Largest improvement first, largest regression last
improvements.sort(key=lambda rec: rec[1], reverse=True)

print(f'\nTop improvements (better snapshot is better):')
gains = [rec for rec in improvements[:20] if rec[1] > 0]
for n, delta, cur_score, alt_score in gains:
    print(f'  N={n:3d}: current={cur_score:.6f}, better={alt_score:.6f}, improvement={delta:+.6f}')

print(f'\nTop regressions (current is better):')
losses = [rec for rec in improvements[-10:] if rec[1] < 0]
for n, delta, cur_score, alt_score in losses:
    print(f'  N={n:3d}: current={cur_score:.6f}, better={alt_score:.6f}, regression={delta:+.6f}')

In [None]:
# Ensemble estimate: for every N take the lower score of the two submissions.
# Only the resulting total is computed here; no submission file is written.
print('Creating ensemble from best of each N...')

ensemble_total = 0
for n in range(1, 201):
    # Scores available for this N, current first and better second
    candidates = [
        table[n]['score']
        for table in (scores_current, scores_better)
        if n in table
    ]
    if candidates:
        ensemble_total += min(candidates)

print(f'\nEnsemble total: {ensemble_total:.6f}')
print(f'vs Current: {total_current:.6f} (improvement: {total_current - ensemble_total:.6f})')
print(f'vs Better: {total_better:.6f} (improvement: {total_better - ensemble_total:.6f})')

In [None]:
# Check all snapshots for even better solutions
import glob

print('Searching all snapshots for better solutions...')

# Sorted so that the sampled subset below is the same on every run; the order
# returned by glob depends on the file system.
all_csvs = sorted(glob.glob('/home/nonroot/snapshots/santa-2025/*/code/**/*.csv', recursive=True))
print(f'Found {len(all_csvs)} CSV files')

# What a scorable submission must look like: every column get_score_per_n
# reads, and N rows for each N in 1..200 (20100 rows, 200 groups).
required_columns = {'id', 'x', 'y', 'deg'}
expected_groups = 200
expected_rows = sum(range(1, expected_groups + 1))

# Sample a few to check
best_score = total_better
best_path = '/home/nonroot/snapshots/santa-2025/21165874980/code/submission_candidates/candidate_001.csv'

for csv_path in all_csvs[:50]:  # Check first 50
    try:
        df = pd.read_csv(csv_path)
        if not required_columns.issubset(df.columns):
            continue
        if len(df) != expected_rows:
            continue
        scores = get_score_per_n(df)
    except Exception as exc:
        # Best-effort scan: keep going, but say which file was skipped and why.
        # Catching Exception (not a bare except) leaves Ctrl-C working.
        print(f'SKIPPED: {csv_path}: {type(exc).__name__}: {exc}')
        continue

    # get_score_per_n drops incomplete groups; a partial total would look
    # artificially low and be reported as an improvement.
    if len(scores) != expected_groups:
        continue

    total = sum(s['score'] for s in scores.values())
    if total < best_score:
        print(f'BETTER: {csv_path}: {total:.6f}')
        best_score = total
        best_path = csv_path

print(f'\nBest found: {best_score:.6f} at {best_path}')

In [None]:
# Gap analysis: how much each band of N contributes to the better snapshot's
# total, and how far that total is from the target score.
print('Gap analysis: Score contribution by N range')
print('='*60)

target_score = 68.919154

# Inclusive (first N, last N) bands
n_bands = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]

for lo, hi in n_bands:
    band_total = sum(
        scores_better[n]['score']
        for n in range(lo, hi + 1)
        if n in scores_better
    )
    print(f'N={lo:3d}-{hi:3d}: {band_total:.4f}')

print(f'\nTotal: {total_better:.6f}')
print(f'Target: 68.919154')
print(f'Gap: {total_better - target_score:.6f} ({(total_better - target_score) / target_score * 100:.2f}%)')