# Loop 8 Analysis: Breaking the 70.734327 Plateau

## Key Findings from Previous Experiments
- 8 experiments, ALL at the same score: 70.734327
- The baseline is extremely well-optimized
- CSVs with overlapping trees score 67.727, but 30 of their N values contain overlaps
- The full ~3.0-point improvement (70.734 → 67.727) is therefore locked behind overlaps

## Strategy Analysis
1. The bbox3 binary requires GLIBC 2.34 (we have 2.31), so we need to compile our own
2. We have successfully compiled our own build, bbox3_local
3. Next step: run the bbox3 optimizer (via bbox3_local) with proper parameters

In [None]:
import pandas as pd
import numpy as np
import os

# Check current best score
# Load the baseline submission CSV into `df`; the scoring code in this file
# reads its 'id', 'x', 'y' and 'deg' columns.
baseline_path = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
df = pd.read_csv(baseline_path)

# Tree shape: x/y coordinates of the 15 outline vertices, unrotated and
# expressed relative to the tree's local origin.
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
])


def get_tree_polygon(cx, cy, deg):
    """Return the tree outline rotated by ``deg`` and moved to ``(cx, cy)``.

    The template vertices ``TX``/``TY`` are rotated about the local origin
    (counter-clockwise for positive angles) and then translated.

    Args:
        cx: x offset added to every rotated vertex.
        cy: y offset added to every rotated vertex.
        deg: rotation angle in degrees.

    Returns:
        A ``(x, y)`` tuple of arrays holding the transformed vertex
        coordinates, in the same vertex order as ``TX``/``TY``.
    """
    angle = np.radians(deg)
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)

    # Standard 2D rotation first, translation second.
    rotated_x = TX * cos_a - TY * sin_a
    rotated_y = TX * sin_a + TY * cos_a
    return rotated_x + cx, rotated_y + cy

def score_submission(df):
    """Return the total score of a submission table.

    For every N from 1 to 200 the rows whose ``id`` starts with the
    zero-padded prefix ``NNN_`` are selected, each row is turned into a
    polygon with ``get_tree_polygon``, and the side of the axis-aligned
    square bounding all of those vertices is measured.  Each N contributes
    ``side**2 / N``; N values without any rows contribute nothing.

    Args:
        df: DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.  Any
            ``'s'`` characters in the three value columns are removed
            before the value is parsed as a float.

    Returns:
        The sum of the per-N contributions (``0`` if no row matched).
    """
    def parse(value):
        # Values may be stored as text containing 's' characters.
        return float(str(value).replace('s', ''))

    total = 0
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        group = df[df['id'].str.startswith(prefix)]
        if not len(group):
            continue  # nothing submitted for this N

        xs, ys = [], []
        for _, tree in group.iterrows():
            cx = parse(tree['x'])
            cy = parse(tree['y'])
            angle = parse(tree['deg'])
            px, py = get_tree_polygon(cx, cy, angle)
            xs.extend(px)
            ys.extend(py)

        # Side of the bounding square = larger extent of the two axes.
        width = max(xs) - min(xs)
        height = max(ys) - min(ys)
        side = max(width, height)
        total += side * side / n
    return total

# Report the score actually computed for the baseline, the target score,
# and the absolute / relative gap between them.  The gap is derived from
# the computed score rather than a hard-coded copy of it, so the three
# printed lines cannot drift apart if the baseline file changes.
target_score = 68.931058
baseline_score = score_submission(df)
gap = baseline_score - target_score

print(f'Baseline score: {baseline_score:.6f}')
print(f'Target score: {target_score:.6f}')
print(f'Gap: {gap:.6f} ({gap / target_score * 100:.2f}%)')

In [None]:
# Analyze per-N score contributions to find where improvements might be possible
def score_per_n(df):
    """Return the score contribution of every N present in a submission.

    Rows are grouped by the zero-padded ``NNN_`` prefix of their ``id`` for
    N from 1 to 200.  For each non-empty group the polygons produced by
    ``get_tree_polygon`` are bounded by an axis-aligned square and the
    contribution ``side**2 / N`` is recorded.

    Args:
        df: DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.  Any
            ``'s'`` characters in the three value columns are removed
            before the value is parsed as a float.

    Returns:
        Dict mapping N to its contribution; N values with no rows are
        absent from the dict.
    """
    def bounding_side(group):
        # Collect every polygon vertex of the group, then take the larger
        # of the horizontal and vertical extents.
        xs, ys = [], []
        for _, tree in group.iterrows():
            cx, cy, angle = (
                float(str(tree[column]).replace('s', ''))
                for column in ('x', 'y', 'deg')
            )
            px, py = get_tree_polygon(cx, cy, angle)
            xs.extend(px)
            ys.extend(py)
        return max(max(xs) - min(xs), max(ys) - min(ys))

    scores = {}
    for n in range(1, 201):
        group = df[df['id'].str.startswith(f'{n:03d}_')]
        if len(group):
            side = bounding_side(group)
            scores[n] = side * side / n
    return scores

# Per-N contributions of the baseline submission.
baseline_scores = score_per_n(df)

# Find N values with highest score contribution: order the (N, contribution)
# pairs from the largest contribution down and show the first 20.
sorted_scores = sorted(
    baseline_scores.items(),
    key=lambda item: item[1],
    reverse=True,
)
print('Top 20 N values by score contribution:')
for n, score in sorted_scores[:20]:
    print(f'  N={n:3d}: {score:.6f}')

In [None]:
# Compare with overlapping CSV to see where improvements are possible
overlap_path = '/home/nonroot/snapshots/santa-2025/21108486172/code/experiments/submission_v21.csv'
overlap_df = pd.read_csv(overlap_path)
overlap_scores = score_per_n(overlap_df)

print('N values where overlap CSV is better (these have overlaps):')
for n in range(1, 201):
    # Only N values scored in both submissions can be compared.
    if n not in baseline_scores or n not in overlap_scores:
        continue
    diff = baseline_scores[n] - overlap_scores[n]
    # Report only differences above the 0.001 threshold.
    if diff > 0.001:
        print(f'  N={n:3d}: baseline={baseline_scores[n]:.6f}, overlap={overlap_scores[n]:.6f}, diff={diff:.6f}')

In [None]:
# Calculate theoretical minimum score if we could achieve overlap CSV scores for all N
# (i.e. the plain sum of the overlap CSV's per-N contributions).
theoretical_min = sum(overlap_scores.values())

print(f'Theoretical minimum (with overlaps): {theoretical_min:.6f}')
print(f'Current baseline: {sum(baseline_scores.values()):.6f}')
print('Target: 68.931058')
print(f'Gap from theoretical to target: {theoretical_min - 68.931058:.6f}')