# Loop 8 Analysis: Breaking the 70.734327 Plateau

## Key Findings from Previous Experiments
- 8 experiments, ALL at the same score: 70.734327
- The baseline is extremely well-optimized
- The overlapping CSVs score 67.727, but 30 of their N values contain overlaps
- The entire ~3.0-point improvement (70.734 → 67.727) comes from N values that contain overlaps

## Strategy Analysis
1. The bbox3 binary requires GLIBC 2.34 (we have 2.31), so we need to compile our own
2. We have successfully compiled our own build, `bbox3_local`
3. Next step: run the bbox3 optimizer (`bbox3_local`) with proper parameters

In [1]:
import pandas as pd
import numpy as np
import os

# Load the current best (baseline) submission. Later cells score `df` and copy
# the file at `baseline_path` into the optimizer's working directory.
baseline_path = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
df = pd.read_csv(baseline_path)

# Fail fast with a readable message if the file is not submission-shaped:
# the scoring functions in this notebook index exactly these four columns.
_missing_cols = {'id', 'x', 'y', 'deg'} - set(df.columns)
assert not _missing_cols, f'{baseline_path} is missing columns: {sorted(_missing_cols)}'

# Tree shape: (TX[i], TY[i]) is the i-th outline vertex of one tree in its own
# local frame, i.e. before any rotation or translation is applied.
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

def get_tree_polygon(cx, cy, deg):
    """Place the tree outline at (cx, cy) after rotating it by `deg` degrees.

    The local vertices (TX, TY) are rotated about the local origin with the
    standard 2-D rotation (positive `deg` is counter-clockwise), then shifted
    by (cx, cy).

    Args:
        cx: x offset added to every rotated vertex.
        cy: y offset added to every rotated vertex.
        deg: rotation angle in degrees.

    Returns:
        Tuple ``(x, y)`` of arrays holding the transformed vertex coordinates.
    """
    theta = np.radians(deg)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)

    rotated_x = TX * cos_t - TY * sin_t
    rotated_y = TX * sin_t + TY * cos_t
    return rotated_x + cx, rotated_y + cy

def score_submission(df):
    """Return the total score of a submission frame.

    For each N in 1..200, the rows whose ``id`` starts with ``f'{N:03d}_'`` are
    treated as that N's trees. Every tree outline is produced by
    ``get_tree_polygon``; ``side`` is the larger of the width and height of the
    axis-aligned box around all of the group's vertices, and the group adds
    ``side * side / N`` to the total. An N with no rows adds nothing.

    Args:
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``. Any ``'s'``
            characters in the ``x``/``y``/``deg`` values are removed before
            parsing (presumably the values look like ``'s0.25'`` - confirm
            against the submission format).

    Returns:
        Sum of the per-N contributions (``0`` when no group is present).
    """
    def _column_as_float(name):
        # Parse the whole column once instead of once per row.
        cleaned = df[name].astype(str).str.replace('s', '', regex=False)
        return cleaned.astype(float).to_numpy()

    xs = _column_as_float('x')
    ys = _column_as_float('y')
    degs = _column_as_float('deg')

    # Single pass over the ids: positional row indices keyed by the first four
    # characters. Matching a key against 'NNN_' is the same test as
    # id.startswith('NNN_'), without rescanning the column for every N.
    prefixes = df['id'].str[:4]
    rows_by_prefix = prefixes.groupby(prefixes).indices

    total = 0
    for n in range(1, 201):
        idx = rows_by_prefix.get(f'{n:03d}_')
        if idx is None:
            continue

        # Column vectors of shape (k, 1) broadcast against the vertex arrays
        # inside get_tree_polygon, giving one row of vertices per tree.
        px, py = get_tree_polygon(xs[idx, None], ys[idx, None], degs[idx, None])
        side = max(px.max() - px.min(), py.max() - py.min())
        total += side * side / n
    return total

# Headline numbers: where the baseline stands relative to the score we want to reach.
TARGET_SCORE = 68.931058
baseline_total = score_submission(df)

print(f'Baseline score: {baseline_total:.6f}')
print(f'Target score: {TARGET_SCORE:.6f}')

# Derive the gap from the score just computed rather than from a pasted literal,
# so the three printed lines cannot disagree if the baseline file changes.
gap = baseline_total - TARGET_SCORE
print(f'Gap: {gap:.6f} ({gap / TARGET_SCORE * 100:.2f}%)')

Baseline score: 70.734327
Target score: 68.931058
Gap: 1.803269 (2.62%)


In [2]:
# Analyze per-N score contributions to find where improvements might be possible
def score_per_n(df):
    """Return each N's score contribution as a dict ``{N: side * side / N}``.

    For each N in 1..200, the rows whose ``id`` starts with ``f'{N:03d}_'`` are
    treated as that N's trees. Every tree outline is produced by
    ``get_tree_polygon``; ``side`` is the larger of the width and height of the
    axis-aligned box around all of the group's vertices. An N with no rows is
    left out of the result.

    Args:
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``. Any ``'s'``
            characters in the ``x``/``y``/``deg`` values are removed before
            parsing (presumably the values look like ``'s0.25'`` - confirm
            against the submission format).

    Returns:
        Dict mapping each present N (int) to its contribution, in ascending N.
    """
    def _column_as_float(name):
        # Parse the whole column once instead of once per row.
        cleaned = df[name].astype(str).str.replace('s', '', regex=False)
        return cleaned.astype(float).to_numpy()

    xs = _column_as_float('x')
    ys = _column_as_float('y')
    degs = _column_as_float('deg')

    # Single pass over the ids: positional row indices keyed by the first four
    # characters. Matching a key against 'NNN_' is the same test as
    # id.startswith('NNN_'), without rescanning the column for every N.
    prefixes = df['id'].str[:4]
    rows_by_prefix = prefixes.groupby(prefixes).indices

    scores = {}
    for n in range(1, 201):
        idx = rows_by_prefix.get(f'{n:03d}_')
        if idx is None:
            continue

        # Column vectors of shape (k, 1) broadcast against the vertex arrays
        # inside get_tree_polygon, giving one row of vertices per tree.
        px, py = get_tree_polygon(xs[idx, None], ys[idx, None], degs[idx, None])
        side = max(px.max() - px.min(), py.max() - py.min())
        scores[n] = side * side / n
    return scores

# Per-N contributions of the baseline submission
baseline_scores = score_per_n(df)

# Rank the (N, contribution) pairs from the largest contribution to the smallest
sorted_scores = list(baseline_scores.items())
sorted_scores.sort(key=lambda pair: pair[1], reverse=True)

print('Top 20 N values by score contribution:')
top_twenty = sorted_scores[:20]
for n, score in top_twenty:
    print(f'  N={n:3d}: {score:.6f}')

Top 20 N values by score contribution:
  N=  1: 0.661250
  N=  2: 0.450779
  N=  3: 0.434745
  N=  5: 0.416850
  N=  4: 0.416545
  N=  7: 0.399897
  N=  6: 0.399610
  N=  9: 0.387415
  N=  8: 0.385407
  N= 15: 0.379203
  N= 10: 0.376630
  N= 21: 0.376451
  N= 20: 0.376057
  N= 11: 0.375736
  N= 22: 0.375258
  N= 16: 0.374128
  N= 26: 0.373997
  N= 12: 0.372724
  N= 13: 0.372323
  N= 25: 0.372144


In [3]:
# Score the overlapping submission per N, then list every N where it beats the
# baseline by more than 0.001
overlap_path = '/home/nonroot/snapshots/santa-2025/21108486172/code/experiments/submission_v21.csv'
overlap_df = pd.read_csv(overlap_path)
overlap_scores = score_per_n(overlap_df)

print('N values where overlap CSV is better (these have overlaps):')
for n in range(1, 201):
    # Only N values scored in both submissions can be compared
    if n not in baseline_scores or n not in overlap_scores:
        continue

    diff = baseline_scores[n] - overlap_scores[n]
    if not diff > 0.001:
        continue

    print(f'  N={n:3d}: baseline={baseline_scores[n]:.6f}, overlap={overlap_scores[n]:.6f}, diff={diff:.6f}')

N values where overlap CSV is better (these have overlaps):
  N=  3: baseline=0.434745, overlap=0.296773, diff=0.137973
  N=  4: baseline=0.416545, overlap=0.227236, diff=0.189308
  N=  5: baseline=0.416850, overlap=0.212694, diff=0.204155
  N=  6: baseline=0.399610, overlap=0.173625, diff=0.225985
  N=  7: baseline=0.399897, overlap=0.157468, diff=0.242429
  N=  8: baseline=0.385407, overlap=0.187564, diff=0.197844
  N=  9: baseline=0.387415, overlap=0.178013, diff=0.209402
  N= 10: baseline=0.376630, overlap=0.164911, diff=0.211719
  N= 11: baseline=0.375736, overlap=0.257667, diff=0.118070
  N= 12: baseline=0.372724, overlap=0.189158, diff=0.183566
  N= 13: baseline=0.372323, overlap=0.363374, diff=0.008949
  N= 14: baseline=0.371113, overlap=0.308731, diff=0.062381
  N= 15: baseline=0.379203, overlap=0.210706, diff=0.168497
  N= 16: baseline=0.374128, overlap=0.325480, diff=0.048648
  N= 17: baseline=0.370040, overlap=0.207258, diff=0.162782
  N= 18: baseline=0.368771, overlap=0.27

In [4]:
# Total we would reach if every N matched the overlap CSV's per-N score.
# NOTE: this is simply the overlap CSV's own total, so it is a bound only in the
# sense of "what the overlapping layouts achieve".
TARGET_SCORE = 68.931058

theoretical_min = sum(overlap_scores.values())
print(f'Theoretical minimum (with overlaps): {theoretical_min:.6f}')
print(f'Current baseline: {sum(baseline_scores.values()):.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
# A negative value here means the overlap total is already below the target.
print(f'Gap from theoretical to target: {theoretical_min - TARGET_SCORE:.6f}')

Theoretical minimum (with overlaps): 67.727119
Current baseline: 70.734327
Target: 68.931058
Gap from theoretical to target: -1.203939


In [None]:
# Let's try running our compiled bbox3_local optimizer
import subprocess
import shutil

WORK_DIR = '/home/code'
OPTIMIZER_TIMEOUT_S = 600  # 10 minutes


def _as_text(stream):
    """Return captured process output as str.

    On timeout, subprocess may hand back bytes (even with text=True) or None
    when nothing was captured, so normalise all three cases for printing.
    """
    if stream is None:
        return ''
    if isinstance(stream, bytes):
        return stream.decode(errors='replace')
    return stream


# Copy baseline to working directory
# (presumably bbox3_local picks up submission.csv from its cwd - TODO confirm)
shutil.copy(baseline_path, f'{WORK_DIR}/submission.csv')

# Run the optimizer with high parameters.
# NOTE(review): the meaning of -n / -r is defined by bbox3_local; confirm before tuning.
try:
    result = subprocess.run(
        [f'{WORK_DIR}/bbox3_local', '-n', '50000', '-r', '64'],
        capture_output=True,
        text=True,
        cwd=WORK_DIR,
        timeout=OPTIMIZER_TIMEOUT_S,
    )
except subprocess.TimeoutExpired as exc:
    # Without this, a timeout throws away everything the optimizer printed
    # during the run. Show it, then re-raise so the cell still fails.
    print(f'bbox3_local timed out after {OPTIMIZER_TIMEOUT_S}s; partial output follows.')
    print("STDOUT:")
    print(_as_text(exc.stdout))
    print("\nSTDERR:")
    print(_as_text(exc.stderr))
    raise

print("STDOUT:")
print(result.stdout)
print("\nSTDERR:")
print(result.stderr)
print("\nReturn code:", result.returncode)