# Evolver Loop 1 Analysis

## Goals:
1. Analyze the theoretical optimum for N=1 (is the current solution already optimal?)
2. Ensemble all pre-optimized submissions, keeping the best layout for each N
3. Calculate the potential improvement from the ensemble

In [None]:
import pandas as pd
import numpy as np
import math
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
import glob
import os

getcontext().prec = 30

# Tree outline, listed clockwise starting at the tip. The shape is
# mirror-symmetric about the y-axis, so only the right-hand side is spelled
# out; the left-hand side is its reflection walked in the opposite direction.
_TREE_TIP = (0.0, 0.8)
_TREE_RIGHT_SIDE = [
    (0.125, 0.5),    # top tier, outer corner
    (0.0625, 0.5),   # top tier, inner corner
    (0.2, 0.25),     # mid tier, outer corner
    (0.1, 0.25),     # mid tier, inner corner
    (0.35, 0.0),     # base, outer corner
    (0.075, 0.0),    # trunk, top
    (0.075, -0.2),   # trunk, bottom
]
TREE_VERTICES = (
    [_TREE_TIP]
    + _TREE_RIGHT_SIDE
    + [(-x, y) for x, y in reversed(_TREE_RIGHT_SIDE)]
)

print(f"Tree has {len(TREE_VERTICES)} vertices")

In [None]:
# N=1 analysis: with a single tree, the only free parameter that affects the
# enclosing square is the rotation angle, so we study the side as a function
# of that angle.

def get_bbox_at_angle(angle_deg):
    """Return the side of the axis-aligned square enclosing one rotated tree.

    The tree polygon is rotated by ``angle_deg`` degrees about the origin and
    the larger of its bounding-box width and height is returned.
    """
    tree = affinity.rotate(Polygon(TREE_VERTICES), angle_deg, origin=(0, 0))
    left, bottom, right, top = tree.bounds
    width = right - left
    height = top - bottom
    return max(width, height)

# Coarse sweep: 901 evenly spaced angles over [0, 90], i.e. 0.1 degree steps
angles = np.linspace(0, 90, 901)
bbox_sides = list(map(get_bbox_at_angle, angles))

# Position of the smallest side found in the sweep
optimal_idx = np.argmin(bbox_sides)
optimal_angle, optimal_side = angles[optimal_idx], bbox_sides[optimal_idx]

print(f"Optimal angle for N=1: {optimal_angle:.1f} degrees")
print(f"Optimal side length: {optimal_side:.6f}")
print(f"Optimal N=1 score contribution: {optimal_side**2 / 1:.6f}")

# Reference point: the N=1 side reported by the baseline analysis
current_n1_side = 0.813  # from baseline analysis
current_n1_score = current_n1_side**2 / 1
print(f"\nCurrent N=1 side: {current_n1_side}")
print(f"Current N=1 score: {current_n1_score:.6f}")
print(f"Potential improvement: {current_n1_score - optimal_side**2:.6f}")

In [None]:
# Refinement: resample a +/- 1 degree window around the coarse optimum with
# 2001 points (0.001 degree steps)
fine_angles = np.linspace(optimal_angle - 1, optimal_angle + 1, 2001)
fine_sides = list(map(get_bbox_at_angle, fine_angles))

fine_optimal_idx = np.argmin(fine_sides)
fine_optimal_angle, fine_optimal_side = (
    fine_angles[fine_optimal_idx],
    fine_sides[fine_optimal_idx],
)

print(f"Fine-tuned optimal angle: {fine_optimal_angle:.4f} degrees")
print(f"Fine-tuned optimal side: {fine_optimal_side:.8f}")
print(f"Fine-tuned N=1 score: {fine_optimal_side**2:.8f}")

In [None]:
# Load all pre-optimized submissions and find best per-N
from numba import njit

@njit
def make_polygon_template():
    """Return the tree outline as two float64 arrays ``(x, y)`` of 15 vertices.

    The vertices are derived from the tree's widths and heights rather than
    written out as literals.
    """
    # Widths of the trunk and of the three foliage tiers
    trunk_w = 0.15
    base_w = 0.7
    mid_w = 0.4
    top_w = 0.25
    # Heights of the horizontal levels
    trunk_h = 0.2
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h

    x = np.array(
        [
            0,
            top_w / 2, top_w / 4,
            mid_w / 2, mid_w / 4,
            base_w / 2,
            trunk_w / 2, trunk_w / 2,
            -trunk_w / 2, -trunk_w / 2,
            -base_w / 2,
            -mid_w / 4, -mid_w / 2,
            -top_w / 4, -top_w / 2,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y,
            tier1_y, tier1_y,
            tier2_y, tier2_y,
            base_y,
            base_y, trunk_bottom_y,
            trunk_bottom_y, base_y,
            base_y,
            tier2_y, tier2_y,
            tier1_y, tier1_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of trees: squared bounding-square side divided by count.

    ``xs``, ``ys`` and ``degs`` give each tree's translation and rotation in
    degrees; ``tx``/``ty`` are the template vertex coordinates. Every template
    vertex is rotated, translated, and folded into a running axis-aligned
    bounding box; the larger box dimension is the square's side.
    """
    count = xs.size
    n_vertices = tx.size

    # Start from an "empty" box so the first vertex always updates it
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300

    for k in range(count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        off_x = xs[k]
        off_y = ys[k]
        for v in range(n_vertices):
            px = cos_t * tx[v] - sin_t * ty[v] + off_x
            py = sin_t * tx[v] + cos_t * ty[v] + off_y
            if px < min_x:
                min_x = px
            if px > max_x:
                max_x = px
            if py < min_y:
                min_y = py
            if py > max_y:
                max_y = py

    side = max(max_x - min_x, max_y - min_y)
    return side * side / count

def strip(a):
    """Convert values such as ``'s0.5'`` into a float64 array.

    Each element is turned into text, every ``'s'`` character is removed, and
    the remainder is parsed as a float.
    """
    parsed = (float(str(raw).replace('s', '')) for raw in a)
    return np.fromiter(parsed, dtype=np.float64)

# Build the template vertex arrays once; they are passed to every
# score_group call in the cells that follow.
tx, ty = make_polygon_template()
print("Scoring functions ready")

In [None]:
# Find all CSV files in snapshots.
# glob yields paths in arbitrary order; sorting makes the scan order (and
# therefore which file wins an exact score tie, and the recorded source)
# reproducible from run to run.
csv_files = sorted(
    glob.glob('/home/nonroot/snapshots/santa-2025/**/*.csv', recursive=True)
)
print(f"Found {len(csv_files)} CSV files")

# Filter out sample_submission.csv. The substring test applies to the whole
# path, so anything under a directory containing that name is dropped too.
csv_files = [f for f in csv_files if 'sample_submission' not in f]
print(f"After filtering: {len(csv_files)} CSV files")

In [None]:
# Load and score all submissions
from tqdm import tqdm

# best[n] tracks the lowest-scoring layout seen so far for group size n.
# 1e300 is a sentinel meaning "nothing found yet".
# NOTE(review): candidates are ranked purely by bounding-square score; nothing
# in this scan checks that the trees of a layout do not overlap, so an invalid
# layout with a small box would be selected. Validate before submitting.
best = {n: {'score': 1e300, 'data': None, 'src': None} for n in range(1, 201)}

# (path, reason) for every file that could not be used, so that problems are
# reported instead of being silently dropped.
skipped_files = []

for fp in tqdm(csv_files, desc='Scanning CSVs'):
    try:
        df = pd.read_csv(fp)
    except Exception as exc:  # best-effort scan: one bad file must not stop it
        skipped_files.append((fp, f'read failed: {exc!r}'))
        continue

    if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
        continue

    try:
        # ids look like "<N>_<index>"; the numeric prefix is the group size
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
    except (ValueError, TypeError) as exc:
        skipped_files.append((fp, f'unparseable id column: {exc!r}'))
        continue

    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue
        # A usable group for size n must contain exactly n trees
        if len(g) != n:
            continue

        try:
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
        except ValueError:
            # Malformed coordinate text: skip this group, keep scanning
            continue

        # NaN compares false against everything, so score_group would simply
        # leave such a tree out of the bounding box and report a box that is
        # too small. Reject any group with non-finite values.
        if not (
            np.isfinite(xs).all()
            and np.isfinite(ys).all()
            and np.isfinite(ds).all()
        ):
            continue

        sc = score_group(xs, ys, ds, tx, ty)

        if sc < best[n]['score']:
            best[n]['score'] = float(sc)
            best[n]['data'] = g.drop(columns=['N']).copy()
            best[n]['src'] = os.path.basename(fp)

print("Scanning complete")
if skipped_files:
    print(f"Skipped {len(skipped_files)} unusable file(s):")
    for skipped_path, reason in skipped_files:
        print(f"  {skipped_path}: {reason}")

In [None]:
# Candidate for N=1: a single tree at the origin rotated by the angle found in
# the fine search (fine_optimal_angle), not a hard-coded 45 degrees.
manual_data = pd.DataFrame({
    'id': ['001_0'],
    'x': ['s0.0'],
    'y': ['s0.0'],
    'deg': [f's{fine_optimal_angle}']
})
xs = strip(manual_data['x'].to_numpy())
ys = strip(manual_data['y'].to_numpy())
ds = strip(manual_data['deg'].to_numpy())
sc = score_group(xs, ys, ds, tx, ty)

print(f"N=1 optimal score: {sc:.8f}")

# Replace the scanned entry only when the computed layout is strictly better;
# an unconditional overwrite could discard a better layout found in the CSVs.
if sc < best[1]['score']:
    best[1]['score'] = float(sc)
    best[1]['data'] = manual_data.copy()
    best[1]['src'] = 'optimal_computed'
else:
    print(
        f"Keeping existing N=1 entry from {best[1]['src']} "
        f"(score {best[1]['score']:.8f})"
    )

In [None]:
# Calculate total ensemble score
# Reference values for comparison.
# NOTE(review): presumably taken from earlier experiments - confirm they are
# still current before relying on the printed gaps.
BASELINE_SCORE = 70.676102
TARGET_SCORE = 68.919154

# A group size with no layout still carries the 1e300 placeholder score, which
# would swamp the total; make that situation visible.
missing_n = [n for n in range(1, 201) if best[n]['data'] is None]
if missing_n:
    print(
        f"WARNING: no layout found for N in {missing_n}; "
        "the total below includes a 1e300 placeholder for each of them"
    )

total_score = sum(best[n]['score'] for n in range(1, 201))
print(f"\nEnsemble total score: {total_score:.6f}")
print(f"Baseline score: {BASELINE_SCORE:.6f}")
print(f"Improvement: {BASELINE_SCORE - total_score:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap to target: {total_score - TARGET_SCORE:.6f}")

In [None]:
# Compare the ensemble with the baseline submission, group size by group size
baseline_df = pd.read_csv('/home/code/experiments/001_baseline/submission.csv')
baseline_prefix = baseline_df['id'].astype(str).str.split('_').str[0]
baseline_df['N'] = baseline_prefix.astype(int)

improvements = []
for n in range(1, 201):
    group = baseline_df.loc[baseline_df['N'] == n]
    # Only groups holding exactly n trees are comparable
    if len(group) != n:
        continue

    baseline_score = score_group(
        strip(group['x'].to_numpy()),
        strip(group['y'].to_numpy()),
        strip(group['deg'].to_numpy()),
        tx,
        ty,
    )
    record = {
        'n': n,
        'baseline': baseline_score,
        'ensemble': best[n]['score'],
        'improvement': baseline_score - best[n]['score'],
        'source': best[n]['src'],
    }
    # Keep only gains above the 1e-9 noise threshold
    if record['improvement'] > 1e-9:
        improvements.append(record)

improvements_df = pd.DataFrame(improvements)
if improvements_df.empty:
    print("No improvements found - baseline is already the best ensemble")
else:
    print(f"\nFound {len(improvements_df)} N values with improvements:")
    print(improvements_df.sort_values('improvement', ascending=False).head(20))

In [None]:
# Assemble the ensemble submission from the best layout kept for each N
rows = [best[n]['data'] for n in range(1, 201) if best[n]['data'] is not None]

ensemble_df = pd.concat(rows, ignore_index=True)

# Order rows numerically by the two parts of the id ("<N>_<index>"); the sort
# keys live in a separate frame so no helper columns touch the submission.
id_parts = ensemble_df['id'].str.split('_')
sort_keys = pd.DataFrame({
    'sn': id_parts.str[0].astype(int),
    'si': id_parts.str[1].astype(int),
})
row_order = sort_keys.sort_values(['sn', 'si']).index
ensemble_df = ensemble_df.loc[row_order, ['id', 'x', 'y', 'deg']]

print(f"Ensemble submission shape: {ensemble_df.shape}")
print(f"Expected: (20100, 4)")

# Write the submission into the experiment directory
os.makedirs('/home/code/experiments/002_ensemble', exist_ok=True)
ensemble_df.to_csv('/home/code/experiments/002_ensemble/submission.csv', index=False)
print("\nSaved to /home/code/experiments/002_ensemble/submission.csv")

In [None]:
# Re-read the saved file and recompute the total as an end-to-end check
verify_df = pd.read_csv('/home/code/experiments/002_ensemble/submission.csv')
verify_prefix = verify_df['id'].astype(str).str.split('_').str[0]
verify_df['N'] = verify_prefix.astype(int)

verify_total = 0
for n in range(1, 201):
    group = verify_df.loc[verify_df['N'] == n]
    coords = [strip(group[col].to_numpy()) for col in ('x', 'y', 'deg')]
    verify_total += score_group(*coords, tx, ty)

print(f"Verified ensemble score: {verify_total:.6f}")
print(f"Expected: {total_score:.6f}")
print(f"Match: {abs(verify_total - total_score) < 1e-6}")

In [None]:
# Copy to submission folder
import shutil
# Create the destination directory if needed, as is already done for the
# experiment directory, so the copy cannot fail on a missing folder.
os.makedirs('/home/submission', exist_ok=True)
shutil.copy('/home/code/experiments/002_ensemble/submission.csv', '/home/submission/submission.csv')
print("Copied to /home/submission/submission.csv")

# Save metrics
import json
# Coerce to a built-in float so the value is always JSON-serializable and
# prints as a plain number regardless of the scalar type accumulated above.
metrics = {'cv_score': float(verify_total)}
with open('/home/code/experiments/002_ensemble/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")