# Evolver Loop 1 - LB Feedback Analysis

## Key Findings:
- Baseline CV: 70.6761, LB: 70.6761 (perfect match - no gap)
- Target: 68.889699
- Gap to close: 1.786 points (2.5%)

## Strategy Analysis:
1. Large N (51-200) contributes 73% of score - this is where improvements matter most
2. N=1 is already optimal at 45° rotation
3. Ensemble approach from top kernels takes best per-N from multiple sources

In [1]:
import pandas as pd
import numpy as np
import os
import glob
from numba import njit
import math

# Tree polygon template
@njit
def make_polygon_template():
    """Return the tree polygon's vertex coordinates in its own local frame.

    Returns:
        A pair ``(x, y)`` of float64 arrays with 15 entries each. The outline
        starts at the tip ``(0, 0.8)``, runs down the positive-x side through
        the tier levels at y=0.5, y=0.25 and y=0.0, around the narrow
        rectangle between y=0.0 and y=-0.2, and back up the negative-x side.
    """
    # Widths of the outline at each level (local names describe the geometry).
    trunk_w = 0.15
    trunk_h = 0.2
    base_w = 0.7
    mid_w = 0.4
    top_w = 0.25

    # Heights of each level.
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h

    x = np.array(
        [
            0,
            top_w / 2,
            top_w / 4,
            mid_w / 2,
            mid_w / 4,
            base_w / 2,
            trunk_w / 2,
            trunk_w / 2,
            -trunk_w / 2,
            -trunk_w / 2,
            -base_w / 2,
            -mid_w / 4,
            -mid_w / 2,
            -top_w / 4,
            -top_w / 2,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y,
            tier1_y,
            tier1_y,
            tier2_y,
            tier2_y,
            base_y,
            base_y,
            trunk_bottom_y,
            trunk_bottom_y,
            base_y,
            base_y,
            tier2_y,
            tier2_y,
            tier1_y,
            tier1_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs,ys,degs,tx,ty):
    """Score one group of trees: bounding-square area divided by tree count.

    Tree ``i`` is the template polygon ``(tx, ty)`` rotated by ``degs[i]``
    degrees (standard 2-D rotation matrix) and then shifted by
    ``(xs[i], ys[i])``. The axis-aligned bounding box of all transformed
    vertices is tracked, and the longer of its two edges is used as the
    square's side.

    Args:
        xs, ys: per-tree translation offsets.
        degs: per-tree rotation angles in degrees.
        tx, ty: template polygon vertex coordinates.

    Returns:
        ``side * side / xs.size``.
    """
    tree_count = xs.size
    vertex_count = tx.size

    # Sentinels so that the first vertex always replaces them.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300

    for t in range(tree_count):
        theta = degs[t]*math.pi/180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        off_x = xs[t]
        off_y = ys[t]
        for v in range(vertex_count):
            px = cos_t*tx[v]-sin_t*ty[v]+off_x
            py = sin_t*tx[v]+cos_t*ty[v]+off_y
            if px < lo_x:
                lo_x = px
            if px > hi_x:
                hi_x = px
            if py < lo_y:
                lo_y = py
            if py > hi_y:
                hi_y = py

    side = max(hi_x-lo_x, hi_y-lo_y)
    return side*side/tree_count

def strip(a):
    """Convert submission-style values such as ``'s1.25'`` to a float64 array.

    Every element is turned into text, all ``'s'`` characters are removed,
    and the remainder is parsed with ``float``.

    Args:
        a: iterable of values (strings or numbers).

    Returns:
        1-D ``np.float64`` array with one entry per element of ``a``.

    Raises:
        ValueError: if an element is not a valid float once ``'s'`` is removed.
    """
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace('s', '')))
    return np.array(parsed, np.float64)

# Build the polygon template once; tx and ty are the vertex arrays passed to
# every score_group call in the cells below.
tx, ty = make_polygon_template()
print('Functions defined')

Functions defined


In [2]:
# Recursively collect every CSV file under the preoptimized folder
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'
csv_files = [
    os.path.join(root, f)
    for root, dirs, files in os.walk(preopt_dir)
    for f in files
    if f.endswith('.csv')
]

# Report the total and preview the first ten paths
print(f'Found {len(csv_files)} CSV files')
for f in csv_files[:10]:
    print(f'  {f}')

Found 30 CSV files
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_JKoT4.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/New_Tree_144_196.csv


In [3]:
# Load all CSVs and find best per-N
#
# For each N in 1..200, `best[n]` keeps the lowest-scoring group seen so far:
#   'score' - value returned by score_group (1e300 until a candidate is found)
#   'data'  - the rows of that group (id/x/y/deg and any other source columns)
#   'src'   - basename of the file the group came from
# Ties keep the first file encountered, because the comparison is strict.
best = {n: {'score': 1e300, 'data': None, 'src': None} for n in range(1, 201)}
skipped_files = []    # (path, reason) for files that could not be used at all
rejected_groups = 0   # groups dropped for a wrong row count or unusable values

for fp in csv_files:
    try:
        df = pd.read_csv(fp)
    except Exception as exc:
        # Best-effort scan: keep going, but remember what was dropped and why.
        skipped_files.append((fp, f'unreadable ({type(exc).__name__})'))
        continue
    if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
        skipped_files.append((fp, 'missing id/x/y/deg columns'))
        continue
    df = df.copy()
    try:
        # ids look like '002_1'; the part before '_' is the group size N.
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
    except (ValueError, TypeError):
        skipped_files.append((fp, 'malformed id column'))
        continue
    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue
        # Group N must contain exactly N trees. A truncated or duplicated
        # group is not a valid layout, yet it can score lower and be picked.
        if len(g) != n:
            rejected_groups += 1
            continue
        try:
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
        except ValueError:
            rejected_groups += 1
            continue
        # NaN/inf vertices never update the bounding box inside score_group,
        # so such trees would be ignored and the score understated.
        if not (np.isfinite(xs).all() and np.isfinite(ys).all() and np.isfinite(ds).all()):
            rejected_groups += 1
            continue
        sc = score_group(xs, ys, ds, tx, ty)
        if sc < best[n]['score']:
            best[n]['score'] = float(sc)
            best[n]['data'] = g.drop(columns=['N']).copy()
            best[n]['src'] = os.path.basename(fp)

print('Best per-N analysis complete')
if skipped_files or rejected_groups:
    print(f'  skipped {len(skipped_files)} file(s), rejected {rejected_groups} invalid group(s)')

Best per-N analysis complete


In [4]:
# Total the best per-N scores and compare with the baseline and the target
per_n_scores = [best[n]['score'] for n in range(1, 201)]
ensemble_score = sum(per_n_scores)

ensemble_gain = 70.676102 - ensemble_score
remaining_gap = ensemble_score - 68.889699

print(f'Ensemble score (best per-N): {ensemble_score:.6f}')
print('Baseline score: 70.676102')
print('Target score: 68.889699')
print(f'Improvement from ensemble: {ensemble_gain:.6f}')
print(f'Gap remaining: {remaining_gap:.6f}')

Ensemble score (best per-N): 70.676102
Baseline score: 70.676102
Target score: 68.889699
Improvement from ensemble: -0.000000
Gap remaining: 1.786403


In [5]:
# Tally how many N values each source file contributes to the ensemble
source_counts = {}
for n in range(1, 201):
    src = best[n]['src']
    if not src:
        # No candidate was recorded for this N.
        continue
    if src not in source_counts:
        source_counts[src] = 0
    source_counts[src] += 1

print('\nSource contributions to ensemble:')
# Largest contributor first; the sort is stable, so ties keep insertion order.
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)
for src, count in ranked_sources:
    print(f'  {src}: {count} N values')


Source contributions to ensemble:
  ensemble.csv: 200 N values


In [6]:
# Compare baseline vs ensemble per-N
#
# The baseline file lives in the same folder that was scanned for candidates,
# so the path is derived from `preopt_dir` instead of being spelled out again.
baseline_df = pd.read_csv(os.path.join(preopt_dir, 'santa-2025.csv'))
baseline_df['N'] = baseline_df['id'].astype(str).str.split('_').str[0].astype(int)

# Split once instead of filtering the whole frame 200 times; row order inside
# each group is the same as in the file.
baseline_groups = {key: grp for key, grp in baseline_df.groupby('N')}

improvements = []        # (n, baseline score, ensemble score, gain, source file)
missing_baseline = []    # N values that have no rows in the baseline file
for n in range(1, 201):
    g = baseline_groups.get(n)
    if g is None:
        # An empty group would reach score_group with zero trees.
        missing_baseline.append(n)
        continue
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    baseline_score = score_group(xs, ys, ds, tx, ty)
    ensemble_score_n = best[n]['score']
    improvement = baseline_score - ensemble_score_n
    # Tolerance keeps float noise from being reported as a gain.
    if improvement > 1e-9:
        improvements.append((n, baseline_score, ensemble_score_n, improvement, best[n]['src']))

if missing_baseline:
    print(f'Baseline has no rows for N={missing_baseline}')

print(f'\nN values where ensemble beats baseline ({len(improvements)} total):')
for n, base, ens, imp, src in sorted(improvements, key=lambda x: -x[3])[:20]:
    print(f'  N={n:3d}: baseline={base:.6f}, ensemble={ens:.6f}, improvement={imp:.6f} from {src}')


N values where ensemble beats baseline (0 total):


In [7]:
# Create ensemble submission
#
# Every N from 1 to 200 must be present; dropping one silently would produce
# a submission with missing rows, so fail with a clear message instead.
missing_n = [n for n in range(1, 201) if best[n]['data'] is None]
if missing_n:
    raise ValueError(f'No layout available for N={missing_n}; cannot build a complete submission')

rows = [best[n]['data'] for n in range(1, 201)]
ensemble_df = pd.concat(rows, ignore_index=True)

# ids look like '002_1' (group, tree index). Split once and sort numerically
# on both parts so the final row order does not depend on string ordering.
id_parts = ensemble_df['id'].astype(str).str.split('_')
ensemble_df['sn'] = id_parts.str[0].astype(int)
ensemble_df['si'] = id_parts.str[1].astype(int)
ensemble_df = ensemble_df.sort_values(['sn', 'si']).drop(columns=['sn', 'si'])
ensemble_df = ensemble_df[['id', 'x', 'y', 'deg']]

# Group N holds N rows, so a complete submission has 1 + 2 + ... + 200 rows.
expected_rows = sum(range(1, 201))
if len(ensemble_df) != expected_rows:
    raise ValueError(f'Ensemble has {len(ensemble_df)} rows, expected {expected_rows}')

print(f'Ensemble submission has {len(ensemble_df)} rows')
print(ensemble_df.head())

Ensemble submission has 20100 rows
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  


In [8]:
# Save ensemble submission
os.makedirs('/home/code/experiments/001_ensemble', exist_ok=True)
ensemble_df.to_csv('/home/code/experiments/001_ensemble/submission.csv', index=False)
print('Saved to /home/code/experiments/001_ensemble/submission.csv')

# Also copy to submission folder
# Create the folder first, as is done for the experiment folder above;
# to_csv does not create missing parent directories.
os.makedirs('/home/submission', exist_ok=True)
ensemble_df.to_csv('/home/submission/submission.csv', index=False)
print('Copied to /home/submission/submission.csv')

Saved to /home/code/experiments/001_ensemble/submission.csv
Copied to /home/submission/submission.csv


In [9]:
# Verify ensemble score
import json

# Group rows by the integer before '_' in the id. This does not depend on how
# the ids are zero-padded, and it scans the frame once rather than once per N.
verify_keys = ensemble_df['id'].astype(str).str.split('_').str[0].astype(int).to_numpy()
verify_groups = {key: grp for key, grp in ensemble_df.groupby(verify_keys)}

verify_score = 0
for n in range(1, 201):
    g = verify_groups.get(n)
    if g is None or len(g) != n:
        # A missing group would reach score_group with zero trees, and a group
        # of the wrong size is not a valid layout for this N.
        found = 0 if g is None else len(g)
        raise ValueError(f'Ensemble group N={n} has {found} rows, expected {n}')
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    sc = score_group(xs, ys, ds, tx, ty)
    verify_score += sc

print(f'Verified ensemble score: {verify_score:.6f}')
print(f'Target: 68.889699')
print(f'Gap: {verify_score - 68.889699:.6f}')

# Save metrics
# Plain floats keep the JSON output independent of the numeric type returned
# by score_group.
verify_score = float(verify_score)
metrics = {
    'cv_score': verify_score,
    'target_score': 68.889699,
    'gap': verify_score - 68.889699,
    'improvement_from_baseline': 70.676102 - verify_score
}
with open('/home/code/experiments/001_ensemble/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)
print('Saved metrics')

Verified ensemble score: 70.676102
Target: 68.889699
Gap: 1.786403
Saved metrics
