# Evolver Loop 2 Analysis

Analyze available pre-optimized solutions and identify best per-N configurations.

In [1]:
import numpy as np
import pandas as pd
import glob
import os
from numba import njit
import math

# Tree geometry
@njit
def make_polygon_template():
    """Return the outline of the tree template as two coordinate arrays.

    Returns
    -------
    (x, y) : tuple of np.ndarray
        Two float64 arrays with 15 entries each. Entry ``k`` of ``x`` and
        ``y`` together give vertex ``k`` of the outline. The x coordinates
        are mirrored about x = 0; the y coordinates run from the tip (0.8)
        down to the bottom of the trunk (-0.2).
    """
    # Widths of the individual parts (names are the reviewer's reading of
    # the original abbreviations tw/th/bw/mw/ow).
    trunk_width = 0.15
    trunk_height = 0.2
    base_width = 0.7
    mid_width = 0.4
    top_width = 0.25

    # Heights at which the outline changes direction.
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_height

    # Pre-compute the x offsets that appear on both sides of the outline.
    top_outer = top_width / 2
    top_inner = top_width / 4
    mid_outer = mid_width / 2
    mid_inner = mid_width / 4
    base_half = base_width / 2
    trunk_half = trunk_width / 2

    x = np.array(
        [
            0.0,
            top_outer, top_inner,
            mid_outer, mid_inner,
            base_half,
            trunk_half, trunk_half,
            -trunk_half, -trunk_half,
            -base_half,
            -mid_inner, -mid_outer,
            -top_inner, -top_outer,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y,
            tier1_y, tier1_y,
            tier2_y, tier2_y,
            base_y,
            base_y, trunk_bottom_y,
            trunk_bottom_y, base_y,
            base_y,
            tier2_y, tier2_y,
            tier1_y, tier1_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs,ys,degs,tx,ty):
    """Score one group of placed templates by its bounding square.

    Every template vertex ``(tx[j], ty[j])`` is rotated by ``degs[i]``
    degrees and shifted by ``(xs[i], ys[i])`` for each placement ``i``.
    The axis-aligned bounding box of all transformed vertices is tracked,
    and the score is the squared longer side of that box divided by the
    number of placements.

    Parameters
    ----------
    xs, ys : 1-D arrays holding the x / y offset of each placement.
    degs : 1-D array holding the rotation of each placement, in degrees.
    tx, ty : 1-D arrays holding the template vertex coordinates.

    Returns
    -------
    float
        ``side * side / n`` where ``side`` is the longer bounding-box edge
        and ``n`` is ``xs.size``.
    """
    count = xs.size
    n_vertices = tx.size

    # Running bounding box, initialised so the first vertex replaces all four.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300

    for k in range(count):
        theta = degs[k]*math.pi/180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        shift_x = xs[k]
        shift_y = ys[k]
        for v in range(n_vertices):
            # Rotate the template vertex, then translate it.
            px = cos_t*tx[v]-sin_t*ty[v]+shift_x
            py = sin_t*tx[v]+cos_t*ty[v]+shift_y
            if px < lo_x:
                lo_x = px
            if px > hi_x:
                hi_x = px
            if py < lo_y:
                lo_y = py
            if py > hi_y:
                hi_y = py

    width = hi_x-lo_x
    height = hi_y-lo_y
    side = max(width, height)
    return side*side/count

def strip(a):
    """Convert a sequence of values such as ``'s1.25'`` to a float64 array.

    Each element is turned into a string, every ``'s'`` character is
    removed from it, and the remainder is parsed with ``float``.
    """
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace('s','')))
    return np.array(parsed, np.float64)

# Build the template outline once; the scoring cells below pass tx and ty
# to score_group for every group they evaluate.
tx, ty = make_polygon_template()
# Progress marker so the cell shows visible output once everything is defined.
print('Functions defined')

Functions defined


In [2]:
# Collect every CSV file under the snapshot directory: one recursive search
# plus one top-level search, merged into a sorted list without duplicates.
base_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/'
csv_files = sorted(
    set(glob.glob(base_path + '**/*.csv', recursive=True))
    | set(glob.glob(base_path + '*.csv'))
)
print(f'Found {len(csv_files)} CSV files')

Found 30 CSV files


In [3]:
# Score each CSV file
def score_csv(filepath):
    """Score one submission CSV as the sum of ``score_group`` over its groups.

    The file must provide the columns ``id``, ``x``, ``y`` and ``deg``. The
    group number N is the integer before the first ``_`` of ``id``; groups
    with N outside 1..200 are ignored. Coordinates and angles are parsed
    with ``strip`` before being scored against the module-level template
    ``tx`` / ``ty``.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.

    Returns
    -------
    float or None
        Sum of the per-group scores, or ``None`` when the file cannot be
        read or parsed, lacks a required column, or contains no group with
        N in 1..200. Returning ``None`` in the last case keeps an empty or
        out-of-range file from ranking as "best" with a total of 0.

    Notes
    -----
    The total covers only the groups present in the file, so totals of
    files that cover different sets of N are not directly comparable.
    """
    try:
        df = pd.read_csv(filepath)
        if not {'id','x','y','deg'}.issubset(df.columns):
            return None
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
        total = 0
        scored_groups = 0
        for n, g in df.groupby('N'):
            if n < 1 or n > 200:
                continue
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
            total += score_group(xs, ys, ds, tx, ty)
            scored_groups += 1
        if scored_groups == 0:
            # Nothing was scored: a total of 0 would beat every real file.
            return None
        return total
    except Exception as exc:
        # Stay best-effort (one bad file must not stop the scan), but say
        # why the file was skipped instead of hiding the failure.
        print(f'Skipping {filepath}: {type(exc).__name__}: {exc}')
        return None

# Score every discovered file. Files score_csv cannot use (None) are left out.
scores = {}
for fp in csv_files:
    score = score_csv(fp)
    if score is not None:
        scores[fp] = score
        # Print the path relative to base_path: several sources share a
        # basename (e.g. submission.csv), so the basename alone does not
        # identify which file a score belongs to.
        print(f'{os.path.relpath(fp, base_path)}: {score:.6f}')

# Lower is better. Guard the empty case, where min() would raise ValueError.
if scores:
    best_file = min(scores, key=scores.get)
    print(f'\nBest: {scores[best_file]:.6f}')
    print(f'Best file: {best_file}')
else:
    print('\nNo scorable CSV files found')

best_ensemble.csv: 70.676102


submission (77).csv: 72.135010
submission.csv: 70.676501


submission_best.csv: 70.926150
ensemble.csv: 70.676102
santa-2025.csv: 70.676102
submission.csv: 72.935294


submission_sa.csv: 72.935294
santa-2025.csv: 70.676102
New_Tree_144_196.csv: 72.927920
santa2025_ver2_v61.csv: 72.951925


santa2025_ver2_v63.csv: 72.947427
santa2025_ver2_v65.csv: 72.935294
santa2025_ver2_v66.csv: 72.938599
santa2025_ver2_v67.csv: 72.938567


santa2025_ver2_v68.csv: 72.939233
santa2025_ver2_v69.csv: 72.850110
santa2025_ver2_v76.csv: 72.826444
submission_70_926149550346.csv: 70.926150


submission_70_936673758122.csv: 70.936674
submission_JKoT1.csv: 72.489483
submission_JKoT2.csv: 72.489348
submission_JKoT3.csv: 72.489488


submission_JKoT4.csv: 72.489504
submission_opt1.csv: 70.990692
submission.csv: 70.676501
71.97.csv: 71.972027


72.49.csv: 72.495739
71.97.csv: 71.972027
72.49.csv: 72.495739

Best: 70.676102
Best file: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv


In [4]:
# Find best per-N across all sources
# Total score of santa-2025.csv, the baseline this cell compares against.
BASELINE_SCORE = 70.676102

# For every N: the lowest score seen so far, the basename of the file it came
# from ('src'), and that file's full path ('path'). The full path is kept
# because several sources share a basename.
best_per_n = {n: {'score': 1e300, 'src': None, 'path': None} for n in range(1, 201)}

for fp in csv_files:
    try:
        df = pd.read_csv(fp)
        if not {'id','x','y','deg'}.issubset(df.columns):
            continue
        # N is the integer before the first '_' in the id column.
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
        for n, g in df.groupby('N'):
            if n < 1 or n > 200:
                continue
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
            sc = score_group(xs, ys, ds, tx, ty)
            if sc < best_per_n[n]['score']:
                best_per_n[n]['score'] = sc
                best_per_n[n]['src'] = os.path.basename(fp)
                best_per_n[n]['path'] = fp
    except Exception as exc:
        # Best-effort scan: skip a file that cannot be processed, but report
        # it. Catching Exception (not a bare except) lets KeyboardInterrupt
        # still stop the cell.
        print(f'Skipping {fp}: {type(exc).__name__}: {exc}')
        continue

# An N that no source covers still carries the 1e300 placeholder, which would
# dominate the total below, so call it out explicitly.
missing = [n for n in range(1, 201) if best_per_n[n]['src'] is None]
if missing:
    print(f'WARNING: no source covers N={missing}; the total below includes the 1e300 placeholder')

# Calculate total if we use best per-N
total_best = sum(best_per_n[n]['score'] for n in range(1, 201))
print(f'Total score using best per-N: {total_best:.6f}')
print(f'\nBaseline (santa-2025.csv): {BASELINE_SCORE:.6f}')
print(f'Improvement: {BASELINE_SCORE - total_best:.6f}')

Total score using best per-N: 70.676102

Baseline (santa-2025.csv): 70.676102
Improvement: -0.000000


In [5]:
# Show which N values have different best sources
baseline_path = base_path + 'santa-2025.csv'
baseline_df = pd.read_csv(baseline_path)
# N is the integer before the first '_' in the id column.
baseline_df['N'] = baseline_df['id'].astype(str).str.split('_').str[0].astype(int)

# Per-N score of the baseline file, computed the same way as for the sources.
baseline_scores = {}
for n, g in baseline_df.groupby('N'):
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    baseline_scores[n] = score_group(xs, ys, ds, tx, ty)

# Find N values where some source beats the baseline by more than 1e-9.
# Only the score is compared: the recorded source name is a basename, and
# other files in the scan share the name 'santa-2025.csv', so filtering on
# it could hide a real improvement.
print('N values with better sources than baseline:')
n_better = 0
for n in range(1, 201):
    if n not in baseline_scores:
        # Report the gap instead of failing with a KeyError.
        print(f'N={n}: missing from baseline file')
        continue
    if best_per_n[n]['score'] < baseline_scores[n] - 1e-9:
        improvement = baseline_scores[n] - best_per_n[n]['score']
        n_better += 1
        print(f'N={n}: baseline={baseline_scores[n]:.6f}, best={best_per_n[n]["score"]:.6f} from {best_per_n[n]["src"]} (Δ={improvement:.6f})')
if n_better == 0:
    # Make an empty result explicit rather than printing only the header.
    print('(none)')

N values with better sources than baseline:


In [None]:
# Summary of findings
# Named once here so each figure cannot drift between the lines that print it.
BASELINE_SCORE = 70.676102  # total score of the baseline file santa-2025.csv
TARGET_SCORE = 68.919154    # total score this analysis is trying to reach

print('='*60)
print('SUMMARY')
print('='*60)
print(f'Baseline score: {BASELINE_SCORE:.6f}')
print(f'Best ensemble score: {total_best:.6f}')
print(f'Potential improvement: {BASELINE_SCORE - total_best:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap to target: {total_best - TARGET_SCORE:.6f}')