# Evolver Loop 2 Analysis

Systematically scan all snapshots to find the best per-N configurations.
This is a comprehensive scan of ~100 snapshot folders.

In [None]:
import pandas as pd
import numpy as np
import math
from numba import njit
import os
from glob import glob
import json

# Tree vertices
TX = np.array([0,0.125,0.0625,0.2,0.1,0.35,0.075,0.075,-0.075,-0.075,-0.35,-0.1,-0.2,-0.0625,-0.125])
TY = np.array([0.8,0.5,0.5,0.25,0.25,0,0,-0.2,-0.2,0,0,0.25,0.25,0.5,0.5])

In [None]:
@njit
def score_group(xs, ys, degs, tx, ty):
    """Return the bounding-square score of one N configuration.

    Every tree is the polygon ``(tx, ty)`` rotated by ``degs[i]`` degrees and
    then shifted to ``(xs[i], ys[i])``.  The score is the squared side of the
    axis-aligned square that encloses all transformed vertices, divided by
    the number of trees (``xs.size``).
    """
    num_trees = xs.size
    num_vertices = tx.size
    # Sentinels far outside any coordinate, so the first vertex replaces them.
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300
    for tree in range(num_trees):
        angle = degs[tree] * math.pi / 180.0
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        for vertex in range(num_vertices):
            px = cos_a * tx[vertex] - sin_a * ty[vertex] + xs[tree]
            py = sin_a * tx[vertex] + cos_a * ty[vertex] + ys[tree]
            min_x = min(min_x, px)
            max_x = max(max_x, px)
            min_y = min(min_y, py)
            max_y = max(max_y, py)
    width = max_x - min_x
    height = max_y - min_y
    side = max(width, height)
    return side * side / num_trees

In [None]:
def parse_submission(filepath):
    """Parse a submission CSV into per-N tree configurations.

    The file needs ``id``, ``x``, ``y`` and ``deg`` columns.  ``id`` has the
    form ``NNN_idx``; the three value columns normally carry an ``s`` prefix
    (for example ``s0.125``), but plain numeric columns are accepted too.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        ``(configs, df)``: ``configs`` maps each N (int) to a tuple
        ``(xs, ys, degs)`` of float arrays in file order; ``df`` is the
        parsed DataFrame with ``x_val``, ``y_val``, ``deg_val`` and ``N``
        columns added.  ``(None, None)`` is returned when the file has no
        rows or cannot be read or parsed; in the latter case the reason is
        printed so the file does not vanish from a scan unnoticed.
    """
    try:
        df = pd.read_csv(filepath)
        if df.empty:
            return None, None

        for column in ('x', 'y', 'deg'):
            # Coerce to str first: a column written without the 's' prefix is
            # read as a numeric dtype, which has no .str accessor.
            text = df[column].astype(str).str.replace('s', '', regex=False)
            df[f'{column}_val'] = text.astype(float)

        # Extract N from id (format: NNN_idx)
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

        configs = {
            int(n): (
                group['x_val'].values,
                group['y_val'].values,
                group['deg_val'].values,
            )
            for n, group in df.groupby('N')
        }
        return configs, df
    except Exception as exc:
        # Stay best-effort (one bad file must not abort a scan over many
        # files) but say why the file was skipped.
        print(f'Skipping {filepath}: {type(exc).__name__}: {exc}')
        return None, None

In [None]:
def calculate_scores_by_n(configs, tx, ty):
    """Score every N from 1 to 200 that has a configuration.

    Returns a dict mapping N to ``score_group`` applied to that N's
    ``(xs, ys, degs)`` arrays, in ascending N order.  N values absent from
    ``configs`` are left out of the result.
    """
    return {
        n: score_group(*configs[n], tx, ty)
        for n in range(1, 201)
        if n in configs
    }

In [None]:
# Find all submission CSVs in snapshots
snapshot_base = '/home/nonroot/snapshots/santa-2025'
snapshot_dirs = sorted(glob(f'{snapshot_base}/*/'))

print(f'Found {len(snapshot_dirs)} snapshot directories')

# Collect all submission files as (source_id, path) pairs
all_submissions = []

# Check preoptimized folder
preopt_files = [
    '/home/code/preoptimized/submission.csv',
    '/home/code/preoptimized/best_snapshot.csv',
    '/home/code/preoptimized/saspav_best.csv',
    '/home/code/preoptimized/smartmanoj_submission.csv',
    '/home/code/preoptimized/ensemble.csv',
]
for preopt_path in preopt_files:
    if os.path.exists(preopt_path):
        # Give every preoptimized file its own source_id.  Totals and per-N
        # winners are keyed by source_id, so a shared 'preopt' tag would let
        # these files overwrite each other and hide which one actually won.
        all_submissions.append((f'preopt/{os.path.basename(preopt_path)}', preopt_path))

# Check each snapshot
for snap_dir in snapshot_dirs:
    snap_id = os.path.basename(snap_dir.rstrip('/'))
    # Check common locations for submission files; only the first hit is used
    possible_paths = [
        f'{snap_dir}submission/submission.csv',
        f'{snap_dir}code/submission.csv',
        f'{snap_dir}submission.csv',
    ]
    found_path = next((path for path in possible_paths if os.path.exists(path)), None)
    if found_path is not None:
        all_submissions.append((snap_id, found_path))

print(f'Found {len(all_submissions)} submission files to scan')

In [None]:
# Score all submissions and track best per-N
best_scores_by_n = {n: float('inf') for n in range(1, 201)}
best_source_by_n = {n: None for n in range(1, 201)}
best_configs_by_n = {n: None for n in range(1, 201)}

# Total score per source, recorded only for complete submissions
all_totals = {}

for source_id, filepath in all_submissions:
    configs, _ = parse_submission(filepath)
    if configs is None:
        continue

    scores_by_n = calculate_scores_by_n(configs, TX, TY)

    # The configuration for N is expected to hold exactly N trees.
    # score_group divides by the actual row count, so a group with rows
    # missing or duplicated would get a misleading score; leave it out.
    scores_by_n = {
        n: score
        for n, score in scores_by_n.items()
        if len(configs[n][0]) == n
    }

    # A total is only comparable between submissions when it covers every N.
    # A partial file would otherwise show an artificially low total and be
    # mistaken for the best single submission.
    if len(scores_by_n) == len(best_scores_by_n):
        all_totals[source_id] = sum(scores_by_n.values())

    # Update best per-N (partial submissions may still contribute here)
    for n, score in scores_by_n.items():
        if score < best_scores_by_n[n]:
            best_scores_by_n[n] = score
            best_source_by_n[n] = source_id
            best_configs_by_n[n] = configs[n]

print(f'Scanned {len(all_totals)} valid submissions')

In [None]:
# Rank submissions by total score in ascending order and list the first 10
sorted_totals = sorted(all_totals.items(), key=lambda entry: entry[1])
print('\nTop 10 submissions by total score:')
top_ten = sorted_totals[:10]
for source_id, total in top_ten:
    print(f'  {source_id}: {total:.6f}')

In [None]:
# Total of the per-N best scores, compared with the best single submission
ensemble_total = sum(best_scores_by_n.values())
print(f'\nOptimal ensemble from ALL sources: {ensemble_total:.6f}')
best_single_total = sorted_totals[0][1]
print(f'Best single submission: {best_single_total:.6f}')
ensemble_gain = best_single_total - ensemble_total
print(f'Improvement from ensemble: {ensemble_gain:.6f}')

In [None]:
# Count how many N values each source wins in the ensemble
source_counts = {}
for n in range(1, 201):
    winner = best_source_by_n[n]
    if winner in source_counts:
        source_counts[winner] += 1
    else:
        source_counts[winner] = 1

print('\nSource breakdown for optimal ensemble:')
# Most frequent source first; ties keep first-seen order (sort is stable)
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)
for winner, wins in ranked_sources:
    print(f'  {winner}: {wins} N values')

In [None]:
# Show which N values improved vs best_snapshot
best_snapshot_configs, _ = parse_submission('/home/code/preoptimized/best_snapshot.csv')

if best_snapshot_configs is None:
    # parse_submission returns None for a missing or unreadable file; scoring
    # None would fail with an opaque TypeError, so skip this purely
    # informational comparison instead.
    print('\nbest_snapshot.csv could not be read - skipping comparison')
else:
    best_snapshot_scores = calculate_scores_by_n(best_snapshot_configs, TX, TY)

    print('\nN values where ensemble beats best_snapshot:')
    improvements = []
    for n, snapshot_score in best_snapshot_scores.items():
        improvement = snapshot_score - best_scores_by_n[n]
        # Ignore differences at floating-point noise level
        if improvement > 1e-9:
            improvements.append((n, improvement, best_source_by_n[n]))

    if improvements:
        # Largest gains first, at most 20 lines
        for n, imp, source in sorted(improvements, key=lambda x: -x[1])[:20]:
            print(f'  N={n}: improvement {imp:.6f} from {source}')
    else:
        print('  No improvements found - best_snapshot already optimal')

In [None]:
# Save the optimal ensemble
def generate_submission(configs_by_n, output_path):
    """Write the configurations for N = 1..200 to ``output_path`` as CSV.

    ``configs_by_n[n]`` must be an ``(xs, ys, degs)`` triple.  One row is
    written per tree with id ``NNN_idx`` and each value prefixed with ``s``.
    Returns the DataFrame that was written (columns ``id, x, y, deg``).
    """
    rows = []
    for n in range(1, 201):
        xs, ys, degs = configs_by_n[n]
        rows.extend(
            {
                'id': f'{n:03d}_{idx}',
                'x': f's{x}',
                'y': f's{ys[idx]}',
                'deg': f's{degs[idx]}',
            }
            for idx, x in enumerate(xs)
        )
    frame = pd.DataFrame(rows)
    frame.to_csv(output_path, index=False)
    return frame

# Only write a freshly generated ensemble if it beats every single submission
import shutil

best_single_id, best_single_total = sorted_totals[0]
os.makedirs('/home/submission', exist_ok=True)

if ensemble_total < best_single_total - 1e-9:
    submission_df = generate_submission(best_configs_by_n, '/home/submission/submission.csv')
    print('\nSaved optimal ensemble to /home/submission/submission.csv')
    print(f'Total rows: {len(submission_df)}')
    print(f'Score: {ensemble_total:.6f}')
else:
    # The ensemble is no better than the best single submission, so ship that
    # file itself (copied byte-for-byte rather than re-serialised).  It is
    # not necessarily best_snapshot.csv, so look it up by source_id.  Several
    # files may share one source_id, hence the re-score to pick the file
    # whose total matches the recorded best.
    best_single_path = '/home/code/preoptimized/best_snapshot.csv'  # fallback
    for candidate_id, candidate_path in all_submissions:
        if candidate_id != best_single_id:
            continue
        candidate_configs, _ = parse_submission(candidate_path)
        if candidate_configs is None:
            continue
        candidate_total = sum(calculate_scores_by_n(candidate_configs, TX, TY).values())
        if abs(candidate_total - best_single_total) <= 1e-9:
            best_single_path = candidate_path
            break

    print(f'\nNo improvement over best single submission - keeping {best_single_path}')
    shutil.copy(best_single_path, '/home/submission/submission.csv')
    print(f'Copied {best_single_path} to /home/submission/submission.csv')

In [None]:
# Verify the submission by re-reading and re-scoring the saved file
verify_configs, _ = parse_submission('/home/submission/submission.csv')
if verify_configs is None:
    # parse_submission returns None when the file is missing, empty or
    # malformed; fail with a clear message instead of a TypeError downstream.
    raise RuntimeError('Saved submission /home/submission/submission.csv could not be parsed')

verify_scores = calculate_scores_by_n(verify_configs, TX, TY)
verify_total = sum(verify_scores.values())
print(f'\nVerification of saved submission: {verify_total:.6f}')

# A total over fewer than 200 groups looks better than it is; flag it.
missing_ns = [n for n in range(1, 201) if n not in verify_scores]
if missing_ns:
    print(f'  WARNING: {len(missing_ns)} N values missing from saved submission, e.g. {missing_ns[:10]}')