# Loop 3 Analysis: Comprehensive Ensemble from ALL Sources

Goal: Find the best configuration for each N from ALL available pre-optimized sources.

Current best: 70.676816 (gap to target: 1.75 points)
Target: 68.922808

In [None]:
import math
import numpy as np
import pandas as pd
from numba import njit
import os
from glob import glob

# Tree polygon template
@njit
def make_polygon_template():
    """Return the 15 vertices of the tree outline as ``(x, y)`` float64 arrays.

    The outline is symmetric about the y axis. It starts at the tip
    ``(0, 0.8)``, runs down the right-hand side (positive x) through the
    three tiers and the rectangular stem, then mirrors back up the
    left-hand side.
    """
    # Half / quarter widths of each tier and half width of the stem.
    stem_half = 0.15 / 2
    base_half = 0.7 / 2
    mid_half = 0.4 / 2
    mid_quarter = 0.4 / 4
    top_half = 0.25 / 2
    top_quarter = 0.25 / 4

    # Heights of the tip, the two tier shoulders, the base line and the
    # bottom of the stem (the stem hangs 0.2 below the base line).
    tip_y = 0.8
    upper_y = 0.5
    lower_y = 0.25
    base_y = 0.0
    stem_y = -0.2

    x = np.array(
        [
            0.0, top_half, top_quarter, mid_half, mid_quarter,
            base_half, stem_half, stem_half,
            -stem_half, -stem_half, -base_half,
            -mid_quarter, -mid_half, -top_quarter, -top_half,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y, upper_y, upper_y, lower_y, lower_y,
            base_y, base_y, stem_y,
            stem_y, base_y, base_y,
            lower_y, lower_y, upper_y, upper_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group: squared bounding-square side divided by tree count.

    Every tree is the template polygon ``(tx, ty)`` rotated by ``degs[i]``
    degrees and translated to ``(xs[i], ys[i])``. The axis-aligned bounding
    box of all transformed vertices is computed, the longer of its two
    sides is squared, and the result is divided by ``xs.size``.

    Args:
        xs, ys: per-tree translation coordinates.
        degs: per-tree rotation angles in degrees.
        tx, ty: template polygon vertex coordinates.

    Returns:
        ``side * side / n`` where ``n = xs.size``.
    """
    count = xs.size
    n_vertices = tx.size

    # Running bounding box, seeded so that the first vertex always wins.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300

    for tree in range(count):
        angle = degs[tree] * math.pi / 180.0
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        off_x = xs[tree]
        off_y = ys[tree]
        for k in range(n_vertices):
            # Rotate the template vertex, then translate it into place.
            px = cos_a * tx[k] - sin_a * ty[k] + off_x
            py = sin_a * tx[k] + cos_a * ty[k] + off_y
            if px < lo_x:
                lo_x = px
            if px > hi_x:
                hi_x = px
            if py < lo_y:
                lo_y = py
            if py > hi_y:
                hi_y = py

    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / count

# Build the shared polygon template once; later cells read tx / ty directly.
tx, ty = make_polygon_template()
# Score a single tree at the origin rotated 45 degrees purely to trigger
# JIT compilation of score_group for float64 arrays; the value is discarded.
_ = score_group(np.zeros(1), np.zeros(1), np.full(1, 45.0), tx, ty)
print('JIT compiled')

In [None]:
def strip(a):
    """Parse a sequence of 's'-tagged values into a float64 array.

    Each element is converted to text, every ``'s'`` character is removed,
    and the remainder is parsed with ``float``.

    Args:
        a: iterable of values (e.g. strings such as ``'s1.5'``).

    Returns:
        ``np.float64`` array with one entry per element of ``a``.

    Raises:
        ValueError: if an element is not a valid float once the ``'s'``
            characters have been removed.
    """
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace('s', '')))
    return np.array(parsed, np.float64)

def load_submission(filepath):
    """Load a submission CSV and split it into per-N configurations.

    The group number N is the integer before the first ``'_'`` in the
    ``id`` column. The ``x``, ``y`` and ``deg`` columns of each group are
    parsed with ``strip``.

    Args:
        filepath: path of the CSV file to read.

    Returns:
        Dict mapping N to a tuple ``(xs, ys, degs)`` of float64 arrays, or
        ``None`` if anything fails while reading or parsing (the error is
        printed rather than raised).
    """
    try:
        frame = pd.read_csv(filepath)
        group_label = frame['id'].astype(str).str.split('_').str[0]
        frame['N'] = group_label.astype(int)
        configs = {
            n: tuple(strip(rows[col].to_numpy()) for col in ('x', 'y', 'deg'))
            for n, rows in frame.groupby('N')
        }
        return configs
    except Exception as e:
        # Best effort: callers treat None as "skip this source".
        print(f'Error loading {filepath}: {e}')
        return None

# Progress marker: printed once the helper definitions above have executed.
print('Helper functions defined')

In [None]:
# Find ALL CSV files from snapshots
base_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/'
# glob returns matches in arbitrary, filesystem-dependent order. Sorting makes
# the listing reproducible and keeps any later "first source wins" tie-breaking
# stable from run to run.
all_csvs = sorted(glob(os.path.join(base_dir, '**/*.csv'), recursive=True))
print(f'Found {len(all_csvs)} CSV files in preoptimized directory')
for f in all_csvs:
    print(f'  {f}')

In [None]:
# Load all sources and compute scores for each N
print('Loading all sources and computing per-N scores...')
print('='*80)

# Every group number a complete submission must contain.
required_ns = range(1, 201)

# csv_path -> {'configs': per-N arrays, 'n_scores': per-N score, 'total': sum}
all_sources = {}
for csv_path in all_csvs:
    name = os.path.basename(csv_path)
    configs = load_submission(csv_path)
    # A source is usable only if it holds exactly the groups N=1..200.
    # Counting groups alone is not enough: 200 groups under other labels
    # would leave gaps in n_scores and make the per-N selection that follows
    # fail with KeyError when it looks up the missing configuration.
    is_complete = (
        configs is not None
        and len(configs) == 200
        and all(n in configs for n in required_ns)
    )
    if not is_complete:
        print(f'{name}: SKIPPED (incomplete or error)')
        continue

    # Compute score for each N
    n_scores = {n: score_group(*configs[n], tx, ty) for n in required_ns}
    total = sum(n_scores.values())
    all_sources[csv_path] = {'configs': configs, 'n_scores': n_scores, 'total': total}
    print(f'{name}: {total:.6f}')

print('='*80)
print(f'Loaded {len(all_sources)} complete sources')

In [None]:
# Find best config for each N across ALL sources
print('\nFinding best config for each N across all sources...')

best_for_n = {}  # n -> (score, source_path, config)
for source_path, data in all_sources.items():
    for n in range(1, 201):
        score = data['n_scores'].get(n)
        if score is None:
            # This source has nothing for n; looking up its config would
            # raise KeyError, so it simply does not compete for this N.
            continue
        # Strict '<' keeps the first source seen when scores tie.
        if n not in best_for_n or score < best_for_n[n][0]:
            best_for_n[n] = (score, source_path, data['configs'][n])

# Everything below indexes best_for_n for every N in 1..200. Fail here with
# a clear message instead of a bare KeyError when no source was loaded or
# some N is not covered by any source.
missing_ns = [n for n in range(1, 201) if n not in best_for_n]
if missing_ns:
    raise RuntimeError(
        f'{len(missing_ns)} of 200 N values have no configuration in any '
        f'loaded source (first missing: N={missing_ns[0]}); '
        f'{len(all_sources)} sources were loaded'
    )

# Calculate ensemble score
ensemble_score = sum(best_for_n[n][0] for n in range(1, 201))
print(f'\nEnsemble score (best per N): {ensemble_score:.6f}')
print(f'Target: 68.922808')
print(f'Gap: {ensemble_score - 68.922808:.6f}')

# Show which sources contributed
# NOTE: the tally is keyed by file basename, so distinct source files that
# share a basename are counted together.
source_counts = {}
for n in range(1, 201):
    src = os.path.basename(best_for_n[n][1])
    source_counts[src] = source_counts.get(src, 0) + 1

print('\nSource contributions:')
for src, count in sorted(source_counts.items(), key=lambda x: -x[1]):
    print(f'  {src}: {count} configs')

In [None]:
# List the 20 group sizes whose best available per-N score is largest.
print('\nTop 20 N values with highest scores (most room for improvement):')
n_scores_sorted = sorted(
    ((n, best_for_n[n][0]) for n in range(1, 201)),
    key=lambda pair: pair[1],
    reverse=True,  # stable, so equal scores stay in ascending-N order
)
for n, score in n_scores_sorted[:20]:
    src = os.path.basename(best_for_n[n][1])
    print(f'  N={n:3d}: {score:.6f} (from {src})')

In [None]:
# Compare the per-N ensemble against the current best submission.
current_best_path = '/home/code/experiments/007_preoptimized_baseline/submission.csv'
current_configs = load_submission(current_best_path)

if current_configs:
    # Score every N of the current submission with the same metric.
    current_n_scores = {
        n: score_group(*current_configs[n], tx, ty) for n in range(1, 201)
    }
    current_total = sum(current_n_scores.values())
    print(f'Current best total: {current_total:.6f}')

    # Collect (n, gain, current score, ensemble score) for every N where the
    # ensemble beats the current submission by more than 1e-9.
    improvements = [
        (n, current_n_scores[n] - best_for_n[n][0], current_n_scores[n], best_for_n[n][0])
        for n in range(1, 201)
        if current_n_scores[n] - best_for_n[n][0] > 1e-9
    ]

    if not improvements:
        print('No improvements found - current solution is already the best ensemble')
    else:
        print(f'\nFound {len(improvements)} improvements:')
        # Largest gain first; only the top 20 are listed.
        improvements.sort(key=lambda item: item[1], reverse=True)
        for n, diff, old, new in improvements[:20]:
            src = os.path.basename(best_for_n[n][1])
            print(f'  N={n:3d}: {old:.6f} -> {new:.6f} (improvement: {diff:.6f}) from {src}')
        total_improvement = sum(item[1] for item in improvements)
        print(f'\nTotal potential improvement: {total_improvement:.6f}')