# Evolver Loop 3 Analysis

Analyze all available datasets and their scores to understand the gap to target.

In [None]:
import numpy as np
import pandas as pd
import os
import glob
from numba import njit
import math

@njit
def make_polygon_template():
    """Return the (x, y) vertex arrays of the 15-vertex template polygon.

    The outline is symmetric about the y axis: it starts at the tip, walks
    down the right-hand side through three widening tiers and a narrow
    rectangular stem below y = 0, then mirrors back up the left-hand side.

    Returns:
        Tuple ``(x, y)`` of float64 arrays, each of length 15.
    """
    # Horizontal extents (full widths).
    stem_w = 0.15
    stem_h = 0.2
    low_w = 0.7
    mid_w = 0.4
    top_w = 0.25

    # Vertical levels.
    y_tip = 0.8
    y_top = 0.5
    y_mid = 0.25
    y_base = 0.0
    y_stem = -stem_h

    x = np.array([
        0.0,
        top_w / 2, top_w / 4,
        mid_w / 2, mid_w / 4,
        low_w / 2,
        stem_w / 2, stem_w / 2,
        -stem_w / 2, -stem_w / 2,
        -low_w / 2,
        -mid_w / 4, -mid_w / 2,
        -top_w / 4, -top_w / 2,
    ], np.float64)
    y = np.array([
        y_tip,
        y_top, y_top,
        y_mid, y_mid,
        y_base,
        y_base, y_stem,
        y_stem, y_base,
        y_base,
        y_mid, y_mid,
        y_top, y_top,
    ], np.float64)
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of placed polygons by its bounding square.

    Every placement ``i`` rotates the template ``(tx, ty)`` by ``degs[i]``
    degrees and translates it by ``(xs[i], ys[i])``.  The score is the area
    of the smallest axis-aligned square covering all transformed vertices,
    divided by the number of placements.

    Args:
        xs, ys: per-placement translation arrays.
        degs: per-placement rotation angles in degrees.
        tx, ty: template vertex coordinate arrays.

    Returns:
        ``side ** 2 / xs.size`` where ``side`` is the longer edge of the
        axis-aligned bounding box of all transformed vertices.
    """
    count = xs.size
    n_vertices = tx.size

    # Running bounding box, seeded with sentinels any real vertex will beat.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300

    for k in range(count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        off_x = xs[k]
        off_y = ys[k]
        for v in range(n_vertices):
            # Rotate the template vertex, then shift it to the placement.
            wx = cos_t * tx[v] - sin_t * ty[v] + off_x
            wy = sin_t * tx[v] + cos_t * ty[v] + off_y
            if wx < lo_x:
                lo_x = wx
            if wx > hi_x:
                hi_x = wx
            if wy < lo_y:
                lo_y = wy
            if wy > hi_y:
                hi_y = wy

    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / count

def strip(a):
    """Convert an iterable of 's'-tagged numeric values to a float64 array.

    Each element is stringified, every ``'s'`` character is removed, and the
    remainder is parsed with ``float``.

    Args:
        a: iterable of values such as ``'s1.25'`` (plain numbers also work).

    Returns:
        1-D ``numpy.float64`` array with one entry per input element.
    """
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace('s', '')))
    return np.array(parsed, dtype=np.float64)

# Build the polygon template once; tx/ty are the vertex arrays that the
# scoring cells pass to score_group.
tx, ty = make_polygon_template()
print('Scoring functions ready')

In [None]:
# Candidate submission CSVs to evaluate; entries missing on disk are skipped.
all_sources = [
    # Original sources
    '/home/code/preoptimized_submission.csv',
    '/home/code/datasets/santa-2025.csv',
    '/home/code/datasets/71.97.csv',
    '/home/code/datasets/72.49.csv',
    '/home/code/datasets/submission.csv',
    '/home/code/datasets/jazivxt_output/submission.csv',
    '/home/code/datasets/eazy_output/submission.csv',
    '/home/code/datasets/ashraful_output/submission.csv',
    # New sources
    '/home/code/datasets/bucket-of-chump/submission.csv',
    '/home/code/datasets/telegram/71.97.csv',
    '/home/code/datasets/telegram/72.49.csv',
    '/home/code/datasets/saspav/santa-2025.csv',
    '/home/code/datasets/chistyakov/submission_best.csv',
    '/home/code/datasets/egortrushin_output/submission.csv',
    '/home/code/datasets/chistyakov_kernel_output/submission.csv',
]

# Score every source that exists and has the expected columns.  `results`
# holds (path, total) tuples, where total is the sum of the per-N scores for
# N in 1..200.  Files that do not cover all 200 groups are remembered in
# `incomplete`: their totals are sums over fewer terms, so they are not
# comparable with full submissions.
results = []
incomplete = set()
for fp in all_sources:
    if not os.path.exists(fp):
        continue
    try:
        df = pd.read_csv(fp)
        if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            continue
        # The group size N is the part of the id before the first underscore.
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
        total = 0.0
        scored = 0
        for n, g in df.groupby('N'):
            if n < 1 or n > 200:
                continue
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
            sc = score_group(xs, ys, ds, tx, ty)
            total += sc
            scored += 1
        results.append((fp, total))
        print(f'{os.path.basename(fp):40s} -> {total:.6f}')
        if scored != 200:
            incomplete.add(fp)
            print(f'  WARNING: only {scored}/200 N groups scored; total is not comparable')
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole survey.
        print(f'{fp}: ERROR - {e}')

# Complete submissions first (lowest total = best); partial ones go last so
# that a small partial sum can never be reported as the best source.
results.sort(key=lambda x: (x[0] in incomplete, x[1]))
print('\n=== Sorted by Score ===')
for fp, score in results:
    flag = ' (incomplete)' if fp in incomplete else ''
    print(f'{score:.6f} <- {fp}{flag}')

In [None]:
# Report the top-ranked source and how far it is from the target score.
# `results` is sorted best-first; it is empty when no source file could be
# scored, in which case there is nothing to index.
target = 68.922808
if results:
    best_source = results[0]
    print(f'Best source: {best_source[0]} with score {best_source[1]:.6f}')
    print(f'Target: {target:.6f}')
    print(f'Gap: {best_source[1] - target:.6f}')
else:
    print('No source could be scored; nothing to compare against the target')

In [None]:
# Build a per-N ensemble: for every N in 1..200 keep the lowest-scoring layout
# found in any source file.  Each slot stores the score, the winning rows and
# the basename of the file they came from.
best = {n: dict(score=1e300, data=None, src=None) for n in range(1, 201)}

required_cols = {'id', 'x', 'y', 'deg'}
for fp in all_sources:
    if not os.path.exists(fp):
        continue
    try:
        df = pd.read_csv(fp)
        if not required_cols.issubset(df.columns):
            continue
        # N is the part of the id before the first underscore.
        df = df.assign(N=df['id'].astype(str).str.split('_').str[0].astype(int))
        for n, g in df.groupby('N'):
            if not 1 <= n <= 200:
                continue
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
            sc = score_group(xs, ys, ds, tx, ty)
            slot = best[n]
            if sc < slot['score']:
                slot.update(
                    score=float(sc),
                    data=g.drop(columns=['N']).copy(),
                    src=os.path.basename(fp),
                )
    except Exception as e:
        print(f'Error with {fp}: {e}')

# Hand-written N=1 candidate: a single polygon at the origin rotated by 45
# degrees.  It replaces the file-based entry only if it scores lower.
manual_data = pd.DataFrame({
    'id': ['001_0'],
    'x': ['s0.0'],
    'y': ['s0.0'],
    'deg': ['s45.0'],
})
xs, ys, ds = (strip(manual_data[col].to_numpy()) for col in ('x', 'y', 'deg'))
sc = score_group(xs, ys, ds, tx, ty)
if sc < best[1]['score']:
    best[1].update(score=float(sc), data=manual_data.copy(), src='optimal_n1')

print('Ensemble computed')

In [None]:
# Summarise the ensemble: collect the winning rows, count how many N values
# each source contributed, and add up the per-N scores.
rows, used, total = [], {}, 0.0

for n in range(1, 201):
    entry = best[n]
    if entry['data'] is not None:
        rows.append(entry['data'])
        used[entry['src']] = used.get(entry['src'], 0) + 1
        total += entry['score']
    else:
        print(f'Warning: No data for N={n}')

print('\n=== Source Usage ===')
# Most-used source first.
for src, count in sorted(used.items(), key=lambda kv: kv[1], reverse=True):
    print(f'  {src}: {count} N values')

gap = total - 68.922808
print('\n=== Ensemble Score ===')
print(f'Total score: {total:.6f}')
print('Target: 68.922808')
print(f'Gap: {gap:.6f}')
print(f'Gap %: {gap / 68.922808 * 100:.2f}%')

In [None]:
# List the 30 N values that contribute most to the ensemble total.
# Slots that never received data still hold the 1e300 placeholder score and a
# None source; they are left out so the placeholder cannot crowd the real
# scores out of the listing.
print('\n=== Worst N Values (top 30) ===')
scores_list = [
    (n, best[n]['score'], best[n]['src'])
    for n in range(1, 201)
    if best[n]['data'] is not None
]

# Highest per-N score first.
scores_list.sort(key=lambda x: -x[1])
for n, score, src in scores_list[:30]:
    print(f'  N={n:3d}: {score:.6f} from {src}')