# Evolver Loop 2 Analysis: Overlap Repair Strategy

## Key Insight from Evaluator
The ensemble of best configs scores 67.77, which is BELOW (i.e. better than) the target of 68.93 — lower scores are better — but it contains overlapping trees.
The path to victory is clear: REPAIR THE OVERLAPS.

## Analysis Goals
1. Understand the overlap situation in the best configs
2. Analyze how much improvement is locked behind overlaps
3. Design an overlap repair strategy

In [None]:
import numpy as np
import pandas as pd
import math
from numba import njit
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import os

# Tree polygon template
@njit
def make_polygon_template():
    """Build the tree outline used as the shape template.

    Returns:
        A pair ``(x, y)`` of float64 arrays holding the 15 outline vertices,
        starting at the tip and listing the right-hand side first, then the
        mirrored left-hand side.
    """
    # Horizontal extents of the trunk and of the three tiers.
    trunk_w = 0.15
    base_w = 0.7
    mid_w = 0.4
    top_w = 0.25
    # Vertical levels, from the tip down to the bottom of the trunk.
    trunk_h = 0.2
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h

    x = np.array(
        [
            0.0, top_w / 2, top_w / 4, mid_w / 2, mid_w / 4, base_w / 2,
            trunk_w / 2, trunk_w / 2, -trunk_w / 2, -trunk_w / 2,
            -base_w / 2, -mid_w / 4, -mid_w / 2, -top_w / 4, -top_w / 2,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y, tier1_y, tier1_y, tier2_y, tier2_y, base_y,
            base_y, trunk_bottom_y, trunk_bottom_y, base_y,
            base_y, tier2_y, tier2_y, tier1_y, tier1_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of placed trees.

    Each tree is the template ``(tx, ty)`` rotated by ``degs[k]`` degrees and
    translated by ``(xs[k], ys[k])``. The score is the squared side of the
    smallest axis-aligned square covering every transformed vertex, divided
    by the number of trees.

    Args:
        xs, ys: Per-tree translation arrays.
        degs: Per-tree rotation angles in degrees.
        tx, ty: Template vertex coordinate arrays.

    Returns:
        ``side * side / n`` where ``side`` is the larger bounding-box extent.
    """
    count = xs.size
    n_vertices = tx.size

    # Sentinels guarantee the first vertex replaces all four bounds.
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300

    for k in range(count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        off_x = xs[k]
        off_y = ys[k]
        for v in range(n_vertices):
            px = cos_t * tx[v] - sin_t * ty[v] + off_x
            py = sin_t * tx[v] + cos_t * ty[v] + off_y
            # Independent checks: a single vertex can update both a min and a max.
            if px < min_x:
                min_x = px
            if px > max_x:
                max_x = px
            if py < min_y:
                min_y = py
            if py > max_y:
                max_y = py

    width = max_x - min_x
    height = max_y - min_y
    side = max(width, height)
    return side * side / count

def strip(a):
    """Convert an iterable of values such as ``'s1.25'`` to a float64 array.

    Every value is stringified, all ``'s'`` characters are removed, and the
    remainder is parsed with ``float``.
    """
    cleaned = (str(item).replace('s', '') for item in a)
    return np.array(list(map(float, cleaned)), dtype=np.float64)

# Build the tree outline once; tx/ty are passed to the scoring and overlap
# helpers in the cells below.
tx, ty = make_polygon_template()
print('Template loaded')

In [None]:
# Overlap detection functions
def get_shapely_polygon(cx, cy, deg, tx, ty):
    """Return the template rotated by ``deg`` degrees and moved to ``(cx, cy)``.

    Args:
        cx, cy: Translation applied after the rotation.
        deg: Rotation angle in degrees.
        tx, ty: Template vertex coordinate arrays.

    Returns:
        A shapely ``Polygon`` built from the transformed vertices.
    """
    theta = deg * np.pi / 180.0
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotated_x = cos_t * tx - sin_t * ty + cx
    rotated_y = sin_t * tx + cos_t * ty + cy
    return Polygon(zip(rotated_x, rotated_y))

def has_overlap(xs, ys, degs, tx, ty):
    """Return True if any two placed trees overlap beyond merely touching.

    Args:
        xs, ys: Per-tree translation coordinates (indexable, same length).
        degs: Per-tree rotation angles in degrees.
        tx, ty: Template outline coordinates, forwarded to
            ``get_shapely_polygon``.

    Returns:
        True as soon as one pair intersects without only touching; False
        otherwise, including when there are fewer than two trees.
    """
    n = len(xs)
    if n <= 1:
        return False
    polygons = [get_shapely_polygon(xs[i], ys[i], degs[i], tx, ty) for i in range(n)]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        # NOTE(review): relies on query() returning integer positions into
        # `polygons` (Shapely 2.x behaviour) — confirm the installed version.
        for idx in tree_index.query(poly):
            # intersects/touches are symmetric, so each unordered pair only
            # needs one exact test; this also skips the polygon itself and
            # matches the skip rule used by find_overlapping_pairs.
            if idx <= i:
                continue
            other = polygons[idx]
            if poly.intersects(other) and not poly.touches(other):
                return True
    return False

def find_overlapping_pairs(xs, ys, degs, tx, ty):
    """Collect every pair of trees that overlap beyond merely touching.

    Args:
        xs, ys: Per-tree translation coordinates (indexable, same length).
        degs: Per-tree rotation angles in degrees.
        tx, ty: Template outline coordinates, forwarded to
            ``get_shapely_polygon``.

    Returns:
        A list of ``(i, j)`` index tuples with ``i < j``; empty when there
        are fewer than two trees or nothing overlaps.
    """
    count = len(xs)
    if count < 2:
        return []

    shapes = [get_shapely_polygon(xs[k], ys[k], degs[k], tx, ty) for k in range(count)]
    spatial_index = STRtree(shapes)

    overlapping = []
    for first, shape in enumerate(shapes):
        candidates = spatial_index.query(shape)
        # Keep only higher-indexed partners so each pair is reported once.
        overlapping.extend(
            (first, second)
            for second in candidates
            if second > first
            and shape.intersects(shapes[second])
            and not shape.touches(shapes[second])
        )
    return overlapping

# Progress message printed once the overlap helpers above are defined.
print('Overlap detection functions loaded')

In [None]:
# Load the ensemble with overlaps (67.77 score)
ensemble_path = '/home/nonroot/snapshots/santa-2025/21108486172/code/experiments/001_baseline/ensemble_submission.csv'
df_ensemble = pd.read_csv(ensemble_path)
# The group size N is taken from the part of `id` before the first underscore.
df_ensemble['N'] = df_ensemble['id'].astype(str).str.split('_').str[0].astype(int)

# Load baseline (70.73 score, no overlaps)
baseline_path = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
df_baseline = pd.read_csv(baseline_path)
# Same N derivation as for the ensemble so both frames can be filtered alike.
df_baseline['N'] = df_baseline['id'].astype(str).str.split('_').str[0].astype(int)

print('Loaded ensemble and baseline')

In [None]:
# Analyze overlaps in the ensemble
# One record per group size n: both scores, their difference, and how many
# overlapping pairs the ensemble layout contains.
overlap_analysis = []

for n in range(1, 201):
    # Rows of each submission belonging to the group of size n.
    g_ens = df_ensemble[df_ensemble['N'] == n]
    g_base = df_baseline[df_baseline['N'] == n]
    
    # strip() removes the 's' characters from the stored values and parses
    # them as floats.
    xs_ens = strip(g_ens['x'].to_numpy())
    ys_ens = strip(g_ens['y'].to_numpy())
    ds_ens = strip(g_ens['deg'].to_numpy())
    
    xs_base = strip(g_base['x'].to_numpy())
    ys_base = strip(g_base['y'].to_numpy())
    ds_base = strip(g_base['deg'].to_numpy())
    
    score_ens = score_group(xs_ens, ys_ens, ds_ens, tx, ty)
    score_base = score_group(xs_base, ys_base, ds_base, tx, ty)
    
    # Overlaps are only checked for the ensemble layout.
    pairs = find_overlapping_pairs(xs_ens, ys_ens, ds_ens, tx, ty)
    
    overlap_analysis.append({
        'n': n,
        'score_ens': score_ens,
        'score_base': score_base,
        # Positive when the ensemble score is lower than the baseline score.
        'improvement': score_base - score_ens,
        'num_overlaps': len(pairs),
        'has_overlap': len(pairs) > 0
    })

# Summarise the per-n records: score totals and how many n have overlaps.
df_analysis = pd.DataFrame(overlap_analysis)
print(f'Total ensemble score: {df_analysis["score_ens"].sum():.6f}')
print(f'Total baseline score: {df_analysis["score_base"].sum():.6f}')
print(f'Potential improvement: {df_analysis["improvement"].sum():.6f}')
print(f'\nN values with overlaps: {df_analysis["has_overlap"].sum()}')
print(f'N values without overlaps: {(~df_analysis["has_overlap"]).sum()}')

In [None]:
# Show N values with most improvement potential (locked behind overlaps)
# Keep only the n whose ensemble layout overlaps, largest improvement first.
df_with_overlaps = df_analysis[df_analysis['has_overlap']].sort_values('improvement', ascending=False)
print('Top 20 N values with overlaps and improvement potential:')
print(df_with_overlaps[['n', 'score_ens', 'score_base', 'improvement', 'num_overlaps']].head(20).to_string())

# Sum of (baseline - ensemble) over every n that currently has overlaps.
print(f'\nTotal improvement locked behind overlaps: {df_with_overlaps["improvement"].sum():.6f}')

In [None]:
# Check N values WITHOUT overlaps - these are free improvements!
df_no_overlaps = df_analysis[~df_analysis['has_overlap']]
print(f'N values without overlaps: {len(df_no_overlaps)}')
print(f'Improvement from non-overlapping configs: {df_no_overlaps["improvement"].sum():.6f}')

if len(df_no_overlaps) > 0:
    print('\nThese N values can be used directly from ensemble:')
    for _, row in df_no_overlaps.iterrows():
        # Only list n where the ensemble beats the baseline by more than 1e-4.
        if row['improvement'] > 0.0001:
            print(f'  N={row["n"]}: improvement={row["improvement"]:.6f}')

In [None]:
# Analyze a specific N value with overlaps to understand the repair challenge
test_n = 10  # Pick a small N to analyze

# Ensemble placement for the chosen group size, parsed to float arrays.
g = df_ensemble[df_ensemble['N'] == test_n]
xs = strip(g['x'].to_numpy())
ys = strip(g['y'].to_numpy())
ds = strip(g['deg'].to_numpy())

# List each overlapping pair together with both trees' position and rotation.
pairs = find_overlapping_pairs(xs, ys, ds, tx, ty)
print(f'N={test_n}: {len(pairs)} overlapping pairs')
for i, j in pairs:
    print(f'  Trees {i} and {j} overlap')
    print(f'    Tree {i}: x={xs[i]:.4f}, y={ys[i]:.4f}, deg={ds[i]:.2f}')
    print(f'    Tree {j}: x={xs[j]:.4f}, y={ys[j]:.4f}, deg={ds[j]:.2f}')