# Evolver Loop 2 Analysis

## Key Questions:
1. What is the gap to target and where does it come from?
2. Which N values have the most room for improvement?
3. What approaches haven't been tried yet?

In [None]:
import pandas as pd
import numpy as np
from numba import njit
import math

# Tree geometry
@njit
def make_polygon_template():
    """Build the vertex arrays of the unrotated, untranslated tree outline.

    The outline has 15 vertices. They start at the tip (0, 0.8), run down
    the positive-x side through two notched levels to the base, go around
    the rectangular stem below y = 0, and come back up the negative-x side
    as a mirror image.

    Returns:
        A tuple ``(x, y)`` of two float64 arrays of length 15 holding the
        vertex coordinates in outline order.
    """
    # Horizontal extents (full widths) and the stem height.
    # NOTE(review): names reflect my reading of the original abbreviations
    # (tw/th/bw/mw/ow) -- confirm against the competition's tree definition.
    stem_w = 0.15
    stem_h = 0.2
    bottom_w = 0.7
    middle_w = 0.4
    upper_w = 0.25

    # Vertical levels of the outline.
    y_tip = 0.8
    y_upper = 0.5
    y_middle = 0.25
    y_base = 0.0
    y_stem_bottom = -stem_h

    xs = np.array(
        [
            0.0,
            upper_w / 2, upper_w / 4,
            middle_w / 2, middle_w / 4,
            bottom_w / 2,
            stem_w / 2, stem_w / 2,
            -stem_w / 2, -stem_w / 2,
            -bottom_w / 2,
            -middle_w / 4, -middle_w / 2,
            -upper_w / 4, -upper_w / 2,
        ],
        np.float64,
    )
    ys = np.array(
        [
            y_tip,
            y_upper, y_upper,
            y_middle, y_middle,
            y_base,
            y_base, y_stem_bottom,
            y_stem_bottom, y_base,
            y_base,
            y_middle, y_middle,
            y_upper, y_upper,
        ],
        np.float64,
    )
    return xs, ys

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of placed polygons by its bounding square.

    Every polygon is the template ``(tx, ty)`` rotated by ``degs[i]``
    degrees and then translated by ``(xs[i], ys[i])``. The score is the
    squared side of the axis-aligned bounding square around all transformed
    vertices, divided by the number of polygons.

    Args:
        xs: 1-D array of x translations, one per polygon.
        ys: 1-D array of y translations, one per polygon.
        degs: 1-D array of rotation angles in degrees, one per polygon.
        tx: 1-D array of template vertex x coordinates.
        ty: 1-D array of template vertex y coordinates.

    Returns:
        ``side ** 2 / n`` where ``side`` is the larger of the bounding-box
        width and height and ``n`` is ``xs.size``.
    """
    n = xs.size
    vertex_count = tx.size

    # Running bounding box, seeded with sentinels far outside any real data.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300

    for tree in range(n):
        theta = degs[tree] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        shift_x = xs[tree]
        shift_y = ys[tree]

        for k in range(vertex_count):
            # Rotate the template vertex, then translate it into place.
            px = cos_t * tx[k] - sin_t * ty[k] + shift_x
            py = sin_t * tx[k] + cos_t * ty[k] + shift_y

            if px < lo_x:
                lo_x = px
            if px > hi_x:
                hi_x = px
            if py < lo_y:
                lo_y = py
            if py > hi_y:
                hi_y = py

    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / n

def strip(a):
    """Convert an iterable of 's'-decorated values to a float64 array.

    Each element is turned into a string, every ``'s'`` character in it is
    removed, and the remainder is parsed as a float.
    (Presumably the CSV stores numbers as strings such as ``'s1.5'`` --
    confirm against the data file.)

    Args:
        a: Iterable of values whose string form is a number, optionally
            containing ``'s'`` characters.

    Returns:
        A 1-D ``np.float64`` array with one parsed value per element.
    """
    parsed = (float(str(item).replace('s', '')) for item in a)
    return np.fromiter(parsed, dtype=np.float64)

# Build the template vertex arrays once; the scoring cell below passes
# them to score_group for every group.
tx, ty = make_polygon_template()
print('Functions defined')

In [None]:
# Load baseline and calculate per-N scores
df = pd.read_csv('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv/santa-2025.csv')
# The integer before the first underscore of `id` is used as the group key N.
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

# One score per group: strip the 's' decoration from each column, then
# evaluate the bounding-square score of that group's placements.
per_n_scores = []
for n, g in df.groupby('N'):
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    sc = score_group(xs, ys, ds, tx, ty)
    per_n_scores.append({'N': n, 'score': sc})

per_n_df = pd.DataFrame(per_n_scores)

# Name the target once so the printed value and the gap computation cannot
# drift apart, and sum the scores a single time.
target = 68.919154
total_score = per_n_df['score'].sum()
print(f'Total score: {total_score:.6f}')
print(f'Target: {target:.6f}')
print(f'Gap: {total_score - target:.6f}')

In [None]:
# Analyze score distribution: running total and each N's share of the sum.
score_col = per_n_df['score']
per_n_df['cumsum'] = score_col.cumsum()
per_n_df['pct_of_total'] = score_col / score_col.sum() * 100

print('Top 20 score contributors:')
top_contributors = per_n_df.nlargest(20, 'score')
print(top_contributors[['N', 'score', 'pct_of_total']].to_string())

In [None]:
# Calculate efficiency (trees per unit area).
# score_group returns side**2 / N, so score * N recovers the squared side
# of the bounding square, i.e. its area.
bbox_area = per_n_df['score'] * per_n_df['N']
per_n_df['efficiency'] = per_n_df['N'] / bbox_area  # trees per unit of bounding-square area
per_n_df['side'] = np.sqrt(bbox_area)  # bounding box side

print('\nLowest efficiency (most room for improvement):')
least_efficient = per_n_df.nsmallest(20, 'efficiency')
print(least_efficient[['N', 'score', 'side', 'efficiency']].to_string())

In [None]:
# What improvement is needed per N to reach target?
target = 68.919154
current = per_n_df['score'].sum()
gap = current - target

# If every N improved by the same relative amount, this is the percentage
# each one would have to shed to land exactly on the target.
required_pct_improvement = gap / current * 100

print(f'\nCurrent: {current:.6f}')
print(f'Target: {target:.6f}')
print(f'Gap: {gap:.6f} ({required_pct_improvement:.2f}%)')

print(f'\nRequired average improvement per N: {required_pct_improvement:.2f}%')

In [None]:
# Split the per-N table at N = 50 and report how much of the current total
# each half accounts for.
# Original hypothesis: large N values with lattice structure might be improvable.

large_n = per_n_df.loc[per_n_df['N'] >= 50].copy()
large_total = large_n['score'].sum()
large_share = large_total / current * 100
print(f'\nLarge N (>=50) contribution: {large_total:.6f} ({large_share:.1f}%)')

small_n = per_n_df.loc[per_n_df['N'] < 50].copy()
small_total = small_n['score'].sum()
small_share = small_total / current * 100
print(f'Small N (<50) contribution: {small_total:.6f} ({small_share:.1f}%)')

In [None]:
# What-if: how much of the gap would a 5% score reduction on each half close?
# (Original note: "the gap is 1.76 points (2.5%)" -- that figure is not
# recomputed here; rely on the values printed by the gap cell.)

# Large N (>=50) improved by 5%:
large_n_improvement = 0.05 * large_n['score'].sum()
large_gap_share = large_n_improvement / gap * 100
print(f'\n5% improvement on large N (>=50): {large_n_improvement:.6f}')
print(f'This would close {large_gap_share:.1f}% of the gap')

# Small N (<50) improved by 5%:
small_n_improvement = 0.05 * small_n['score'].sum()
small_gap_share = small_n_improvement / gap * 100
print(f'\n5% improvement on small N (<50): {small_n_improvement:.6f}')
print(f'This would close {small_gap_share:.1f}% of the gap')

In [None]:
# Summary of findings, emitted as one block of text.
# NOTE(review): only the three score lines are computed; the insights and
# recommendations below are fixed text and are not derived from the data in
# this notebook -- check them against the numbers printed above.
rule = '=' * 60
summary_lines = [
    rule,
    'ANALYSIS SUMMARY',
    rule,
    f'Current score: {current:.6f}',
    f'Target score: {target:.6f}',
    f'Gap to close: {gap:.6f} ({gap/current*100:.2f}%)',
    '',
    'Key insights:',
    '1. Small N (<50) contributes more to total score',
    '2. Large N (>=50) might have more room for improvement via lattice',
    '3. The pre-optimized solution is at a very tight local optimum',
    '4. Standard SA optimization found NO improvements',
    '',
    'Recommended approaches:',
    '1. Implement proper lattice-based SA for large N (egortrushin approach)',
    '2. Try fractional translation with micro-steps',
    '3. Run much longer optimization (hours, not minutes)',
    '4. Use population-based optimization with perturbation',
]
print('\n'.join(summary_lines))