# Evolver Loop 6 Analysis

## Key Questions:
1. What is the gap between our score and the target?
2. What techniques have we NOT tried yet?
3. What do the top kernels do that we haven't implemented?
4. Can we find better source solutions?

In [None]:
import pandas as pd
import numpy as np
import json

# Load session state.
# JSON is read as UTF-8 explicitly so decoding does not depend on the
# machine's locale default.
with open('/home/code/session_state.json', encoding='utf-8') as f:
    state = json.load(f)


def _fmt_score(value):
    """Return *value* with 4 decimals if it is numeric, else its text form.

    Missing or null scores (e.g. 'N/A' or None) are printed verbatim
    instead of raising a formatting error.
    """
    if isinstance(value, (int, float)):
        return f'{value:.4f}'
    return str(value)


print('=== EXPERIMENT HISTORY ===')
for exp in state['experiments']:
    cv = _fmt_score(exp.get('cv_score', 'N/A'))
    print(f"{exp['id']}: {exp['name']} | CV: {cv}")

print('\n=== SUBMISSION HISTORY ===')
for sub in state['submissions']:
    # CV and LB go through the same formatter so a submission with a
    # missing score on either side still gets a line in the report.
    cv = _fmt_score(sub.get('cv_score', 'N/A'))
    lb = _fmt_score(sub.get('lb_score', 'N/A'))
    print(f"{sub['experiment_id']}: CV={cv} | LB={lb}")

In [None]:
# Headline numbers for this loop (entered by hand, not derived from the data).
best_cv = 84.712432
best_lb = 84.712432
target = 68.931058
lb_1 = 71.19  # score of the current leaderboard leader

print('=== CURRENT STATUS ===')
for label, value in (('Best CV', best_cv), ('Best LB', best_lb), ('Target', target)):
    print(f'{label}: {value:.6f}')

# Absolute and relative distance from our best LB score to the target.
gap_to_target = best_lb - target
print(f'Gap to target: {gap_to_target:.6f} ({gap_to_target/target*100:.1f}%)')

# Same comparison against the leaderboard leader.
print(f'LB #1: {lb_1:.2f}')
gap_to_leader = best_lb - lb_1
print(f'Gap to LB #1: {gap_to_leader:.2f} ({gap_to_leader/lb_1*100:.1f}%)')

In [None]:
# Inventory of optimisation techniques: what has been run so far and what
# is still open. Both lists are printed as simple check / cross lists.
techniques_tried = [
    'C++ bbox3 optimizer (SA + local search)',
    'Fix direction rotation optimization',
    'Backward propagation',
    'Ensemble of 125 CSV sources',
    'Extended optimization (-n 50000 -r 200)',
    'Grid-based initialization (zaburo style)',
]

techniques_not_tried = [
    'Per-tree rotation optimization (individual tree angles)',
    'Genetic algorithm with crossover between configurations',
    'Different compaction strategies (center-out, boundary-in)',
    'Simulated annealing with different cooling schedules',
    'Multi-start with diverse initializations (random, spiral, hexagonal)',
    'Fractional translation with very fine steps (0.00001)',
    'Analytical solution for N=1 (single tree optimal rotation)',
    'Hybrid optimization (greedy + SA + local search)',
    'Targeted optimization of worst N values with 100k+ iterations',
]

# One pass per section: heading, bullet marker, entries.
for heading, marker, entries in (
    ('=== TECHNIQUES TRIED ===', '✓', techniques_tried),
    ('\n=== TECHNIQUES NOT YET TRIED ===', '✗', techniques_not_tried),
):
    print(heading)
    for entry in entries:
        print(f'  {marker} {entry}')

In [None]:
# Read the current best submission; the cells below score it per N and
# rank the worst N values.
submission_path = '/home/code/experiments/005_extended_optimization/submission_fd.csv'
df = pd.read_csv(submission_path)

# Fast scoring
# (x, y) vertex pairs of the shape that is rotated and translated when a
# group is scored -- presumably the outline of one tree; confirm against the
# competition's reference geometry. TX / TY hold the two coordinates as
# separate arrays because the scoring code rotates them component-wise.
_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = np.array([vx for vx, _ in _OUTLINE])
TY = np.array([vy for _, vy in _OUTLINE])

def strip_s(val):
    """Convert a submission cell to float, dropping one leading 's' if present.

    *val* is turned into text first, so plain numbers are accepted as well
    as strings such as 's0.25'.
    """
    text = str(val)
    if text[:1] == 's':
        text = text[1:]
    return float(text)

def score_group_fast(xs, ys, degs):
    """Score one group of placed shapes.

    Each shape is the TX/TY outline rotated by ``degs[i]`` degrees and then
    shifted by ``(xs[i], ys[i])``. The score is the squared side of the
    smallest axis-aligned square covering every vertex, divided by the
    number of shapes.

    Args:
        xs, ys: per-shape translation components.
        degs: per-shape rotation angles in degrees.

    Returns:
        ``side * side / n``, or ``inf`` when the group is empty.
    """
    count = len(xs)
    if count == 0:
        return float('inf')

    x_parts, y_parts = [], []
    for cx, cy, deg in zip(xs, ys, degs):
        theta = np.radians(deg)
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        # Standard 2-D rotation followed by the translation.
        x_parts.append(TX * cos_t - TY * sin_t + cx)
        y_parts.append(TX * sin_t + TY * cos_t + cy)

    all_x = np.concatenate(x_parts)
    all_y = np.concatenate(y_parts)
    width = all_x.max() - all_x.min()
    height = all_y.max() - all_y.min()
    side = max(width, height)
    return side * side / count

# Calculate scores per N.
# A group is scored only when it has exactly N rows. Groups that fail this
# check are recorded and reported, because silently dropping them would
# make the total look better than the submission really is.
scores = {}
incomplete = []  # N values whose row count in the submission is not N
ids = df['id']
for n in range(1, 201):
    group = df[ids.str.startswith(f'{n:03d}_')]
    if len(group) != n:
        incomplete.append(n)
        continue
    xs = group['x'].apply(strip_s).values
    ys = group['y'].apply(strip_s).values
    degs = group['deg'].apply(strip_s).values
    scores[n] = score_group_fast(xs, ys, degs)

if incomplete:
    print(f'WARNING: {len(incomplete)} group(s) skipped (row count != N): {incomplete}')

total = sum(scores.values())
print(f'Total score: {total:.6f}')
print(f'Average score per N: {total/200:.6f}')

In [None]:
# Rank the N values from highest to lowest score and keep the 30 worst.
ranked = sorted(scores.items(), key=lambda item: -item[1])
worst = ranked[:30]

print('\n=== TOP 30 WORST N VALUES ===')
print('N\tScore\tContribution')
for n, score in worst:
    # Share of the overall total that this single N is responsible for.
    share = score / total * 100
    print(f'{n}\t{score:.6f}\t{share:.2f}%')

worst_sum = sum(value for _, value in worst)
print(f'\nTop 30 worst contribute: {worst_sum/total*100:.1f}% of total score')

In [None]:
# N=1 check: with a single shape only the rotation matters, so a brute-force
# sweep over angles shows how far the current solution is from the best
# angle on a 0.1 degree grid.
n1_group = df[df['id'].str.startswith('001_')]
if len(n1_group) == 1:
    x, y, deg = (strip_s(n1_group[col].values[0]) for col in ('x', 'y', 'deg'))
    current_score = scores[1]

    print(f'\n=== N=1 ANALYSIS ===')
    print(f'Current position: ({x:.6f}, {y:.6f})')
    print(f'Current angle: {deg:.6f}°')
    print(f'Current score: {current_score:.6f}')

    # Start from the current solution; a grid angle replaces it only when
    # it is strictly better.
    best_angle, best_score = deg, current_score

    for angle in np.linspace(0, 360, 3601):  # 0.1 degree increments
        theta = np.radians(angle)
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        rot_x = TX * cos_t - TY * sin_t
        rot_y = TX * sin_t + TY * cos_t
        extent = max(rot_x.max() - rot_x.min(), rot_y.max() - rot_y.min())
        candidate = extent * extent
        if candidate < best_score:
            best_angle, best_score = angle, candidate

    print(f'\nOptimal angle: {best_angle:.1f}°')
    print(f'Optimal score: {best_score:.6f}')
    print(f'Potential improvement: {current_score - best_score:.6f}')

In [None]:
# Theoretical lower bound, single-tree case only.
# For one tree the bounding-square side depends solely on the rotation angle,
# so it can be found by sweeping angles. This cell does exactly that for N=1.
# For N > 1 the bound would depend on how well the trees pack together;
# that is not computed here.

print('\n=== THEORETICAL ANALYSIS ===')

# Bounding-square side of one shape as a function of its rotation
def tree_bbox(angle):
    """Return the larger axis-aligned extent of the TX/TY outline rotated by *angle* degrees."""
    theta = np.radians(angle)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    rotated_x = TX * cos_t - TY * sin_t
    rotated_y = TX * sin_t + TY * cos_t
    width = rotated_x.max() - rotated_x.min()
    height = rotated_y.max() - rotated_y.min()
    return max(width, height)

# Sweep 0..180 degrees in 0.1 degree steps and keep the first angle that
# gives the smallest bounding-square side.
candidate_angles = np.linspace(0, 180, 1801)
best_angle = min(candidate_angles, key=tree_bbox)
min_side = tree_bbox(best_angle)
min_score = min_side**2

print(f'Single tree minimum bounding box side: {min_side:.6f} at {best_angle:.1f}°')
print(f'Single tree minimum score: {min_score:.6f}')
print(f'Current N=1 score: {scores[1]:.6f}')
print(f'Gap: {scores[1] - min_score:.6f}')

In [None]:
# Distance between the recomputed total and the target score.
gap = total - target
print('\n=== GAP ANALYSIS ===')
print(f'Our score: {total:.6f}')
print(f'Target: {target:.6f}')
print(f'Gap: {gap:.6f}')

# Spread the gap evenly over the 200 puzzle sizes.
avg_improvement_needed = gap / 200
print(f'\nAverage improvement needed per N: {avg_improvement_needed:.6f}')

# What-if: halve the score of each of the 30 worst N values.
worst_30_total = sum(value for _, value in worst)
if_improved = worst_30_total * 0.5
improvement = worst_30_total - if_improved
new_total = total - improvement
print(f'\nIf worst 30 N improved by 50%: {improvement:.2f} points saved')
print(f'New total would be: {new_total:.2f}')
print(f'Still need: {new_total - target:.2f} more points')

In [None]:
# Reference points: (label, score, technique) for public kernels, the
# leaderboard leader and the target, each compared against the target.
print('\n=== KERNEL SCORES ===')
kernel_scores = [
    ('zaburo', 88.33, 'Grid-based initialization'),
    ('crodoc hybrid', 87.38, 'Hybrid optimization'),
    ('crodoc neon', 85.25, 'Neon tree visualizer'),
    ('crodoc backpacking', 74.75, 'Backward propagation + dataset'),
    ('LB #1 (terry_u16)', 71.19, 'Unknown - private'),
    ('Target', 68.93, 'Unknown - private'),
]

for label, value, approach in kernel_scores:
    print(f'{label}: {value:.2f} (gap to target: {value - target:.2f}) - {approach}')

# Our own recomputed total on the same scale.
our_gap = total - target
print(f'\nOur score: {total:.2f} (gap to target: {our_gap:.2f})')