# Evolver Loop 14 Analysis

## Key Questions:
1. What is the per-N score breakdown?
2. Where are the biggest opportunities for improvement?
3. Is the rebuild-from-corners bug fix worth pursuing?
4. What datasets haven't been explored yet?

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json
import os

# Decimal arithmetic in the current context uses 30 significant digits.
getcontext().prec = 30

# Tree shape: outline vertices of the unrotated tree as (x, y) pairs, in
# polygon order. TX holds the x coordinates and TY the y coordinates.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = np.array([vx for vx, _ in _TREE_OUTLINE])
TY = np.array([vy for _, vy in _TREE_OUTLINE])

def create_tree_polygon(x, y, angle):
    """Build the tree outline rotated by ``angle`` and moved to ``(x, y)``.

    All three arguments are passed through ``float()``, so numeric strings
    are accepted. The outline (TX, TY) is rotated about the origin (0, 0)
    first and translated afterwards; ``angle`` is handed to
    ``affinity.rotate`` with its default angle unit.

    Returns the resulting shapely ``Polygon``.
    """
    dx, dy, theta = float(x), float(y), float(angle)
    outline = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(outline, theta, origin=(0, 0))
    return affinity.translate(rotated, dx, dy)

def get_bbox_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    ``trees`` is a sequence of dicts with 'x', 'y' and 'deg' entries, each
    turned into a polygon via ``create_tree_polygon``. An empty sequence
    yields 0.
    """
    if not len(trees):
        return 0
    merged = unary_union(
        [create_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    )
    min_x, min_y, max_x, max_y = merged.bounds
    width = max_x - min_x
    height = max_y - min_y
    return max(width, height)

def get_score(trees, n):
    """Score a layout: squared bounding-box side (``get_bbox_side``) over ``n``."""
    return get_bbox_side(trees) ** 2 / n

# Signal that the imports, constants and helper definitions above all ran.
print('Setup complete')

In [None]:
# Load current best submission (exp_016)
df = pd.read_csv('/home/code/experiments/016_jazivxt_ensemble/submission.csv')
# The part of `id` before the first underscore is used as the layout size N.
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

# Score this analysis is compared against (named once instead of repeated).
TARGET_SCORE = 68.876781


def _strip_s(value):
    """Return ``value`` as text with every 's' character removed.

    Presumably the submission prefixes numeric fields with 's' -- confirm
    against the file format.
    """
    return str(value).replace('s', '')


best_trees = {}   # N -> list of {'x', 'y', 'deg'} dicts (values kept as strings)
best_scores = {}  # N -> get_score() of that layout

for n, g in df.groupby('N'):
    # Walk the three columns in lockstep rather than DataFrame.iterrows(),
    # which builds a full Series object for every row.
    trees = [
        {'x': _strip_s(x), 'y': _strip_s(y), 'deg': _strip_s(deg)}
        for x, y, deg in zip(g['x'], g['y'], g['deg'])
    ]
    best_trees[n] = trees
    best_scores[n] = get_score(trees, n)

total_score = sum(best_scores.values())
print(f'Total score: {total_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {total_score - TARGET_SCORE:.6f}')

In [None]:
# Score breakdown by N ranges
# Each entry is (first N, last N, display label); both ends are inclusive.
ranges = [
    (1, 1, 'N=1'), (2, 5, 'N=2-5'), (6, 10, 'N=6-10'),
    (11, 50, 'N=11-50'), (51, 100, 'N=51-100'), (101, 200, 'N=101-200'),
]

print('Score breakdown by N range:')
print('=' * 50)
for first_n, last_n, label in ranges:
    # Add up the per-N scores inside this band, then express the band as a
    # percentage of the overall total.
    band = range(first_n, last_n + 1)
    range_score = sum(map(best_scores.__getitem__, band))
    pct = range_score / total_score * 100
    print(f'{label:12s}: {range_score:8.4f} ({pct:5.1f}%)')

In [None]:
# Find N values with highest individual scores (most room for improvement)
# Pair every N from 1 to 200 with its score and order the pairs from the
# largest score down (ties keep ascending-N order because the sort is stable).
scores_list = sorted(
    ((n, best_scores[n]) for n in range(1, 201)),
    key=lambda entry: entry[1],
    reverse=True,
)

print('\nTop 20 N values by individual score:')
print('=' * 50)
for rank, (n, score) in enumerate(scores_list):
    if rank >= 20:
        break
    print(f'N={n:3d}: {score:.6f}')

In [None]:
# Analyze the chistyakov rebuild from corners technique
# The key insight: use POLYGON BOUNDS, not tree center

print('\nAnalyzing rebuild from corners technique:')
print('=' * 50)

# For a sample tree, show the difference between center and polygon bounds
test_tree = {'x': '0', 'y': '0', 'deg': '45'}  # 45 degree rotation
poly = create_tree_polygon(test_tree['x'], test_tree['y'], test_tree['deg'])
bounds = poly.bounds

# The sample tree sits at the origin, so the largest absolute bound is how
# far its polygon reaches from the center along either axis.
max_extent = max(abs(b) for b in bounds)

print('Tree at center (0, 0) with 45 degree rotation:')
print('  Center: (0, 0)')
print(f'  Polygon bounds: minx={bounds[0]:.4f}, miny={bounds[1]:.4f}, maxx={bounds[2]:.4f}, maxy={bounds[3]:.4f}')
print(f'  Max extent from center: {max_extent:.4f}')
# Quote the measured extent: the previously hard-coded "~0.7" did not match
# the value printed on the line above (about 0.57 for this rotation).
print(f'\nThis shows the tree extends ~{max_extent:.2f} from center, not 0!')

In [None]:
# Check if fixing the rebuild from corners bug could help
# Test on a few large layouts

def rebuild_from_corners_fixed(large_layout, target_n, current_best_score):
    """Try to beat a score by keeping the ``target_n`` trees nearest a corner.

    For each of the four corners of the layout's bounding box, every tree is
    ranked by the Chebyshev distance from that corner to the farthest edge of
    the tree's own POLYGON BOUNDS (not its center). The ``target_n`` closest
    trees form a candidate subset, which is scored with ``get_score``.

    Args:
        large_layout: sequence of tree dicts with 'x', 'y' and 'deg' entries.
        target_n: number of trees to keep.
        current_best_score: score to beat; a candidate only counts when it is
            lower by more than 1e-9.

    Returns:
        ``(subset, score)`` for the best improving corner, or ``None`` when
        no corner improves on ``current_best_score``, when the layout does
        not contain more than ``target_n`` trees, or when ``target_n`` is not
        positive.
    """
    # A non-positive target would make the slice below meaningless and the
    # score a division by zero, so treat it like "nothing to extract".
    if target_n <= 0 or len(large_layout) <= target_n:
        return None

    # Build each polygon exactly once; its bounds do not depend on the corner
    # being examined, so they are shared by all four passes below.
    polygons = [create_tree_polygon(t['x'], t['y'], t['deg']) for t in large_layout]
    tree_bounds = [p.bounds for p in polygons]  # (minx, miny, maxx, maxy) each

    # Get layout bounds
    minx, miny, maxx, maxy = unary_union(polygons).bounds
    corners = [(minx, miny), (minx, maxy), (maxx, miny), (maxx, maxy)]

    best_subset = None
    best_score = current_best_score

    for corner_x, corner_y in corners:
        # Max POLYGON BOUNDS distance from the corner (Chebyshev) per tree.
        distances = [
            max(
                abs(b[0] - corner_x),  # polygon minx
                abs(b[2] - corner_x),  # polygon maxx
                abs(b[1] - corner_y),  # polygon miny
                abs(b[3] - corner_y),  # polygon maxy
            )
            for b in tree_bounds
        ]

        # Stable sort on distance only, so equidistant trees keep their
        # original layout order.
        order = sorted(range(len(large_layout)), key=distances.__getitem__)
        subset = [large_layout[i] for i in order[:target_n]]

        score = get_score(subset, target_n)
        if score < best_score - 1e-9:
            best_score = score
            best_subset = subset

    if best_subset is None:
        return None
    return (best_subset, best_score)

print('Testing fixed rebuild from corners on sample layouts...')
print('=' * 50)

# Test on N=100, 150, 200 as sources
for source_n in (100, 150, 200):
    large_layout = best_trees[source_n]
    improvements = []  # (target N, score reduction) for every target that got better

    # Targets run from 2 up to, but not including, min(50, source_n).
    for target_n in range(2, min(50, source_n)):
        baseline = best_scores[target_n]
        result = rebuild_from_corners_fixed(large_layout, target_n, baseline)
        if not result:
            continue
        improvements.append((target_n, baseline - result[1]))

    if not improvements:
        print(f'\nSource N={source_n}: No improvements found')
        continue

    print(f'\nSource N={source_n}: Found {len(improvements)} improvements!')
    # Only the first five (lowest target N) are listed.
    for n, imp in improvements[:5]:
        print(f'  N={n}: +{imp:.6f}')

In [None]:
# Summary of findings
TARGET_SCORE = 68.876781  # score the current total is compared against
gap = total_score - TARGET_SCORE

# Sum each N range once so the listing and the headline figure below are
# derived from the same numbers.
range_totals = [
    (label, sum(best_scores[n] for n in range(start, end + 1)))
    for start, end, label in ranges
]
# Range with the largest share of the total, reported as the key insight
# instead of a hard-coded percentage.
top_label, top_score = max(range_totals, key=lambda item: item[1])

print('\n' + '=' * 60)
print('SUMMARY')
print('=' * 60)
print(f'Current score: {total_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {gap:.6f} ({gap / TARGET_SCORE * 100:.2f}%)')
print('\nLargest score contributions:')
for label, range_score in range_totals:
    print(f'  {label}: {range_score:.4f}')
print(f'\nKey insight: {top_label} contributes ~{top_score / total_score * 100:.0f}% of total score')