# Evolver Loop 2 Analysis

## Goal: Find the best valid solution and understand why we're stuck

The submission failed with 'Overlapping trees in group 008'. Need to:
1. Find the best VALID solution from all sources
2. Understand why the C++ optimizers aren't improving
3. Identify a fundamentally different approach

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import glob
import os

# Tree shape definition (closed polygon)
# TX and TY are parallel lists: vertex i of the outline is (TX[i], TY[i]).
# The final vertex repeats the first one, (0, 0.8), so the ring is closed
# explicitly. Vertex i and vertex 15 - i share the same y and have opposite
# x, i.e. the outline is mirror-symmetric about the y-axis.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125, 0]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5, 0.8]

def get_tree_polygon(x, y, angle_deg):
    """Build the tree outline placed at ``(x, y)`` with the given rotation.

    The template polygon described by ``TX``/``TY`` is rotated by
    ``angle_deg`` degrees about the origin ``(0, 0)`` first and only then
    shifted by ``(x, y)``, so the rotation never moves the anchor point.

    Args:
        x: Horizontal offset applied after the rotation.
        y: Vertical offset applied after the rotation.
        angle_deg: Rotation angle in degrees.

    Returns:
        The rotated and translated shapely polygon.
    """
    template = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(template, angle_deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def has_overlap(poly1, poly2, tolerance=1e-9):
    """Report whether two polygons share more than ``tolerance`` of area.

    Polygons that merely touch (or whose intersection area is at most
    ``tolerance``) are not counted as overlapping.

    Args:
        poly1: First geometry; must provide ``intersects`` and ``intersection``.
        poly2: Second geometry.
        tolerance: Largest intersection area that is still ignored.

    Returns:
        True if the intersection area exceeds ``tolerance``, otherwise False.
    """
    if poly1.intersects(poly2):
        # Only pay for the full intersection when the cheap predicate says
        # the geometries meet at all.
        shared_area = poly1.intersection(poly2).area
        return shared_area > tolerance
    return False

def check_overlaps_for_n(polys):
    """Return True as soon as any two polygons in ``polys`` overlap.

    Every unordered pair is tested once with ``has_overlap`` (default
    tolerance); evaluation stops at the first overlapping pair.

    Args:
        polys: List of polygons belonging to one group.

    Returns:
        True if at least one pair overlaps, otherwise False.
    """
    return any(
        has_overlap(first, second)
        for idx, first in enumerate(polys)
        for second in polys[idx + 1:]
    )

def get_side_length(polys):
    """Return the side of the axis-aligned square that encloses all polygons.

    The polygons are merged with ``unary_union`` and the larger of the
    merged geometry's bounding-box width and height is returned.

    Args:
        polys: Polygons of one group.

    Returns:
        ``max(width, height)`` of the combined bounding box.
    """
    extent = unary_union(polys).bounds  # (minx, miny, maxx, maxy)
    width = extent[2] - extent[0]
    height = extent[3] - extent[1]
    return max(width, height)

def parse_submission(csv_path):
    """Load a submission CSV into ``{n: [(x, y, deg), ...]}`` for n = 1..200.

    The file must contain the columns ``id``, ``x``, ``y`` and ``deg``.
    The group number ``n`` is the integer before the first ``_`` in ``id``.
    Coordinate values may carry an ``s`` marker (e.g. ``s0.123``), which is
    stripped before conversion to float.

    Parsing is best-effort: any ordinary failure (missing file, missing
    column, malformed number, ...) yields ``None`` instead of raising.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A dict mapping every n in 1..200 to a list of exactly n
        ``(x, y, deg)`` float tuples in file order, or ``None`` when the
        file cannot be parsed or any group does not contain exactly n rows.
    """
    try:
        df = pd.read_csv(csv_path)
        if 'x' not in df.columns:
            return None

        def to_float(column):
            # regex=False: 's' is a literal marker, never a pattern, and the
            # result must not depend on the pandas version's regex default.
            return df[column].astype(str).str.replace('s', '', regex=False).astype(float)

        xs, ys, degs = to_float('x'), to_float('y'), to_float('deg')
        group_ids = df['id'].apply(lambda ident: int(str(ident).split('_')[0]))

        result = {}
        for n in range(1, 201):
            in_group = group_ids == n
            # A valid submission has exactly n trees in group n.
            if in_group.sum() != n:
                return None
            # Column-wise zip avoids the slow per-row Series from iterrows().
            result[n] = list(zip(
                xs[in_group].tolist(),
                ys[in_group].tolist(),
                degs[in_group].tolist(),
            ))
        return result
    except Exception:
        # Deliberate best-effort: unreadable files count as "no solution".
        # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit still
        # propagate so a long scan over many files can be interrupted.
        return None

def calculate_score(sol):
    """Compute the total score of a parsed solution.

    For every group n in 1..200 the enclosing square side is measured and
    ``side**2 / n`` is added to the running total.

    Args:
        sol: Mapping from n to a list of ``(x, y, deg)`` tuples.

    Returns:
        The sum of ``side**2 / n`` over all 200 groups.
    """
    running_total = 0
    for n in range(1, 201):
        placed = [get_tree_polygon(px, py, rot) for px, py, rot in sol[n]]
        group_side = get_side_length(placed)
        # Accumulate in group order so the floating-point result is stable.
        running_total += group_side**2 / n
    return running_total

def count_overlaps(sol):
    """Count how many of the groups 1..200 contain overlapping trees.

    A group contributes one to the count no matter how many of its pairs
    overlap.

    Args:
        sol: Mapping from n to a list of ``(x, y, deg)`` tuples.

    Returns:
        Number of groups for which ``check_overlaps_for_n`` is true.
    """
    return sum(
        1
        for n in range(1, 201)
        if check_overlaps_for_n(
            [get_tree_polygon(px, py, rot) for px, py, rot in sol[n]]
        )
    )

# Marker output showing that the cell defining the helpers above has run.
print('Functions defined')

In [None]:
# Score every known pre-optimized submission and record how many of its
# groups contain overlaps. Missing or unparseable files are reported and
# skipped; everything else lands in `results`.
key_files = [
    '/home/code/valid_baseline.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov/submission_best.csv',
    '/home/nonroot/snapshots/santa-2025/21129617858/code/preoptimized/best_public.csv',
]

results = []
for f in key_files:
    if not os.path.exists(f):
        print(f"{f.split('/')[-1]}: NOT FOUND")
        continue

    sol = parse_submission(f)
    if not sol:
        print(f"{f.split('/')[-1]}: PARSE FAILED")
        continue

    score = calculate_score(sol)
    overlaps = count_overlaps(sol)
    results.append(dict(file=f.split('/')[-1], score=score, overlaps=overlaps, path=f))
    print(f"{f.split('/')[-1]}: score={score:.6f}, overlaps={overlaps}")

print(f"\nTarget: 68.894234")

In [None]:
# Keep only submissions without any overlapping group and pick the one
# with the lowest score.
valid_results = [entry for entry in results if entry['overlaps'] == 0]
if not valid_results:
    print("No valid solutions found!")
else:
    best_valid = min(valid_results, key=lambda entry: entry['score'])
    print(f"Best VALID solution: {best_valid['file']}")
    print(f"Score: {best_valid['score']:.6f}")
    print(f"Gap to target: {best_valid['score'] - 68.894234:.6f}")

# List every evaluated submission, best score first, with its validity.
print("\nAll results sorted by score:")
for r in sorted(results, key=lambda entry: entry['score']):
    if r['overlaps'] == 0:
        status = 'VALID'
    else:
        status = f'INVALID ({r["overlaps"]} overlaps)'
    print(f"  {r['score']:.6f} - {r['file']} - {status}")

In [None]:
# Break the best valid solution's score down by group size N.
if valid_results:
    best_sol = parse_submission(best_valid['path'])

    # One record per N: enclosing square side and its side²/N contribution.
    per_n_scores = []
    for n in range(1, 201):
        trees = best_sol[n]
        polys = [get_tree_polygon(px, py, rot) for px, py, rot in trees]
        side = get_side_length(polys)
        score = side**2 / n
        per_n_scores.append(dict(n=n, side=side, score=score))

    # Largest contributions first.
    ranked = sorted(per_n_scores, key=lambda rec: -rec['score'])
    print("Top 20 score contributors (highest s²/n):")
    for item in ranked[:20]:
        print(f"  N={item['n']:3d}: side={item['side']:.4f}, score={item['score']:.6f}")

    # Share of the total that comes from the small groups (N <= 20).
    total = sum(rec['score'] for rec in per_n_scores)
    small_n = sum(rec['score'] for rec in per_n_scores if rec['n'] <= 20)
    print(f"\nTotal score: {total:.6f}")
    print(f"Small N (1-20) contribution: {small_n:.6f} ({100*small_n/total:.1f}%)")
    print(f"Target: 68.894234")

In [None]:
# Check what the theoretical minimum might be for small N
# For N=1, the minimum is achieved at 45° rotation
import math

def get_min_bbox_for_single_tree():
    """Scan whole-degree rotations of one tree for the smallest bounding square.

    The tree is placed at the origin and rotated by 0, 1, ..., 359 degrees.
    For each angle the enclosing square side is the larger of the bounding
    box width and height.

    Returns:
        A ``(best_side, best_angle)`` tuple. If several angles give the same
        side, the smallest such angle is returned.
    """
    def square_side(angle):
        bounds = get_tree_polygon(0, 0, angle).bounds
        return max(bounds[2] - bounds[0], bounds[3] - bounds[1])

    # Tuple ordering compares the side first and the angle second, so ties
    # on the side resolve to the earliest angle.
    return min((square_side(angle), angle) for angle in range(360))

# Best single-tree square found by the whole-degree scan.
min_side, best_angle = get_min_bbox_for_single_tree()
print(f"N=1 minimum: side={min_side:.6f} at angle={best_angle}°")
print(f"N=1 score contribution: {min_side**2:.6f}")

# Compare against the N=1 placement in the best valid solution.
if valid_results:
    n1_trees = best_sol[1]
    n1_poly = get_tree_polygon(*n1_trees[0])
    n1_bounds = n1_poly.bounds
    n1_width = n1_bounds[2] - n1_bounds[0]
    n1_height = n1_bounds[3] - n1_bounds[1]
    n1_side = max(n1_width, n1_height)
    print(f"\nCurrent N=1: side={n1_side:.6f}, angle={n1_trees[0][2]:.1f}°")
    print(f"Current N=1 score: {n1_side**2:.6f}")
    print(f"Potential improvement: {n1_side**2 - min_side**2:.6f}")