# Evolver Loop 9 Analysis

## Key Question: Why are we stuck at 70.647?

After 9 experiments, ALL valid submissions score ~70.647. The evaluator identified:
1. bbox3 and sa_v1_parallel produce overlapping trees that fail Kaggle validation
2. The baseline is at a strong local optimum
3. We need fundamentally different approaches

## New Techniques Discovered:
1. **Deletion cascade** (jiweiliu): Extract smaller N solutions from larger optimized layouts
2. **Rebuild from corners** (chistyakov): Same idea as the deletion cascade - look for better small-N solutions inside large-N layouts
3. **Tessellation** (egortrushin): Grid-based initial configs for specific N values

In [None]:
import pandas as pd
import numpy as np
import os
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.ops import unary_union
import json

# Tree shape coordinates
# TX[i], TY[i] is the i-th vertex of the tree outline in the tree's local frame
# (the two lists are zipped into vertex pairs by create_tree_polygon).
# The 15 vertices span x in [-0.35, 0.35] and y in [-0.2, 0.8]; the outline is
# mirror-symmetric about x = 0, with its tip at (0, 0.8).
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, deg):
    """Build the tree outline, rotate it by ``deg`` about the local origin, then move it to (x, y)."""
    # Rotation happens before translation so the pivot is always the tree's own (0, 0).
    outline = Polygon(list(zip(TX, TY)))
    turned = rotate(outline, deg, origin=(0, 0))
    return translate(turned, x, y)

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square covering every tree in ``trees``.

    ``trees`` is an iterable of (x, y, deg) tuples. An empty input raises
    ``ValueError`` because there is nothing to take the min/max of.
    """
    # Each entry is (minx, miny, maxx, maxy) for one placed tree.
    boxes = [create_tree_polygon(x, y, deg).bounds for x, y, deg in trees]

    width = max(box[2] for box in boxes) - min(box[0] for box in boxes)
    height = max(box[3] for box in boxes) - min(box[1] for box in boxes)

    # The enclosing square has to cover the longer of the two extents.
    return max(width, height)

def parse_submission(filepath):
    """Read a submission CSV and return ``{N: [(x, y, deg), ...]}``.

    The CSV must have ``id``, ``x``, ``y`` and ``deg`` columns. N is the integer
    before the first underscore of ``id``; numeric cells may carry a leading
    's', which is stripped before conversion to float.
    """
    def to_number(raw):
        # Values stored as text with an 's' prefix lose the prefix; everything else converts as-is.
        has_prefix = isinstance(raw, str) and raw.startswith('s')
        return float(raw[1:]) if has_prefix else float(raw)

    frame = pd.read_csv(filepath)
    for source_col, parsed_col in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
        frame[parsed_col] = frame[source_col].apply(to_number)
    frame['N'] = frame['id'].apply(lambda ident: int(str(ident).split('_')[0]))

    # One entry per N, trees listed in the order their rows appear in the file.
    return {
        n: list(zip(rows['x_val'], rows['y_val'], rows['deg_val']))
        for n, rows in frame.groupby('N')
    }

# Visible confirmation that this cell ran and the helper functions above exist.
print('Functions defined')

In [None]:
# Load baseline: {N: [(x, y, deg), ...]} as returned by parse_submission.
baseline_path = '/home/code/exploration/datasets/submission.csv'
baseline = parse_submission(baseline_path)

# Target total score, named once so the prints below cannot drift apart.
TARGET_SCORE = 68.919154

# Fail up front with a readable message if the submission lacks any N in 1..200,
# instead of a bare KeyError(n) from the middle of the scoring loop.
missing_n = [n for n in range(1, 201) if n not in baseline]
if missing_n:
    raise KeyError(f'baseline submission is missing N values: {missing_n}')

# Calculate per-N scores: bounding-square area divided by the number of trees.
baseline_scores = {}
for n in range(1, 201):
    side = get_bounding_box_side(baseline[n])
    baseline_scores[n] = side**2 / n

total_score = sum(baseline_scores.values())
print(f'Baseline total score: {total_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {total_score - TARGET_SCORE:.6f}')

In [None]:
# Rank every N by its share of the total score, largest contribution first.
# Each row is (N, per-N score, percentage of the total).
contributions = sorted(
    (
        (n, baseline_scores[n], baseline_scores[n]/total_score*100)
        for n in range(1, 201)
    ),
    key=lambda row: -row[1],
)

print('\nTop 20 N values by contribution to score:')
print('N\tScore\t\t% of Total')
top_rows = contributions[:20]
for n, score, pct in top_rows:
    print(f'{n}\t{score:.6f}\t{pct:.2f}%')

In [None]:
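# Theoretical minimum (not computed below; this cell only prints the per-range breakdown).
# If N trees could be packed with no wasted space, side**2 would equal N * tree_area,
# so each per-N score (side**2 / N) would equal the area of a single tree.
# The TX/TY outline has area 0.245625 (shoelace formula); the 0.35 figure is half of its
# 0.7 x 1.0 bounding box. Either way this is only a very rough lower bound.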

print('\nAnalysis of score breakdown by N range:')
# Inclusive (first N, last N) buckets that together cover 1..200.
ranges = [(1, 20), (21, 50), (51, 100), (101, 150), (151, 200)]
for start, end in ranges:
    bucket = range(start, end+1)
    range_score = sum(baseline_scores[n] for n in bucket)
    range_pct = range_score / total_score * 100
    print(f'N={start}-{end}: {range_score:.4f} ({range_pct:.1f}%)')

In [None]:
# Key insight: The deletion cascade technique
# For each large N, we can extract smaller N solutions by removing trees
# The tree to remove is the one that minimizes the new bounding box

def find_best_tree_to_remove(trees):
    """Find which tree, when removed, gives the smallest bounding box.

    Args:
        trees: Sized sequence of (x, y, deg) tuples.

    Returns:
        ``(best_idx, best_side)``: the index of the tree to drop and the side of
        the bounding square of the remaining trees. Ties go to the lowest index.
        With fewer than two trees nothing would remain, so ``(0, inf)`` is returned.
    """
    best_idx = 0
    best_side = float('inf')
    if len(trees) < 2:
        return best_idx, best_side

    # A tree's bounds do not depend on which other tree is removed, so build
    # each polygon once here instead of once per candidate removal.
    boxes = [create_tree_polygon(x, y, deg).bounds for x, y, deg in trees]

    for i in range(len(boxes)):
        others = boxes[:i] + boxes[i + 1:]
        # Entries are (minx, miny, maxx, maxy).
        width = max(b[2] for b in others) - min(b[0] for b in others)
        height = max(b[3] for b in others) - min(b[1] for b in others)
        side = max(width, height)
        # Strict comparison keeps the first (lowest-index) best candidate.
        if side < best_side:
            best_side = side
            best_idx = i

    return best_idx, best_side

def deletion_cascade(trees_n, baseline_scores):
    """Greedily strip trees from a layout, recording every size that beats the baseline.

    Starting from ``trees_n``, one tree is removed per step (the one chosen by
    ``find_best_tree_to_remove``) until a single tree is left. After each
    removal the layout is scored as ``side**2 / n`` and compared with
    ``baseline_scores[n]``.

    Returns:
        ``{n: (new_score, improvement, trees)}`` for each n whose new score is
        strictly lower than the baseline.
    """
    found = {}
    layout = list(trees_n)

    while len(layout) > 1:
        drop_idx, side_after = find_best_tree_to_remove(layout)

        # Rebuild the layout without the chosen tree.
        layout = [tree for pos, tree in enumerate(layout) if pos != drop_idx]
        count = len(layout)

        candidate = side_after**2 / count
        reference = baseline_scores[count]
        if candidate < reference:
            # Store a copy so later removals cannot alter the recorded layout.
            found[count] = (candidate, reference - candidate, list(layout))

    return found

# Smoke-test the cascade on a single mid-sized layout.
print('Testing deletion cascade on N=50...')
improvements = deletion_cascade(baseline[50], baseline_scores)
print(f'Found {len(improvements)} improvements from N=50 cascade')
# Show at most the five smallest N that improved.
for n in sorted(improvements)[:5]:
    score, imp, _ = improvements[n]
    print(f'  N={n}: {baseline_scores[n]:.6f} -> {score:.6f} (improvement: {imp:.6f})')

In [None]:
# Run the cascade from several large layouts; for each N keep the largest improvement seen.
print('\nTesting deletion cascade on multiple large N values...')

all_improvements = {}
for source_n in (100, 150, 200):
    print(f'\nCascading from N={source_n}...')
    improvements = deletion_cascade(baseline[source_n], baseline_scores)
    print(f'  Found {len(improvements)} improvements')

    for n, (score, imp, trees) in improvements.items():
        previous = all_improvements.get(n)
        # Record the source layout alongside the result so it can be traced back.
        if previous is None or imp > previous[1]:
            all_improvements[n] = (score, imp, trees, source_n)

print(f'\nTotal unique improvements found: {len(all_improvements)}')
if all_improvements:
    total_improvement = sum(entry[1] for entry in all_improvements.values())
    print(f'Total score improvement: {total_improvement:.6f}')

In [None]:
# Overlay the improved per-N scores on the baseline and report the new total.
if not all_improvements:
    print('No improvements found from deletion cascade')
else:
    new_scores = dict(baseline_scores)
    # Only the score (first field of each entry) is needed here.
    new_scores.update((n, entry[0]) for n, entry in all_improvements.items())

    new_total = sum(new_scores.values())
    print(f'\nOriginal total: {total_score:.6f}')
    print(f'New total: {new_total:.6f}')
    print(f'Improvement: {total_score - new_total:.6f}')
    print(f'Gap to target: {new_total - 68.919154:.6f}')