# Evolver Loop 1 Analysis

## Objective
Analyze the current state and identify the best path to beat the target score of 68.922808 (lower is better).

## Current Status
- Baseline: 70.676102 (best_ensemble.csv)
- Target: 68.922808
- Gap: 1.753294 points (2.5% improvement needed)

In [None]:
import pandas as pd
import numpy as np
import os
import glob

# Find all available pre-optimized CSVs
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'
# glob returns matches in arbitrary order; sort them so that repeated runs
# process and report the files in the same order.
csv_files = sorted(glob.glob(f'{preopt_dir}/**/*.csv', recursive=True))
print(f'Found {len(csv_files)} CSV files in preoptimized folder')

In [None]:
# Load and score all CSVs to understand what we have
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

# Set the current decimal context's precision to 30 significant digits.
getcontext().prec = 30

class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    The outline is a fixed 15-vertex shape (tip, three tiers and a trunk)
    defined around the origin. It is rotated by ``angle`` about (0, 0) and
    then translated by (``center_x``, ``center_y``); the result is stored in
    ``self.polygon``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the placed polygon.

        Args:
            center_x: x offset applied after rotation; converted with
                ``Decimal(str(center_x))``.
            center_y: y offset applied after rotation; converted the same way.
            angle: rotation passed to ``affinity.rotate`` with its default
                units (degrees per the shapely docs -- confirm if the shapely
                version changes).
        """
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Shape dimensions, kept as Decimal so the derived halves/quarters are
        # computed exactly before the single conversion to float.
        trunk_width = Decimal('0.15')
        trunk_height = Decimal('0.2')
        base_width = Decimal('0.7')
        mid_width = Decimal('0.4')
        top_width = Decimal('0.25')

        # Vertical levels of the outline.
        y_tip = float(Decimal('0.8'))
        y_tier1 = float(Decimal('0.5'))
        y_tier2 = float(Decimal('0.25'))
        y_base = float(Decimal('0.0'))
        y_trunk_bottom = float(-trunk_height)

        # Horizontal extents on the right-hand side; the left side mirrors them.
        top_outer = float(top_width / 2)
        top_inner = float(top_width / 4)
        mid_outer = float(mid_width / 2)
        mid_inner = float(mid_width / 4)
        base_outer = float(base_width / 2)
        trunk_half = float(trunk_width / 2)

        outline = [
            # Tip, then down the right-hand side.
            (0, y_tip),
            (top_outer, y_tier1),
            (top_inner, y_tier1),
            (mid_outer, y_tier2),
            (mid_inner, y_tier2),
            (base_outer, y_base),
            # Trunk.
            (trunk_half, y_base),
            (trunk_half, y_trunk_bottom),
            (-trunk_half, y_trunk_bottom),
            (-trunk_half, y_base),
            # Back up the left-hand side.
            (-base_outer, y_base),
            (-mid_inner, y_tier2),
            (-mid_outer, y_tier2),
            (-top_inner, y_tier1),
            (-top_outer, y_tier1),
        ]

        turned = affinity.rotate(Polygon(outline), float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def load_submission(csv_path):
    """Read a submission CSV and group its trees by configuration number.

    The CSV is expected to provide the columns ``id``, ``x``, ``y`` and
    ``deg``. The part of ``id`` before the first underscore is parsed as the
    integer group key. A leading ``'s'`` on the ``x``, ``y`` and ``deg``
    values is removed before they are handed to ``ChristmasTree``.

    Args:
        csv_path: anything accepted by ``pandas.read_csv``.

    Returns:
        dict mapping the integer group key to the list of ``ChristmasTree``
        objects for that group, in file order.
    """
    def strip_marker(value):
        # Values may carry an 's' prefix; drop it if present.
        text = str(value)
        return text[1:] if text.startswith('s') else text

    table = pd.read_csv(csv_path)
    configs = {}
    for _, record in table.iterrows():
        group = int(record['id'].split('_')[0])
        tree = ChristmasTree(
            strip_marker(record['x']),
            strip_marker(record['y']),
            strip_marker(record['deg']),
        )
        configs.setdefault(group, []).append(tree)
    return configs

def has_overlap(trees):
    """Report whether any two trees overlap beyond merely touching.

    Args:
        trees: sequence of objects exposing a ``polygon`` attribute.

    Returns:
        True as soon as a pair of distinct polygons is found that intersects
        without just touching; False otherwise, including when there are
        fewer than two trees.
    """
    if len(trees) <= 1:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)
    for position, shape in enumerate(shapes):
        # The query results are used as positions into ``shapes``.
        for candidate in spatial_index.query(shape):
            if candidate == position:
                continue  # a polygon always matches itself
            other = shapes[candidate]
            if shape.intersects(other) and not shape.touches(other):
                return True
    return False

def get_score(trees, n):
    """Return the squared bounding-square side divided by ``n``.

    The side is the larger of the width and height of the axis-aligned bounds
    of the union of all tree polygons.

    Args:
        trees: sequence of objects exposing a ``polygon`` attribute.
        n: divisor applied to the squared side length.
    """
    merged = unary_union([tree.polygon for tree in trees])
    min_x, min_y, max_x, max_y = merged.bounds
    width = max_x - min_x
    height = max_y - min_y
    side_length = max(width, height)
    return side_length ** 2 / n

def get_total_score(configs):
    """Add up ``get_score(trees, n)`` over every ``n -> trees`` entry.

    Args:
        configs: dict mapping ``n`` to its list of trees.

    Returns:
        The sum of the per-entry scores (0 for an empty dict).
    """
    total = 0
    for n, trees in configs.items():
        total += get_score(trees, n)
    return total

# Confirmation message printed once the definitions above have executed.
print('Functions defined')

In [None]:
# Score all available CSVs
results = []
for csv_path in csv_files:
    try:
        configs = load_submission(csv_path)
        if len(configs) != 200:
            # Not a full submission (200 groups expected); skip it.
            continue
        overlap_count = sum(1 for n in range(1, 201) if has_overlap(configs[n]))
        score = get_total_score(configs)
    except Exception as e:
        # Best effort: the folder may contain CSVs that are not submissions.
        # Report the failure instead of hiding it, then move on.
        print(f'Skipping {csv_path}: {type(e).__name__}: {e}')
        continue
    results.append({
        'file': csv_path.replace(preopt_dir, ''),
        'score': score,
        'overlaps': overlap_count,
    })

# Fix the columns explicitly so that sorting also works when no CSV was scored
# (an empty frame would otherwise have no 'score' column).
results_df = pd.DataFrame(results, columns=['file', 'score', 'overlaps']).sort_values('score')
print('\nAll available pre-optimized CSVs:')
print(results_df.to_string(index=False))

In [None]:
# Check the best CSV without overlaps
valid_results = results_df[results_df['overlaps'] == 0]
if valid_results.empty:
    # Without this guard, .iloc[0] fails with an opaque
    # "single positional indexer is out-of-bounds" IndexError.
    raise ValueError('No CSV without overlaps found in results_df; cannot pick a best valid CSV')

# results_df is sorted by score ascending, so the first row is the best one.
best_valid = valid_results.iloc[0]
print("\nBest valid CSV (no overlaps):")
print(f"  File: {best_valid['file']}")
print(f"  Score: {best_valid['score']:.6f}")
print(f"  Gap to target: {best_valid['score'] - 68.922808:.6f}")

In [None]:
# Analyze score distribution by N for the best CSV
# best_valid['file'] is the path with the preopt_dir prefix removed, so plain
# concatenation restores the full path.
best_csv_path = preopt_dir + best_valid['file']
best_configs = load_submission(best_csv_path)

# One row per N with that configuration's individual score.
scores_by_n = [
    {'n': n, 'score': get_score(best_configs[n], n)}
    for n in range(1, 201)
]
scores_df = pd.DataFrame(scores_by_n)

print('\nScore distribution by N range:')
overall_score = scores_df['score'].sum()
for start, end in [(1, 50), (51, 100), (101, 150), (151, 200)]:
    in_range = scores_df['n'].between(start, end)
    range_score = scores_df[in_range]['score'].sum()
    pct = 100 * range_score / overall_score
    print(f'  N={start}-{end}: {range_score:.6f} ({pct:.1f}%)')

In [None]:
# Check what tools are available
import os
import stat

bbox3_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bbox3'
bbox3_exists = os.path.exists(bbox3_path)
print(f'bbox3 exists: {bbox3_exists}')
if bbox3_exists:
    # Only inspect the file when it is present: os.stat raises
    # FileNotFoundError on a missing path.
    st = os.stat(bbox3_path)
    print(f'bbox3 size: {st.st_size} bytes')

    # Check if it's executable (owner execute bit)
    print(f'bbox3 executable: {bool(st.st_mode & stat.S_IXUSR)}')

# Check bucket-of-chump bbox3
boc_bbox3 = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/bbox3'
print(f'\nbucket-of-chump bbox3 exists: {os.path.exists(boc_bbox3)}')
if os.path.exists(boc_bbox3):
    print(f'bucket-of-chump bbox3 size: {os.path.getsize(boc_bbox3)} bytes')

In [None]:
# Summary of findings
banner = '=' * 60
# Distance between the best overlap-free score and the target score.
gap = best_valid["score"] - 68.922808

observations = (
    '1. Pre-optimized CSVs are at local optimum for local search',
    '2. bbox3 C++ optimizer is available and can be used',
    '3. N=151-200 contributes ~28% of score - focus here for max impact',
    '4. Need CONSTRUCTIVE approaches to escape local optima',
)
next_steps = (
    '1. Submit baseline to verify LB score matches local score',
    '2. Run bbox3 optimizer on current best',
    '3. Implement backward propagation',
    '4. Try beam search construction for high-N configs',
)

print(banner)
print('ANALYSIS SUMMARY')
print(banner)
print(f'\nCurrent best score: {best_valid["score"]:.6f}')
print('Target score: 68.922808')
print(f'Gap to close: {gap:.6f} ({100*(gap)/68.922808:.2f}%)')
print('\nKey observations:')
for observation in observations:
    print(observation)
print('\nRecommended next steps:')
for step in next_steps:
    print(step)