# Evolver Loop 1 Analysis

## Issue: Submission failed with 'Overlapping trees in group 040'

The local overlap check passed, but Kaggle's validation failed. We need to investigate two possible causes:
1. Precision loss in coordinate values
2. Difference between local and Kaggle overlap detection

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import warnings
# Give Decimal arithmetic 50 significant digits so coordinate strings are
# held without rounding before they are converted to float.
getcontext().prec = 50

# Suppress every warning for the rest of the session to keep cell output short.
warnings.filterwarnings('ignore')

print('Libraries loaded')

In [None]:
# Load the reference ensemble and the file we submitted so they can be
# inspected side by side.
original = pd.read_csv('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv')
submission = pd.read_csv('/home/code/experiments/001_baseline/submission.csv')

for shape_label, frame in (('Original shape:', original),
                           ('Submission shape:', submission)):
    print(shape_label, frame.shape)

# Show the first five rows whose id starts with '040_' (the group that
# failed validation) from each file.
for heading, frame in (('\nOriginal N=40 entries:', original),
                       ('\nSubmission N=40 entries:', submission)):
    in_group_40 = frame['id'].str.startswith('040_')
    print(heading)
    print(frame[in_group_40].head(5))

In [None]:
# Print the first x value of each file to eyeball how many digits survive.
print('Original x precision example:')
print(original.iloc[0]['x'])
print('\nSubmission x precision example:')
print(submission.iloc[0]['x'])

# For every coordinate column, drop any 's' characters, parse the remainder
# as float, and report the largest absolute difference between the files.
for col in ('x', 'y', 'deg'):
    orig_text = original[col].astype(str)
    sub_text = submission[col].astype(str)
    orig_vals = orig_text.str.replace('s', '').astype(float)
    sub_vals = sub_text.str.replace('s', '').astype(float)
    diff = (orig_vals - sub_vals).abs().max()
    print(f'{col} max difference: {diff}')

In [None]:
# Decision: submit the original CSV untouched instead of re-serialising it,
# so no transformation can alter the coordinate values.
import shutil

source_csv = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv'

# Copy the same source file to the submission location and to the
# experiment folder.
for destination in ('/home/submission/submission.csv',
                    '/home/code/experiments/001_baseline/submission.csv'):
    shutil.copy(source_csv, destination)

print('Copied original best_ensemble.csv directly')

# Read the copied submission back to confirm it parses and has rows.
verify = pd.read_csv('/home/submission/submission.csv')
print(f'Submission rows: {len(verify)}')
print(f'First row: {verify.iloc[0].to_dict()}')

In [None]:
# Now let's verify the score of the original file
# Tree geometry
# Outline of one unrotated tree as (x, y) vertex pairs. These are the same
# 15 vertices, in the same order, as the polygon built in
# ChristmasTree.__init__.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5), (0.0625, 0.5),
    (0.2, 0.25), (0.1, 0.25),
    (0.35, 0), (0.075, 0),
    (0.075, -0.2), (-0.075, -0.2),
    (-0.075, 0), (-0.35, 0),
    (-0.1, 0.25), (-0.2, 0.25),
    (-0.0625, 0.5), (-0.125, 0.5),
]
# Parallel coordinate lists: TX[i], TY[i] are the x and y of vertex i.
TX = [x for x, _ in _TREE_OUTLINE]
TY = [y for _, y in _TREE_OUTLINE]

class ChristmasTree:
    """One tree outline, rotated about the origin and then moved into place.

    Attributes:
        center_x, center_y: Translation applied to the outline, stored as
            Decimal built from ``str()`` of the constructor arguments.
        angle: Rotation handed to ``shapely.affinity.rotate`` (called with
            its default angle unit), stored as Decimal.
        polygon: The resulting shapely ``Polygon``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Heights of the horizontal levels of the outline.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -Decimal('0.2')

        # Full widths used at those levels.
        top_w = Decimal('0.25')
        mid_w = Decimal('0.4')
        base_w = Decimal('0.7')
        trunk_w = Decimal('0.15')

        # Right half of the outline, walking from just below the tip down to
        # the bottom of the trunk.
        right_side = [
            (top_w / 2, tier_1_y), (top_w / 4, tier_1_y),
            (mid_w / 2, tier_2_y), (mid_w / 4, tier_2_y),
            (base_w / 2, base_y),
            (trunk_w / 2, base_y), (trunk_w / 2, trunk_bottom_y),
        ]
        # The left half is the mirror image, walked back up towards the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]

        outline = [(0, float(tip_y))]
        outline += [(float(x), float(y)) for x, y in right_side + left_side]
        initial_polygon = Polygon(outline)

        # Rotate about the origin first, then translate to the centre.
        turned = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

print('ChristmasTree class defined')

In [None]:
def _strip_s_prefix(value):
    """Return ``str(value)`` without a leading 's', if it has one."""
    text = str(value)
    return text[1:] if text.startswith('s') else text


def load_submission(csv_path):
    """Load a submission CSV and group its trees by configuration size.

    Every row must provide ``id``, ``x``, ``y`` and ``deg`` columns. The
    integer before the first underscore of ``id`` selects the group; the
    ``x``/``y``/``deg`` cells may carry a leading 's', which is removed
    before the text is passed to ``ChristmasTree``.

    The file is read with ``dtype=str`` so each coordinate reaches
    ``ChristmasTree`` exactly as written in the file. Without it, columns
    lacking the 's' prefix would be parsed by pandas' default float
    converter, which is not guaranteed to round-trip, and the Decimal built
    afterwards could differ from the CSV text.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        dict mapping each group size ``n`` (int) to the list of
        ``ChristmasTree`` objects of that group, in file order.
    """
    df = pd.read_csv(csv_path, dtype=str)
    configs = {}

    for row_id, raw_x, raw_y, raw_deg in zip(df['id'], df['x'], df['y'], df['deg']):
        n = int(row_id.split('_')[0])
        tree = ChristmasTree(
            _strip_s_prefix(raw_x),
            _strip_s_prefix(raw_y),
            _strip_s_prefix(raw_deg),
        )
        configs.setdefault(n, []).append(tree)

    return configs

def has_overlap(trees):
    """Report whether any two trees overlap beyond merely touching.

    Args:
        trees: Sequence of objects exposing a ``polygon`` attribute.

    Returns:
        True as soon as one pair of polygons intersects without just
        touching; False otherwise, including when fewer than two trees are
        given.
    """
    if len(trees) < 2:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    for position, shape in enumerate(shapes):
        # The query results are used as positions into ``shapes``, so this
        # relies on STRtree.query yielding integer indices.
        for candidate in spatial_index.query(shape):
            if candidate == position:
                continue  # a polygon is never compared with itself
            neighbour = shapes[candidate]
            if not shape.intersects(neighbour):
                continue
            if not shape.touches(neighbour):
                return True
    return False

def get_score(trees, n):
    """Score one configuration: squared bounding-square side divided by n.

    Args:
        trees: Objects exposing a ``polygon`` attribute.
        n: Divisor applied to the squared side length.

    Returns:
        ``side ** 2 / n``, where ``side`` is the longer edge of the bounding
        box of the union of all tree polygons.
    """
    merged = unary_union([tree.polygon for tree in trees])
    min_x, min_y, max_x, max_y = merged.bounds
    width = max_x - min_x
    height = max_y - min_y
    side_length = max(width, height)
    return side_length**2 / n

def get_total_score(configs):
    """Add up ``get_score`` over every configuration.

    Args:
        configs: Mapping from n to the list of trees for that n.

    Returns:
        The sum of the per-configuration scores, or 0 for an empty mapping.
    """
    total = 0
    for n, trees in configs.items():
        total = total + get_score(trees, n)
    return total

print('Helper functions defined')

In [None]:
# Build the tree polygons for every configuration in the original ensemble.
configs = load_submission('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv')

print(f'Loaded {len(configs)} configurations')

# Collect every N from 1 to 200 whose configuration has overlapping trees.
overlap_ns = []
for n in range(1, 201):
    if not has_overlap(configs[n]):
        continue
    overlap_ns.append(n)
overlap_count = len(overlap_ns)

print(f'Configurations with overlaps: {overlap_count}')
if overlap_ns:
    print(f'Overlapping N values: {overlap_ns}')

# Total score of the file, compared against the target value.
score = get_total_score(configs)
gap = score - 68.922808
print(f'Score: {score:.6f}')
print(f'Target: 68.922808')
print(f'Gap: {gap:.6f}')

In [None]:
# Brute-force pairwise check of the N=40 configuration, without the spatial
# index, reporting the intersection area of each offending pair.
print('Checking N=40 configuration in detail...')
trees_40 = configs[40]
polygons_40 = [t.polygon for t in trees_40]

print(f'Number of trees: {len(trees_40)}')

# Visit each unordered pair (i, j) with i < j exactly once.
overlap_pairs = []
for i, first in enumerate(polygons_40):
    for j in range(i + 1, len(polygons_40)):
        second = polygons_40[j]
        if not first.intersects(second):
            continue
        if first.touches(second):
            continue  # touching only is not counted as an overlap
        overlap_pairs.append((i, j))
        intersection = first.intersection(second)
        print(f'  Trees {i} and {j} overlap! Intersection area: {intersection.area}')

print(f'Total overlapping pairs in N=40: {len(overlap_pairs)}')