# Evolver Loop 1 Analysis

## Issue: Submission failed with 'Overlapping trees in group 040'

The local overlap check passed, but Kaggle's validation rejected the submission. Two things need investigating:
1. Precision loss in coordinate values
2. Difference between local and Kaggle overlap detection

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import warnings
warnings.filterwarnings('ignore')

getcontext().prec = 50

print('Libraries loaded')

Libraries loaded


In [2]:
# Load the reference ensemble and the file we actually submitted so the two
# can be inspected side by side.
reference_csv = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv'
submitted_csv = '/home/code/experiments/001_baseline/submission.csv'
original = pd.read_csv(reference_csv)
submission = pd.read_csv(submitted_csv)

# Row/column counts of both files.
print('Original shape:', original.shape)
print('Submission shape:', submission.shape)

# First rows of the group Kaggle rejected (ids starting with '040_').
original_n40 = original.loc[original['id'].str.startswith('040_')]
submission_n40 = submission.loc[submission['id'].str.startswith('040_')]
print('\nOriginal N=40 entries:')
print(original_n40.head(5))
print('\nSubmission N=40 entries:')
print(submission_n40.head(5))

Original shape: (20100, 4)
Submission shape: (20100, 4)

Original N=40 entries:
        id                       x                       y  \
780  040_0  s-1.689632103422090692  s-0.247894431182562769   
781  040_1   s0.953906339823958005  s-0.552668496076432425   
782  040_2  s-0.821313951420922450   s0.356433575901729194   
783  040_3  s-1.689632103415815934   s0.489826127465763816   
784  040_4   s0.842519162317999837  s-1.882363352068190610   

                         deg  
780  s252.060472972624125987  
781  s252.151405771974452819  
782  s252.217176846480043650  
783  s252.060472935714216192  
784  s246.370622269343670041  

Submission N=40 entries:
        id                     x                      y                  deg
780  040_0  s-1.6896321034220907  s-0.24789443118256277  s252.06047297262413
781  040_1    s0.953906339823958   s-0.5526684960764324  s252.15140577197445
782  040_2  s-0.8213139514209225    s0.3564335759017292  s252.21717684648004
783  040_3   s-1.6896321034

In [3]:
# Check precision difference: print the first x value of each file as pandas loaded it.
print('Original x precision example:')
print(original.iloc[0]['x'])
print('\nSubmission x precision example:')
print(submission.iloc[0]['x'])

# Check if values are the same.
# Rows are matched on `id` instead of on row position, so a different row order
# in one of the files cannot show up as a (spurious) coordinate difference.
# Assumes ids are unique within each file - TODO confirm.
original_by_id = original.set_index('id')
submission_by_id = submission.set_index('id')

# Ids that exist in only one file would otherwise become NaN and be silently
# skipped by max(); report them explicitly.
unmatched_ids = original_by_id.index.symmetric_difference(submission_by_id.index)
if len(unmatched_ids) > 0:
    print(f'ids present in only one file: {len(unmatched_ids)}')

for col in ['x', 'y', 'deg']:
    # Drop the leading 's' marker (if any) before converting to float.
    orig_vals = original_by_id[col].astype(str).str.lstrip('s').astype(float)
    sub_vals = submission_by_id[col].astype(str).str.lstrip('s').astype(float)
    diff = (orig_vals - sub_vals).abs().max()
    print(f'{col} max difference: {diff}')

Original x precision example:
s-48.196086194214246

Submission x precision example:
s-48.196086194214246
x max difference: 6.99141733250808
y max difference: 7.480094519096566
deg max difference: 89.99999324866258


In [4]:
# The submission differs from the reference file, so bypass every transformation:
# copy best_ensemble.csv byte-for-byte to both submission locations.
import shutil

source_csv = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv'
destinations = (
    '/home/submission/submission.csv',
    '/home/code/experiments/001_baseline/submission.csv',
)
for destination in destinations:
    shutil.copy(source_csv, destination)

print('Copied original best_ensemble.csv directly')

# Read the copy back to confirm the row count and the first record.
verify = pd.read_csv('/home/submission/submission.csv')
print(f'Submission rows: {len(verify)}')
print(f'First row: {verify.iloc[0].to_dict()}')

Copied original best_ensemble.csv directly
Submission rows: 20100
First row: {'id': '001_0', 'x': 's-48.196086194214246', 'y': 's58.770984615214225', 'deg': 's45.0'}


In [5]:
# Now let's verify the score of the original file
# Tree geometry: TX[i] / TY[i] are the x / y coordinates of the i-th outline
# vertex (15 vertices).  They are the same points, in the same order, as the
# polygon built inside ChristmasTree; the class computes its own vertices and
# does not read these lists, and no other visible cell references them.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

class ChristmasTree:
    """One tree outline placed at a given centre and rotation.

    Attributes:
        center_x, center_y: offsets applied to the outline, stored as Decimal.
        angle: rotation handed to shapely.affinity.rotate (degrees by default),
            stored as Decimal.
        polygon: shapely Polygon of the 15-vertex outline after rotating about
            the origin and then translating to the centre.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Parse the placement values and build the placed polygon.

        Each argument is converted with str() and parsed as a Decimal; the
        values are turned into floats only when they are handed to shapely.
        """
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the horizontal levels, from the tip down to the trunk bottom.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -Decimal('0.2')

        # Right-hand half of the outline, walking from just below the tip down
        # to the bottom-right corner of the trunk.
        right_half = [
            (top_w / 2, tier_1_y), (top_w / 4, tier_1_y),
            (mid_w / 2, tier_2_y), (mid_w / 4, tier_2_y),
            (base_w / 2, base_y),
            (trunk_w / 2, base_y), (trunk_w / 2, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up towards the tip.
        left_half = [(-x, y) for x, y in reversed(right_half)]
        outline = [(Decimal('0'), tip_y)] + right_half + left_half

        initial_polygon = Polygon([(float(x), float(y)) for x, y in outline])

        # Rotate about the origin first, then move the result to the centre.
        turned = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

print('ChristmasTree class defined')

ChristmasTree class defined


In [6]:
def _strip_s_prefix(value):
    """Return str(value) without a single leading 's' marker, if present."""
    text = str(value)
    return text[1:] if text.startswith('s') else text


def load_submission(csv_path):
    """Read a submission CSV and group its trees by configuration.

    The file must have the columns `id`, `x`, `y` and `deg`.  The part of `id`
    before the first '_' is parsed as an int and used as the group key; `x`,
    `y` and `deg` may carry a leading 's', which is removed before the values
    are handed to ChristmasTree.

    Args:
        csv_path: path of the CSV file to read.

    Returns:
        dict mapping the integer group key to the list of ChristmasTree
        objects of that group, in file order.
    """
    # dtype=str keeps every cell exactly as written in the file.  Without it,
    # a column that has no 's' prefix is parsed to float64 by pandas and the
    # extra digits are lost before ChristmasTree can build a Decimal from them.
    df = pd.read_csv(csv_path, dtype=str)

    configs = {}
    for row_id, x, y, deg in zip(df['id'], df['x'], df['y'], df['deg']):
        n = int(row_id.split('_')[0])
        tree = ChristmasTree(_strip_s_prefix(x), _strip_s_prefix(y), _strip_s_prefix(deg))
        configs.setdefault(n, []).append(tree)

    return configs

def has_overlap(trees):
    """Report whether any two trees share more than a boundary.

    Args:
        trees: sequence of objects exposing a `.polygon` geometry.

    Returns:
        True as soon as one pair is found whose polygons intersect without
        merely touching; False otherwise, including for zero or one tree.
    """
    if len(trees) <= 1:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    for own_pos, shape in enumerate(shapes):
        # The query results are used as integer positions into `shapes`.
        for other_pos in spatial_index.query(shape):
            if other_pos == own_pos:
                continue  # a polygon is not compared with itself
            other = shapes[other_pos]
            if shape.intersects(other) and not shape.touches(other):
                return True
    return False

def get_score(trees, n):
    """Score one configuration: squared bounding-square side divided by n.

    Args:
        trees: sequence of objects exposing a `.polygon` geometry.
        n: divisor applied to the squared side length.

    Returns:
        side ** 2 / n, where side is the larger of the width and height of
        the bounding box of the union of all polygons.
    """
    bounds = unary_union([tree.polygon for tree in trees]).bounds
    # bounds is read as (min_x, min_y, max_x, max_y).
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    side_length = max(width, height)
    return side_length**2 / n

def get_total_score(configs):
    """Add up get_score over every configuration.

    Args:
        configs: dict mapping n to the list of trees of that configuration.

    Returns:
        The sum of get_score(trees, n) over all items, or 0 for an empty dict.
    """
    total = 0
    for n, trees in configs.items():
        total += get_score(trees, n)
    return total

print('Helper functions defined')

Helper functions defined


In [7]:
# Load the original best_ensemble.csv and validate it with the local helpers.
configs = load_submission('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv')

print(f'Loaded {len(configs)} configurations')

# Collect every N in 1..200 whose configuration fails the overlap check.
overlap_ns = [n for n in range(1, 201) if has_overlap(configs[n])]
overlap_count = len(overlap_ns)

print(f'Configurations with overlaps: {overlap_count}')
if overlap_ns:
    print(f'Overlapping N values: {overlap_ns}')

# Total score and its distance from the target value.
score = get_total_score(configs)
print(f'Score: {score:.6f}')
print(f'Target: 68.922808')
print(f'Gap: {score - 68.922808:.6f}')

Loaded 200 configurations


Configurations with overlaps: 0


Score: 70.676102
Target: 68.922808
Gap: 1.753294


In [8]:
# Brute-force check of the N=40 configuration: test every unordered pair
# directly, without the spatial index.
print('Checking N=40 configuration in detail...')
trees_40 = configs[40]
polygons_40 = [t.polygon for t in trees_40]

print(f'Number of trees: {len(trees_40)}')

overlap_pairs = []
for i, first in enumerate(polygons_40):
    for j in range(i + 1, len(polygons_40)):
        second = polygons_40[j]
        # Skip pairs that are disjoint or that only share a boundary.
        if not first.intersects(second) or first.touches(second):
            continue
        overlap_pairs.append((i, j))
        intersection = first.intersection(second)
        print(f'  Trees {i} and {j} overlap! Intersection area: {intersection.area}')

print(f'Total overlapping pairs in N=40: {len(overlap_pairs)}')

Checking N=40 configuration in detail...
Number of trees: 40
Total overlapping pairs in N=40: 0


In [9]:
# Let's check all available pre-optimized CSVs for overlaps using a stricter check.
# Hypothesis: Kaggle may apply a buffer or a different tolerance than the local check.

# NOTE(review): `os` and `Path` are not used by any cell visible here.
import os
from pathlib import Path

# Directory holding the pre-optimized CSVs.
# NOTE(review): not referenced by the visible cells, which spell out full paths instead.
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

def strict_has_overlap(trees, buffer=1e-10):
    """Overlap check on polygons that were first grown by `buffer`.

    Every polygon is replaced by `polygon.buffer(buffer)` before testing, so
    trees that only touch, or nearly touch, are reported as overlapping.

    Args:
        trees: sequence of objects exposing a `.polygon` geometry.
        buffer: distance passed to each polygon's `.buffer()` call.

    Returns:
        True if any two grown polygons intersect without merely touching;
        False otherwise, including for zero or one tree.
    """
    if len(trees) <= 1:
        return False

    grown = [tree.polygon.buffer(buffer) for tree in trees]
    spatial_index = STRtree(grown)

    # Query results are used as integer positions into `grown`; a polygon is
    # never compared with itself.
    return any(
        pos != own and shape.intersects(grown[pos]) and not shape.touches(grown[pos])
        for own, shape in enumerate(grown)
        for pos in spatial_index.query(shape)
    )

# Sweep the buffer size for N=40 to see at which tolerance it starts to fail.
print("Checking N=40 with various buffer sizes:")
group_40 = configs[40]
for buffer in (0, 1e-15, 1e-12, 1e-10, 1e-8, 1e-6):
    verdict = 'OVERLAP' if strict_has_overlap(group_40, buffer) else 'OK'
    print(f"  Buffer {buffer}: {verdict}")

Checking N=40 with various buffer sizes:
  Buffer 0: OK
  Buffer 1e-15: OVERLAP
  Buffer 1e-12: OVERLAP
  Buffer 1e-10: OVERLAP
  Buffer 1e-08: OVERLAP
  Buffer 1e-06: OVERLAP


In [10]:
# Run the strict (buffered) overlap check and the score over every candidate CSV.
csv_files = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv',
]

print("Checking all CSVs for strict overlaps (buffer=1e-15):")
for csv_path in csv_files:
    file_name = csv_path.split('/')[-1]
    try:
        test_configs = load_submission(csv_path)
        # Every N in 1..200 that fails the buffered check.
        strict_overlaps = [
            n for n in range(1, 201)
            if strict_has_overlap(test_configs[n], buffer=1e-15)
        ]
        score = get_total_score(test_configs)
        print(f"{file_name}: score={score:.6f}, strict_overlaps={len(strict_overlaps)}")
        if strict_overlaps:
            # Only the first ten offending N values are listed.
            print(f"  Overlapping N: {strict_overlaps[:10]}...")
    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(f"{file_name}: Error - {e}")

Checking all CSVs for strict overlaps (buffer=1e-15):


best_ensemble.csv: score=70.676102, strict_overlaps=112
  Overlapping N: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]...


ensemble.csv: score=70.676102, strict_overlaps=112
  Overlapping N: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]...


santa-2025.csv: score=70.676102, strict_overlaps=112
  Overlapping N: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]...


submission.csv: score=70.676501, strict_overlaps=90
  Overlapping N: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]...


71.97.csv: score=71.972027, strict_overlaps=64
  Overlapping N: [21, 30, 39, 40, 41, 44, 45, 52, 55, 56]...


72.49.csv: score=72.495739, strict_overlaps=0


In [11]:
# The 72.49.csv has no strict overlaps - let's verify it's valid
csv_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv'
test_configs = load_submission(csv_path)

# Count the configurations (N = 1..200) that fail each check.  The strict count
# is computed here rather than printed as a literal, so the report always
# reflects the file that was actually loaded.
regular_overlaps = sum(1 for n in range(1, 201) if has_overlap(test_configs[n]))
strict_overlaps = sum(1 for n in range(1, 201) if strict_has_overlap(test_configs[n], buffer=1e-15))
score = get_total_score(test_configs)

print(f"72.49.csv:")
print(f"  Score: {score:.6f}")
print(f"  Regular overlaps: {regular_overlaps}")
print(f"  Strict overlaps (1e-15): {strict_overlaps}")
print(f"  Gap to target: {score - 68.922808:.6f}")

# This is a valid submission but with worse score
# We need to find a way to fix the overlaps in the better CSVs

72.49.csv:
  Score: 72.495739
  Regular overlaps: 0
  Strict overlaps (1e-15): 0
  Gap to target: 3.572931


In [12]:
# Check more CSVs from the snapshot
more_csvs = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/experiments/004_sa_v1_parallel/submission_best.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/experiments/005_backward_propagation/submission.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/experiments/005_backward_propagation/optimized.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/submission.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_70_936673758122.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_70_926149550346.csv',
]

print("Checking more CSVs:")
results = []
for csv_path in more_csvs:
    # Label each file as "<parent directory>/<file name>".  Two of these paths
    # end in the same file name (submission.csv), so the bare file name would
    # make their lines indistinguishable in the report and in the ranking.
    label = '/'.join(csv_path.split('/')[-2:])
    try:
        test_configs = load_submission(csv_path)
        # Number of configurations (N = 1..200) failing each overlap check.
        regular_overlaps = sum(1 for n in range(1, 201) if has_overlap(test_configs[n]))
        strict_overlaps = sum(1 for n in range(1, 201) if strict_has_overlap(test_configs[n], buffer=1e-15))
        score = get_total_score(test_configs)
        results.append((label, score, regular_overlaps, strict_overlaps))
        print(f"{label}: score={score:.6f}, regular={regular_overlaps}, strict={strict_overlaps}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(f"{label}: Error - {e}")

# Sort by score (lower is better) and show the top five.
results.sort(key=lambda x: x[1])
print("\nBest CSVs by score:")
for name, score, reg, strict in results[:5]:
    print(f"  {name}: {score:.6f} (reg={reg}, strict={strict})")

Checking more CSVs:


submission_best.csv: score=70.676102, regular=0, strict=112


submission.csv: score=70.676102, regular=0, strict=112


optimized.csv: score=70.676102, regular=0, strict=112


submission.csv: score=70.676102, regular=0, strict=112


submission_70_936673758122.csv: score=70.936674, regular=0, strict=2


submission_70_926149550346.csv: score=70.926150, regular=0, strict=2

Best CSVs by score:
  submission_best.csv: 70.676102 (reg=0, strict=112)
  submission.csv: 70.676102 (reg=0, strict=112)
  optimized.csv: 70.676102 (reg=0, strict=112)
  submission.csv: 70.676102 (reg=0, strict=112)
  submission_70_926149550346.csv: 70.926150 (reg=0, strict=2)
