# Evolver Loop 3 Analysis

## Issue: Submission failed with 'Overlapping trees in group 004'

Our local validation passed but Kaggle found overlaps. Need to investigate:
1. What's different about group 004?
2. Is our overlap detection too lenient?
3. How to fix this for future submissions?

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree

getcontext().prec = 50
scale_factor = Decimal('1e18')  # Higher precision
SCALE = float(scale_factor)

print('Libraries loaded')

Libraries loaded


In [2]:
# Load the failed submission
submission = pd.read_csv('/home/code/experiments/003_local_search/submission.csv')
print(f'Submission shape: {submission.shape}')
print(submission.head(10))

Submission shape: (20100, 4)
      id           x           y          deg
0  001_0        s0.0        s0.0        s45.0
1  002_0    s-0.0255     s0.0493         s0.0
2  002_1     s0.3688     s0.7431       s180.0
3  003_0   s0.431989   s-0.16564   s63.434948
4  003_1   s0.745039   s0.460459   s63.434948
5  003_2  s-0.127028   s0.505181  s243.434948
6  004_0   s0.124855  s-0.124889   s15.376252
7  004_1   s0.799799    s0.06072   s15.376252
8  004_2   s0.250201    s0.73928  s195.376252
9  004_3   s0.925145   s0.924889  s195.376252


In [3]:
# Extract group 004 (N=4)
group_004 = submission[submission['id'].str.startswith('004_')].copy()
print(f'Group 004 has {len(group_004)} trees')
print(group_004)

Group 004 has 4 trees
      id          x           y          deg
6  004_0  s0.124855  s-0.124889   s15.376252
7  004_1  s0.799799    s0.06072   s15.376252
8  004_2  s0.250201    s0.73928  s195.376252
9  004_3  s0.925145   s0.924889  s195.376252


In [4]:
# Parse the values (remove 's' prefix)
def parse_val(s):
    return Decimal(str(s).replace('s', ''))

group_004['x_val'] = group_004['x'].apply(parse_val)
group_004['y_val'] = group_004['y'].apply(parse_val)
group_004['deg_val'] = group_004['deg'].apply(parse_val)

print('Parsed values:')
for _, row in group_004.iterrows():
    print(f"  {row['id']}: x={row['x_val']}, y={row['y_val']}, deg={row['deg_val']}")

Parsed values:
  004_0: x=0.124855, y=-0.124889, deg=15.376252
  004_1: x=0.799799, y=0.06072, deg=15.376252
  004_2: x=0.250201, y=0.73928, deg=195.376252
  004_3: x=0.925145, y=0.924889, deg=195.376252


In [5]:
# Create tree polygons with HIGH precision
class ChristmasTree:
    def __init__(self, center_x, center_y, angle):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))
        self._build_polygon()
    
    def _build_polygon(self):
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

print('ChristmasTree class defined')

ChristmasTree class defined


In [6]:
# Create trees for group 004
trees = []
for _, row in group_004.iterrows():
    tree = ChristmasTree(row['x_val'], row['y_val'], row['deg_val'])
    trees.append(tree)

print(f'Created {len(trees)} trees')

Created 4 trees


In [7]:
# Check ALL pairs for overlap with VERY strict criteria
print('Checking all pairs for overlap...')
for i in range(len(trees)):
    for j in range(i+1, len(trees)):
        poly_i = trees[i].polygon
        poly_j = trees[j].polygon
        
        if poly_i.intersects(poly_j):
            intersection = poly_i.intersection(poly_j)
            area = intersection.area / (SCALE ** 2)
            
            # Check different thresholds
            print(f'\nTrees {i} and {j}:')
            print(f'  Intersects: True')
            print(f'  Intersection area: {area:.2e}')
            print(f'  Touches only: {poly_i.touches(poly_j)}')
            print(f'  Overlaps: {poly_i.overlaps(poly_j)}')
            
            if area > 0:
                print(f'  *** POTENTIAL OVERLAP DETECTED ***')

Checking all pairs for overlap...

Trees 0 and 2:
  Intersects: True
  Intersection area: 3.47e-13
  Touches only: False
  Overlaps: True
  *** POTENTIAL OVERLAP DETECTED ***

Trees 1 and 3:
  Intersects: True
  Intersection area: 3.47e-13
  Touches only: False
  Overlaps: True
  *** POTENTIAL OVERLAP DETECTED ***


In [8]:
# Let's also check using a different approach - buffer by tiny amount
print('\nChecking with buffered polygons (stricter)...')
for i in range(len(trees)):
    for j in range(i+1, len(trees)):
        poly_i = trees[i].polygon.buffer(-1)  # Shrink by 1 unit (in scaled coords)
        poly_j = trees[j].polygon.buffer(-1)
        
        if poly_i.intersects(poly_j):
            intersection = poly_i.intersection(poly_j)
            area = intersection.area / (SCALE ** 2)
            if area > 1e-20:
                print(f'Trees {i} and {j}: buffered intersection area = {area:.2e}')


Checking with buffered polygons (stricter)...
Trees 0 and 2: buffered intersection area = 3.47e-13
Trees 1 and 3: buffered intersection area = 3.47e-13


In [9]:
# Check the actual coordinates more carefully
print('\nDetailed tree positions:')
for i, tree in enumerate(trees):
    bounds = tree.polygon.bounds
    print(f'Tree {i}: center=({float(tree.center_x):.6f}, {float(tree.center_y):.6f}), angle={float(tree.angle):.6f}')
    print(f'  Bounds (scaled): minx={bounds[0]:.0f}, miny={bounds[1]:.0f}, maxx={bounds[2]:.0f}, maxy={bounds[3]:.0f}')
    print(f'  Bounds (real): minx={bounds[0]/SCALE:.6f}, miny={bounds[1]/SCALE:.6f}, maxx={bounds[2]/SCALE:.6f}, maxy={bounds[3]/SCALE:.6f}')


Detailed tree positions:
Tree 0: center=(0.124855, -0.124889), angle=15.376252
  Bounds (scaled): minx=-212616886264613760, miny=-337616815040159680, maxx=462326886264613760, maxy=646475311461974272
  Bounds (real): minx=-0.212617, miny=-0.337617, maxx=0.462327, maxy=0.646475
Tree 1: center=(0.799799, 0.060720), angle=15.376252
  Bounds (scaled): minx=462327113735386240, miny=-152007815040159680, maxx=1137270886264613760, maxy=832084311461974272
  Bounds (real): minx=0.462327, miny=-0.152008, maxx=1.137271, maxy=0.832084
Tree 2: center=(0.250201, 0.739280), angle=195.376252
  Bounds (scaled): minx=-87270886264613760, miny=-32084311461974272, maxx=587672886264613760, maxy=952007815040159744
  Bounds (real): minx=-0.087271, miny=-0.032084, maxx=0.587673, maxy=0.952008
Tree 3: center=(0.925145, 0.924889), angle=195.376252
  Bounds (scaled): minx=587673113735386240, miny=153524688538025728, maxx=1262616886264613888, maxy=1137616815040159744
  Bounds (real): minx=0.587673, miny=0.153525, m

In [10]:
# The issue is clear: our overlap threshold (1e-15) is too lenient
# Kaggle detected overlaps of 3.47e-13 which is 347x larger than our threshold

# The problem is that rotation tightening + local search created these overlaps
# We need to either:
# 1. Use stricter overlap checking (area > 0 instead of area > 1e-15)
# 2. Use the grid placement without rotation tightening for small N
# 3. Add a final validation pass that removes any overlapping configurations

print("Root cause identified:")
print("- Our overlap threshold: 1e-15")
print("- Actual overlap in group 004: 3.47e-13")
print("- This is 347x larger than our threshold")
print("\nSolution: Use ZERO tolerance for overlaps (area > 0)")

Root cause identified:
- Our overlap threshold: 1e-15
- Actual overlap in group 004: 3.47e-13
- This is 347x larger than our threshold

Solution: Use ZERO tolerance for overlaps (area > 0)
