# Evolver Loop 13 Analysis

## Issue: Submission failed with 'Overlapping trees in group 004'

Our local validation passed, but Kaggle rejected the submission. We need to investigate the N=4 case specifically.

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
import warnings
warnings.filterwarnings('ignore')

# Raise the Decimal context precision to 50 significant digits so the vertex
# arithmetic in create_tree_high_precision is carried out with more digits
# than the decimal module's default context provides.
getcontext().prec = 50  # Higher precision
# Factor by which tree coordinates are multiplied before being truncated to
# integers in create_tree_high_precision; areas are divided by SCALE**2 later.
SCALE = 10**18

print("Setup complete")

In [None]:
# Read the exp_014 submission under investigation
submission_path = '/home/code/experiments/014_nfp_local_search/submission.csv'
df = pd.read_csv(submission_path)
print(f"Submission shape: {df.shape}")

# Keep only the rows whose id begins with the group-004 prefix
in_group_004 = df['id'].str.startswith('004_')
n4_trees = df.loc[in_group_004].copy()
print("\nN=4 trees:")
print(n4_trees)

In [None]:
# Tree shape vertices
# TX[i] and TY[i] are the x and y coordinates of the i-th outline vertex of the
# untransformed tree (before rotation about the origin and translation).
# The two lists have the same length and are consumed pairwise via zip().
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_high_precision(x, y, angle):
    """Build the tree outline at a given pose as a polygon in scaled units.

    The template outline (TX, TY) is rotated by ``angle`` degrees about the
    origin and shifted by ``(x, y)`` using Decimal arithmetic. Each resulting
    vertex is multiplied by SCALE and truncated with ``int`` before being
    passed to ``Polygon``.

    Args:
        x: Horizontal offset; every 's' character in its string form is
            removed before it is parsed as a Decimal.
        y: Vertical offset, cleaned the same way as ``x``.
        angle: Rotation in degrees, cleaned the same way as ``x``.

    Returns:
        A ``Polygon`` whose coordinates are expressed in units of 1/SCALE.

    NOTE(review): the sine and cosine are computed by ``math`` on a float, so
    the rotation is only double-precision. Shapely presumably stores the
    coordinates as floats as well -- confirm before relying on the full
    1/SCALE resolution.
    """
    import math

    def _to_decimal(value):
        # Remove every 's' from the textual form, then parse as Decimal.
        return Decimal(str(value).replace('s', ''))

    offset_x = _to_decimal(x)
    offset_y = _to_decimal(y)
    degrees = _to_decimal(angle)

    # Degrees -> radians in float, then lift the trig values back to Decimal
    theta = float(degrees) * math.pi / 180
    cos_t = Decimal(str(math.cos(theta)))
    sin_t = Decimal(str(math.sin(theta)))

    def _place(local_x, local_y):
        # Rotate one template vertex, translate it, and scale it to an integer pair.
        lx = Decimal(str(local_x))
        ly = Decimal(str(local_y))
        rotated_x = lx * cos_t - ly * sin_t
        rotated_y = lx * sin_t + ly * cos_t
        world_x = rotated_x + offset_x
        world_y = rotated_y + offset_y
        return (int(world_x * SCALE), int(world_y * SCALE))

    return Polygon([_place(tx, ty) for tx, ty in zip(TX, TY)])

print("High precision tree function defined")

In [None]:
# Build one scaled polygon per N=4 row and report its area in unscaled units
polygons = []
area_unit = SCALE**2  # areas grow with the square of the coordinate factor
for _, tree in n4_trees.iterrows():
    tree_poly = create_tree_high_precision(tree['x'], tree['y'], tree['deg'])
    polygons.append(tree_poly)
    print(f"Tree {tree['id']}: area={tree_poly.area / area_unit:.6f}")

print(f"\nCreated {len(polygons)} polygons")

In [None]:
# Report every intersecting pair of scaled polygons with its predicates and area
print("\nChecking overlaps between all pairs:")
area_unit = SCALE**2  # converts scaled area back to unscaled units
for i, p1 in enumerate(polygons):
    for j, p2 in enumerate(polygons[i + 1:], start=i + 1):
        meets = p1.intersects(p2)
        boundary_only = p1.touches(p2)
        partial = p1.overlaps(p2)

        # Disjoint pairs produce no output
        if not meets:
            continue

        int_area = p1.intersection(p2).area / area_unit
        print(f"  Trees {i} and {j}: intersects={meets}, touches={boundary_only}, overlaps={partial}, int_area={int_area:.15f}")

        if int_area > 1e-20:
            print("    *** OVERLAP DETECTED! ***")

In [None]:
# Let's also check with Shapely's default precision (no scaling)
from shapely.geometry import Polygon as ShapelyPolygon
from shapely import affinity

def create_tree_shapely(x, y, angle):
    """Build the tree polygon at unscaled float coordinates.

    Each argument is converted to ``float`` after every 's' character is
    removed from its string form. The template outline (TX, TY) is rotated by
    ``angle`` about the origin via ``affinity.rotate`` and then moved by
    ``(x, y)`` via ``affinity.translate``.

    Returns:
        The transformed polygon.
    """
    dx, dy, degrees = (float(str(value).replace('s', '')) for value in (x, y, angle))

    outline = ShapelyPolygon(list(zip(TX, TY)))
    turned = affinity.rotate(outline, degrees, origin=(0, 0))
    return affinity.translate(turned, dx, dy)

# Repeat the pairwise test on the unscaled float polygons
print("\nChecking with Shapely default precision:")
polys_shapely = [
    create_tree_shapely(tree['x'], tree['y'], tree['deg'])
    for _, tree in n4_trees.iterrows()
]

for i, p1 in enumerate(polys_shapely):
    for j, p2 in enumerate(polys_shapely[i + 1:], start=i + 1):
        # Skip pairs that are disjoint or that Shapely reports as merely touching
        if not p1.intersects(p2) or p1.touches(p2):
            continue
        shared = p1.intersection(p2)
        print(f"  Trees {i} and {j}: OVERLAP! area={shared.area:.15f}")

In [None]:
# Let's look at the current best submission (exp_012) for N=4
df_best = pd.read_csv('/home/code/experiments/012_full_snapshot_ensemble/submission.csv')
n4_best = df_best[df_best['id'].str.startswith('004_')].copy()
print("Best submission N=4 trees:")
print(n4_best)

# Check if they're the same.
# np.array_equal is used rather than `(a == b).all()` because it returns False
# when the two selections differ in length, instead of failing on the
# element-wise comparison of arrays with mismatched shapes.
print("\nComparing to current submission:")
for col in ['x', 'y', 'deg']:
    same = np.array_equal(n4_trees[col].values, n4_best[col].values)
    print(f"  {col}: same={same}")

In [None]:
# The issue is that exp_014 used exp_012 as baseline and found 0 improvements
# So the submission should be identical to exp_012
# But exp_012 was submitted successfully (exp_010 LB: 70.3410)
# Let me check if there's a difference in the submission files

import hashlib

def file_hash(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks; the resulting digest is the same as hashing the whole content at
    once.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as handle:
        # iter() with a sentinel stops when read() returns empty bytes at EOF
        for chunk in iter(lambda: handle.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Print the MD5 digest of each candidate submission file, one labelled line each
print("File hashes:")
for label, csv_path in (
    ('exp_012', '/home/code/experiments/012_full_snapshot_ensemble/submission.csv'),
    ('exp_014', '/home/code/experiments/014_nfp_local_search/submission.csv'),
    ('/home/submission', '/home/submission/submission.csv'),
):
    print(f"  {label}: {file_hash(csv_path)}")

In [None]:
# Check if the files are byte-identical
import filecmp

exp_012_csv = '/home/code/experiments/012_full_snapshot_ensemble/submission.csv'
exp_014_csv = '/home/code/experiments/014_nfp_local_search/submission.csv'
submitted_csv = '/home/submission/submission.csv'

# shallow=False forces filecmp to compare file contents. In the default shallow
# mode, two files whose os.stat() signatures (type, size, mtime) match are
# reported equal without their bytes ever being read.
print("\nFile comparison:")
print(f"  exp_012 vs exp_014: {filecmp.cmp(exp_012_csv, exp_014_csv, shallow=False)}")
print(f"  exp_014 vs /home/submission: {filecmp.cmp(exp_014_csv, submitted_csv, shallow=False)}")

In [None]:
# Inspect the N=4 group stored in the exp_012 submission and run the
# plain-Shapely overlap check on it.
exp_012_csv = '/home/code/experiments/012_full_snapshot_ensemble/submission.csv'
df_012 = pd.read_csv(exp_012_csv)
n4_012 = df_012.loc[df_012['id'].str.startswith('004_')].copy()

print("exp_012 N=4 trees:")
for _, tree in n4_012.iterrows():
    print(f"  {tree['id']}: x={tree['x']}, y={tree['y']}, deg={tree['deg']}")

# One unscaled polygon per row
polys_012 = []
for _, tree in n4_012.iterrows():
    polys_012.append(create_tree_shapely(tree['x'], tree['y'], tree['deg']))

print("\nOverlap check for exp_012 N=4:")
for i, p1 in enumerate(polys_012):
    for j, p2 in enumerate(polys_012[i + 1:], start=i + 1):
        if not p1.intersects(p2):
            continue
        shared = p1.intersection(p2)
        # Pairs reported as touching are listed separately from true overlaps
        if p1.touches(p2):
            print(f"  Trees {i} and {j}: touches only")
        else:
            print(f"  Trees {i} and {j}: OVERLAP! area={shared.area:.15e}")

In [None]:
# Inspect the N=4 group stored in santa-2025.csv and run the plain-Shapely
# overlap check on it.
santa_csv = '/home/code/kaggle_datasets/santa-2025.csv'
df_santa = pd.read_csv(santa_csv)
n4_santa = df_santa.loc[df_santa['id'].str.startswith('004_')].copy()

print("santa-2025.csv N=4 trees:")
for _, tree in n4_santa.iterrows():
    print(f"  {tree['id']}: x={tree['x']}, y={tree['y']}, deg={tree['deg']}")

# One unscaled polygon per row
polys_santa = []
for _, tree in n4_santa.iterrows():
    polys_santa.append(create_tree_shapely(tree['x'], tree['y'], tree['deg']))

print("\nOverlap check for santa-2025.csv N=4:")
for i, p1 in enumerate(polys_santa):
    for j, p2 in enumerate(polys_santa[i + 1:], start=i + 1):
        if not p1.intersects(p2):
            continue
        shared = p1.intersection(p2)
        # Pairs reported as touching are listed separately from true overlaps
        if p1.touches(p2):
            print(f"  Trees {i} and {j}: touches only")
        else:
            print(f"  Trees {i} and {j}: OVERLAP! area={shared.area:.15e}")

In [None]:
# Hypothesis: the submission file may have been corrupted during the NFP run.
# Fall back to the exp_012 submission by copying it over the live submission
# file, then print both MD5 digests so the copy can be checked by eye.

import shutil

source_csv = '/home/code/experiments/012_full_snapshot_ensemble/submission.csv'
target_csv = '/home/submission/submission.csv'

shutil.copy(source_csv, target_csv)
print("Copied exp_012 submission to /home/submission/submission.csv")

# Verify
print("\nVerification:")
for label, csv_path in (('/home/submission', target_csv), ('exp_012', source_csv)):
    print(f"  {label} hash: {file_hash(csv_path)}")