# Loop 5 Analysis: Fix Overlap Issue in Ensemble

The exp_004 submission failed with 'Overlapping trees in group 002'. Let's investigate:
1. What's in group 002 (N=2) in our submission?
2. Why does it have overlaps?
3. How to fix it?

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from decimal import Decimal, getcontext
getcontext().prec = 30

# Read the rejected ensemble submission, then show its size and first rows
submission = pd.read_csv(
    '/home/code/experiments/004_ensemble_valid/submission.csv'
)
total_rows = len(submission)
print(f"Total rows: {total_rows}")
print(submission.head(10))

In [None]:
# Keep only the rows whose id starts with '002_' (the N=2 group)
is_group_002 = submission['id'].str.startswith('002_')
n2_rows = submission[is_group_002]
print(f"N=2 rows: {len(n2_rows)}")
print(n2_rows)

In [None]:
# Tree outline construction
def make_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to (x, y).

    The 15-vertex template is defined around the origin (tip at y=0.8,
    trunk bottom at y=-0.2). It is rotated about the origin first and then
    translated, so (x, y) is where the template origin ends up.

    Returns a shapely ``Polygon``.
    """
    trunk_w, trunk_h = 0.15, 0.2
    base_w, mid_w, top_w = 0.7, 0.4, 0.25
    tip_y, tier1_y, tier2_y, base_y = 0.8, 0.5, 0.25, 0.0
    trunk_bottom_y = -trunk_h

    # Outline as (x, y) pairs: from the tip down the positive-x side, across
    # the trunk bottom, then back up the negative-x side.
    template = [
        (0, tip_y),
        (top_w / 2, tier1_y),
        (top_w / 4, tier1_y),
        (mid_w / 2, tier2_y),
        (mid_w / 4, tier2_y),
        (base_w / 2, base_y),
        (trunk_w / 2, base_y),
        (trunk_w / 2, trunk_bottom_y),
        (-trunk_w / 2, trunk_bottom_y),
        (-trunk_w / 2, base_y),
        (-base_w / 2, base_y),
        (-mid_w / 4, tier2_y),
        (-mid_w / 2, tier2_y),
        (-top_w / 4, tier1_y),
        (-top_w / 2, tier1_y),
    ]
    template_x = np.array([px for px, _ in template])
    template_y = np.array([py for _, py in template])

    # Standard 2-D rotation followed by the translation to (x, y)
    theta = np.radians(deg)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    placed_x = cos_t * template_x - sin_t * template_y + x
    placed_y = sin_t * template_x + cos_t * template_y + y

    return Polygon(zip(placed_x, placed_y))

# Convert a submission cell (optionally 's'-prefixed) to a float
def parse_val(v):
    """Return ``v`` as a float, dropping one leading 's' if present.

    ``v`` is stringified first, so plain numbers pass through unchanged.
    """
    text = str(v)
    numeric_part = text[1:] if text.startswith('s') else text
    return float(numeric_part)

# Print the parsed position and rotation of every N=2 tree at 15 decimals
for _, row in n2_rows.iterrows():
    x, y, deg = (parse_val(row[col]) for col in ('x', 'y', 'deg'))
    print(f"{row['id']}: x={x:.15f}, y={y:.15f}, deg={deg:.15f}")

In [None]:
# Build one polygon per N=2 row, keyed by row id
trees = []
for _, row in n2_rows.iterrows():
    x, y, deg = (parse_val(row[col]) for col in ('x', 'y', 'deg'))
    poly = make_tree_polygon(x, y, deg)
    trees.append((row['id'], poly))

# Report how the first two trees relate to each other
if len(trees) >= 2:
    poly1, poly2 = trees[0][1], trees[1][1]

    print(f"Tree 0 area: {poly1.area}")
    print(f"Tree 1 area: {poly2.area}")

    # Computed once and reused for both the geometry and its area
    overlap_region = poly1.intersection(poly2)
    print(f"Intersection: {overlap_region}")
    print(f"Intersection area: {overlap_region.area}")

    # "Overlap" means the shapes intersect beyond merely touching
    strictly_overlapping = poly1.intersects(poly2) and not poly1.touches(poly2)
    print(f"Do they overlap? {strictly_overlapping}")
    print(f"Do they touch? {poly1.touches(poly2)}")

In [None]:
# Second opinion: Decimal arithmetic with coordinates scaled up to integers
SCALE = 10**18


def make_tree_polygon_int(x, y, deg):
    """Build the tree outline with Decimal math and integer-scaled vertices.

    The template, rotation and translation mirror the float version, but
    the arithmetic is done with ``Decimal`` and every coordinate is
    multiplied by ``SCALE`` and truncated with ``int()`` before the
    shapely ``Polygon`` is created.

    The rotation angle goes through ``float`` and ``math.cos``/``math.sin``,
    so the trigonometric factors carry only double precision.

    NOTE(review): shapely presumably stores coordinates as floats, in which
    case integers near 1e18 would not stay exact -- confirm before relying
    on this as an exact check.
    """
    import math

    trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
    base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')
    tip_y, tier1_y, tier2_y = Decimal('0.8'), Decimal('0.5'), Decimal('0.25')
    base_y = Decimal('0')
    trunk_bottom_y = -trunk_h

    # Outline as (x, y) pairs: tip, positive-x side, trunk, negative-x side
    template = [
        (Decimal('0'), tip_y),
        (top_w / 2, tier1_y),
        (top_w / 4, tier1_y),
        (mid_w / 2, tier2_y),
        (mid_w / 4, tier2_y),
        (base_w / 2, base_y),
        (trunk_w / 2, base_y),
        (trunk_w / 2, trunk_bottom_y),
        (-trunk_w / 2, trunk_bottom_y),
        (-trunk_w / 2, base_y),
        (-base_w / 2, base_y),
        (-mid_w / 4, tier2_y),
        (-mid_w / 2, tier2_y),
        (-top_w / 4, tier1_y),
        (-top_w / 2, tier1_y),
    ]

    # Inputs go through str() so the Decimal sees their printed form
    shift_x = Decimal(str(x))
    shift_y = Decimal(str(y))
    angle_deg = Decimal(str(deg))

    theta = float(angle_deg) * math.pi / 180
    cos_t = Decimal(str(math.cos(theta)))
    sin_t = Decimal(str(math.sin(theta)))

    # Rotate, translate, scale, then truncate each coordinate to an integer
    scaled_outline = [
        (
            int((cos_t * px - sin_t * py + shift_x) * SCALE),
            int((sin_t * px + cos_t * py + shift_y) * SCALE),
        )
        for px, py in template
    ]

    return Polygon(scaled_outline)

# Repeat the N=2 overlap test on the integer-scaled polygons
trees_int = []
for _, row in n2_rows.iterrows():
    x, y, deg = (parse_val(row[col]) for col in ('x', 'y', 'deg'))
    poly = make_tree_polygon_int(x, y, deg)
    trees_int.append((row['id'], poly))

if len(trees_int) >= 2:
    poly1, poly2 = trees_int[0][1], trees_int[1][1]

    print("Integer-scaled overlap check:")
    scaled_overlap_area = poly1.intersection(poly2).area
    print(f"Intersection area (scaled): {scaled_overlap_area}")
    # Intersecting but not merely touching counts as an overlap
    strictly_overlapping = poly1.intersects(poly2) and not poly1.touches(poly2)
    print(f"Do they overlap? {strictly_overlapping}")

In [None]:
# Load the baseline submission and pull out its N=2 rows for reference
valid_baseline = pd.read_csv(
    '/home/nonroot/snapshots/santa-2025/21328309254/submission/submission.csv'
)
baseline_is_group_002 = valid_baseline['id'].str.startswith('002_')
n2_valid = valid_baseline[baseline_is_group_002]
print("Valid baseline N=2:")
print(n2_valid)

In [None]:
# Print the raw (unparsed) N=2 rows of the ensemble and the baseline one after the other
for heading, frame in (
    ("\nOur ensemble N=2:", n2_rows),
    ("\nValid baseline N=2:", n2_valid),
):
    print(heading)
    for _, row in frame.iterrows():
        print(f"  {row['id']}: x={row['x']}, y={row['y']}, deg={row['deg']}")

In [None]:
# Look up which snapshot supplied N=2, if an ensemble map was saved
import json
import os

ensemble_map_path = '/home/code/experiments/004_ensemble_valid/ensemble_map.json'
if not os.path.exists(ensemble_map_path):
    print("No ensemble_map.json found")
else:
    with open(ensemble_map_path) as map_file:
        ensemble_map = json.load(map_file)
    # The map is queried with the string key '2'
    n2_source = ensemble_map.get('2', 'not found')
    print(f"N=2 source: {n2_source}")

In [None]:
# Scan every snapshot's N=2 group, score it, and find the best VALID one
import glob

snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
snapshots = sorted(glob.glob(f'{snapshot_dir}*/submission/submission.csv'))

print(f"Found {len(snapshots)} snapshots")


def _evaluate_n2_snapshot(snap_path):
    """Score the N=2 group of one snapshot submission.

    Returns ``(snap_id, score, has_overlap)``, or ``None`` when the file
    does not contain exactly two rows whose id starts with '002_'.
    Any read, parse or geometry error propagates to the caller.

    Kept in a function so the scan does not overwrite notebook-level
    names such as ``trees``, ``poly1`` and ``poly2`` from earlier cells.
    """
    df = pd.read_csv(snap_path)
    n2 = df[df['id'].str.startswith('002_')]
    if len(n2) != 2:
        return None

    # Build each polygon once and reuse it for both the overlap test and
    # the bounding-box computation.
    polys = [
        make_tree_polygon(
            parse_val(row['x']), parse_val(row['y']), parse_val(row['deg'])
        )
        for _, row in n2.iterrows()
    ]
    first, second = polys

    # Intersecting without merely touching counts as an overlap
    has_overlap = first.intersects(second) and not first.touches(second)

    # Side of the axis-aligned square enclosing both trees; the score is
    # side^2 divided by the number of trees (2).
    min_xs, min_ys, max_xs, max_ys = zip(*(p.bounds for p in polys))
    side = max(max(max_xs) - min(min_xs), max(max_ys) - min(min_ys))
    score = side * side / 2

    # The snapshot id is the directory two levels above submission.csv
    snap_id = snap_path.split('/')[-3]
    return snap_id, score, has_overlap


# Check N=2 in each snapshot
n2_scores = []
skipped = []
for snap_path in snapshots:
    try:
        result = _evaluate_n2_snapshot(snap_path)
    except Exception as exc:
        # Best-effort scan: keep going, but remember why this one failed
        skipped.append((snap_path, exc))
        continue
    if result is not None:
        n2_scores.append(result)

if skipped:
    print(f"\nSkipped {len(skipped)} snapshots that could not be evaluated:")
    for skipped_path, skipped_exc in skipped:
        print(f"  {skipped_path}: {skipped_exc!r}")

# Sort by score
n2_scores.sort(key=lambda entry: entry[1])

print("\nTop 10 N=2 solutions:")
for snap_id, score, has_overlap in n2_scores[:10]:
    status = "OVERLAP" if has_overlap else "VALID"
    print(f"  {snap_id}: score={score:.6f} [{status}]")

# The list above mixes valid and overlapping entries, so name the best valid one
best_valid = next((entry for entry in n2_scores if not entry[2]), None)
if best_valid is not None:
    print(f"\nBest VALID N=2 solution: {best_valid[0]} (score={best_valid[1]:.6f})")
else:
    print("\nNo VALID N=2 solution found")