# Evolver Loop 1 Analysis: Investigating Overlap in Group 040

The submission failed with the error "Overlapping trees in group 040". We need to:
1. Understand why our overlap detection missed this
2. Fix the overlap detection
3. Repair the submission

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
import os

# Christmas tree polygon vertices (15 vertices).
# The points are listed clockwise starting at the tip: down the right-hand
# side (x > 0), across the bottom of the trunk, then back up the left-hand
# side (x < 0). Every left-hand point mirrors a right-hand one across x = 0.
TREE_VERTICES = np.array([
    (0.0, 0.8),      # Tip
    (0.125, 0.5),    # Right top tier (outer corner)
    (0.0625, 0.5),   # Right top tier (inner corner)
    (0.2, 0.25),     # Right mid tier (outer corner)
    (0.1, 0.25),     # Right mid tier (inner corner)
    (0.35, 0.0),     # Right base (outer corner)
    (0.075, 0.0),    # Right trunk (top)
    (0.075, -0.2),   # Right trunk (bottom)
    (-0.075, -0.2),  # Left trunk (bottom)
    (-0.075, 0.0),   # Left trunk (top)
    (-0.35, 0.0),    # Left base (outer corner)
    (-0.1, 0.25),    # Left mid tier (inner corner)
    (-0.2, 0.25),    # Left mid tier (outer corner)
    (-0.0625, 0.5),  # Left top tier (inner corner)
    (-0.125, 0.5),   # Left top tier (outer corner)
])

class ChristmasTree:
    """A tree outline placed at ``(x, y)`` and rotated by ``deg``.

    The shapely polygon is built on first access of ``polygon`` and then
    reused. The cached polygon is not rebuilt if ``x``, ``y`` or ``deg`` are
    changed afterwards.
    """

    def __init__(self, x=0, y=0, deg=0):
        self.x, self.y, self.deg = x, y, deg
        # Filled in lazily by the ``polygon`` property.
        self._polygon = None

    @property
    def polygon(self):
        """Return the placed outline, building and caching it on first use.

        The outline in ``TREE_VERTICES`` is rotated about ``(0, 0)`` by
        ``deg`` (shapely's ``rotate`` takes degrees by default) and then
        shifted by ``(x, y)``.
        """
        if self._polygon is not None:
            return self._polygon
        turned = rotate(Polygon(TREE_VERTICES), self.deg, origin=(0, 0))
        self._polygon = translate(turned, self.x, self.y)
        return self._polygon

def load_submission(filepath):
    """Read a submission CSV and return it with float ``x``/``y``/``deg`` columns.

    Each of the three columns is cast to string, has every literal ``'s'``
    character removed, and is then parsed as float. All other columns are
    returned as read by pandas.

    NOTE(review): presumably the ``'s'`` is a prefix the submission format
    puts in front of each number -- confirm against the file format.
    """
    raw = pd.read_csv(filepath)
    numeric = {
        name: raw[name].astype(str).str.replace('s', '', regex=False).astype(float)
        for name in ('x', 'y', 'deg')
    }
    # assign() replaces the existing columns in place, so column order is kept.
    return raw.assign(**numeric)

def get_trees_for_n(df, n):
    """Build one ``ChristmasTree`` per row of group ``n``.

    A row belongs to the group when its ``id`` starts with ``n`` formatted
    as three zero-padded digits followed by an underscore (e.g. ``040_``).
    Trees are returned in the row order of ``df``.
    """
    group_rows = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTree(x, y, deg)
        for x, y, deg in zip(group_rows['x'], group_rows['y'], group_rows['deg'])
    ]

# Marker output: reaching this line means the definitions in this cell ran.
print("Functions defined")

In [None]:
# Load the submitted file through load_submission so x/y/deg are floats.
# `df` is reused by the later cells.
submission_path = '/home/submission/submission.csv'
df = load_submission(submission_path)
print(f"Loaded {len(df)} rows")

# Check group 040 specifically: build its trees and report how many rows
# carry the "040_" id prefix.
trees_40 = get_trees_for_n(df, 40)
print(f"Group 040 has {len(trees_40)} trees")

In [None]:
# More rigorous overlap detection
def check_overlaps_detailed(trees, tolerance=1e-10):
    """Report every pair of trees whose polygons share more than ``tolerance`` area.

    Args:
        trees: sequence of objects exposing a ``polygon`` attribute.
        tolerance: area threshold; a pair is reported only when the area of
            its intersection is strictly greater than this value.

    Returns:
        A list of dicts, one per reported pair ``(i, j)`` with ``i < j``,
        holding the two indices, the intersection area and the geometry
        type of the intersection.

    Pairs that intersect with an area at or below ``tolerance`` (including
    pairs that only touch) are not reported.
    """
    shapes = [tree.polygon for tree in trees]
    found = []
    for i, first in enumerate(shapes):
        for j, second in enumerate(shapes[i + 1:], start=i + 1):
            # Cheap predicate first; only compute the intersection geometry
            # for pairs that actually meet.
            if not first.intersects(second):
                continue
            shared = first.intersection(second)
            if shared.area > tolerance:
                found.append({
                    'tree_i': i,
                    'tree_j': j,
                    'intersection_area': shared.area,
                    'intersection_type': shared.geom_type,
                })
    return found

# Check group 040 with the default tolerance and list each reported pair
# (tree indices are positions within the group, not submission ids).
overlaps_40 = check_overlaps_detailed(trees_40)
print(f"Overlaps in group 040: {len(overlaps_40)}")
for o in overlaps_40:
    print(f"  Trees {o['tree_i']} and {o['tree_j']}: area={o['intersection_area']:.10f}, type={o['intersection_type']}")

In [None]:
# Sweep groups 1..200 using a smaller area threshold (1e-12) than the
# detector's default, keeping the findings per group number.
print("Checking all groups for overlaps...")
all_overlaps = {}
for n in range(1, 201):
    trees = get_trees_for_n(df, n)
    overlaps = check_overlaps_detailed(trees, tolerance=1e-12)
    if not overlaps:
        # Nothing reported for this group; leave it out of the summary.
        continue
    all_overlaps[n] = overlaps
    print(f"  Group {n:03d}: {len(overlaps)} overlaps")

print(f"\nTotal groups with overlaps: {len(all_overlaps)}")

In [None]:
# Load the original ensemble.csv to compare against the submitted file.
# NOTE(review): the path suggests this is the pre-optimized input the
# submission was derived from -- confirm.
original_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv'
original_df = load_submission(original_path)

# Check group 040 in original with the same detector and default tolerance,
# so the result can be compared directly with the submitted group.
original_trees_40 = get_trees_for_n(original_df, 40)
overlaps_original_40 = check_overlaps_detailed(original_trees_40)
print(f"Overlaps in original group 040: {len(overlaps_original_40)}")
for o in overlaps_original_40:
    print(f"  Trees {o['tree_i']} and {o['tree_j']}: area={o['intersection_area']:.10f}")

In [None]:
# Compare the data for group 040 between original and submitted by printing
# the first 10 rows of each. The two filtered frames are kept as globals
# (`original_40_data`, `submitted_40_data`) for the numeric comparison that
# follows.
print("Original group 040:")
original_40_data = original_df[original_df['id'].str.startswith('040_')]
print(original_40_data.head(10))

print("\nSubmitted group 040:")
submitted_40_data = df[df['id'].str.startswith('040_')]
print(submitted_40_data.head(10))

In [None]:
# Check if the data is different: largest absolute per-column difference
# between the original and submitted rows of group 040.
print("Checking if data differs between original and submitted...")
for col in ['x', 'y', 'deg']:
    orig_vals = original_40_data[col].values
    sub_vals = submitted_40_data[col].values
    # The comparison is positional, so it only makes sense when both groups
    # have the same non-zero number of rows. Without this guard a length
    # mismatch raises (or silently broadcasts when one side has one row) and
    # an empty group makes .max() raise ValueError.
    if orig_vals.shape != sub_vals.shape or orig_vals.size == 0:
        print(f"  {col}: cannot compare ({orig_vals.size} original vs {sub_vals.size} submitted rows)")
        continue
    diff = np.abs(orig_vals - sub_vals).max()
    print(f"  {col}: max difference = {diff}")

# Check if the issue is in how we wrote the submission: re-read the file
# with plain pandas (no 's' stripping or float conversion) to see the raw
# text of the first few group-040 rows.
print("\nRe-reading submission to check format...")
raw_df = pd.read_csv(submission_path)
print(raw_df[raw_df['id'].str.startswith('040_')].head(5))