# Loop 3 Analysis: Fix Overlapping Trees Issue

The submission failed with 'Overlapping trees in group 193'. We need to:
1. Identify and fix the overlap issue
2. Validate the submission properly before submitting
3. Analyze worst-performing N values for targeted optimization

In [None]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import os

# Carry 25 significant digits in every Decimal operation performed below.
getcontext().prec = 25
# Multiplier applied to tree coordinates before they are handed to shapely
# (used by ChristmasTree when building and translating its polygon).
scale_factor = Decimal("1e18")

print("Libraries loaded")

In [None]:
# ChristmasTree class for validation
class ChristmasTree:
    """A single tree placed at a given position and rotation.

    The constructor stores ``center_x``, ``center_y`` and ``angle`` as
    ``Decimal`` values (each argument goes through ``str`` first) and builds
    ``self.polygon``: the fixed 15-vertex tree outline, multiplied by the
    module-level ``scale_factor``, rotated about the origin by ``angle`` and
    then translated to the scaled centre.

    NOTE(review): ``angle`` is forwarded to ``shapely.affinity.rotate`` with
    its default angle unit; callers in this file pass the ``deg`` column, so
    degrees are presumably intended.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        two = Decimal('2')
        four = Decimal('4')

        # Widths of the outline at each level.
        half_trunk = Decimal('0.15') / two
        half_base = Decimal('0.7') / two
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the outline levels, from tip down to trunk bottom.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -Decimal('0.2')

        # Right-hand side of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (half_base, base_y),
            (half_trunk, base_y),
            (half_trunk, trunk_bottom_y),
        ]

        # The left-hand side is the mirror image, walked back up to the tip.
        outline = [(Decimal('0.0'), tip_y)]
        outline.extend(right_side)
        outline.extend((-x, y) for x, y in reversed(right_side))

        template = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )
        turned = affinity.rotate(template, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

def strip_s(val):
    """Return ``str(val)`` with one leading ``'s'`` removed, if present.

    Only the first character is examined, so ``"ss1"`` becomes ``"s1"``.
    """
    text = str(val)
    if text.startswith('s'):
        return text[1:]
    return text

def load_trees(n, df):
    """Build the ChristmasTree objects for group ``n`` from ``df``.

    Rows are selected by an ``id`` that starts with ``n`` zero-padded to
    three digits followed by ``_`` (e.g. ``"007_"``). The ``x``, ``y`` and
    ``deg`` values of each selected row are passed through ``strip_s``; a
    row is skipped when any of the three resulting strings is empty.

    Returns the list of trees in row order.
    """
    prefix = f"{n:03d}_"
    rows = df[df["id"].str.startswith(prefix)]

    trees = []
    for raw_x, raw_y, raw_deg in zip(rows["x"], rows["y"], rows["deg"]):
        x, y, deg = strip_s(raw_x), strip_s(raw_y), strip_s(raw_deg)
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees

def has_overlap(trees):
    """Return True if any two trees' polygons overlap.

    Two polygons count as overlapping when they intersect and do not merely
    touch. Zero or one tree can never overlap, so those cases return False
    without building an index.

    NOTE(review): the results of ``STRtree.query`` are used as integer
    positions into the polygon list, which presumably relies on Shapely 2.x
    behaviour; confirm the installed version.
    """
    if len(trees) <= 1:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    for current, shape in enumerate(shapes):
        for candidate in spatial_index.query(shape):
            if candidate == current:
                # A polygon always matches itself in the index.
                continue
            other = shapes[candidate]
            if shape.intersects(other) and not shape.touches(other):
                return True
    return False

# Progress marker: the validation class and helper functions are now defined.
print("Validation functions defined")

In [None]:
# Load the current submission and show its size, columns and first rows.
print("Checking /home/submission/submission.csv for overlaps...")
df = pd.read_csv('/home/submission/submission.csv')
print(f"Total rows: {df.shape[0]}")
print(f"Columns: {df.columns.tolist()}")
print("Sample rows:")
print(df.head(10))

In [None]:
# Scan groups 1..200 of the submission and collect those with overlaps.
overlapping_groups = []
for n in range(1, 201):
    trees = load_trees(n, df)
    if len(trees) != n:
        # Wrong tree count: report it and do not run the overlap test.
        print(f"Group {n}: Expected {n} trees, got {len(trees)}")
    elif has_overlap(trees):
        overlapping_groups.append(n)
        # Group 193 is the one named in the failed-submission error.
        if n == 193:
            print(f"Group {n}: OVERLAP DETECTED (this is the error group)")

print(f"\nTotal overlapping groups: {len(overlapping_groups)}")
if overlapping_groups:
    # Only the first 20 offending groups are listed.
    print(f"Overlapping groups: {overlapping_groups[:20]}...")

In [None]:
# Run the same overlap scan on the ensemble baseline file.
print("\nChecking ensemble_baseline.csv for overlaps...")
df_baseline = pd.read_csv('/home/code/experiments/003_ensemble/ensemble_baseline.csv')
overlapping_baseline = []
for n in range(1, 201):
    trees = load_trees(n, df_baseline)
    # Groups with an unexpected tree count are skipped without a message.
    if len(trees) == n and has_overlap(trees):
        overlapping_baseline.append(n)

print(f"Overlapping groups in baseline: {len(overlapping_baseline)}")
if overlapping_baseline:
    # Only the first 20 offending groups are listed.
    print(f"Groups: {overlapping_baseline[:20]}...")

In [None]:
# Run the overlap scan on best_seed2.csv (the best C++ optimizer output).
print("\nChecking best_seed2.csv for overlaps...")
df_seed2 = pd.read_csv('/home/code/experiments/003_ensemble/best_seed2.csv')
overlapping_seed2 = []
for n in range(1, 201):
    trees = load_trees(n, df_seed2)
    # Groups with an unexpected tree count are skipped without a message.
    if len(trees) == n and has_overlap(trees):
        overlapping_seed2.append(n)

print(f"Overlapping groups in best_seed2: {len(overlapping_seed2)}")
if overlapping_seed2:
    # Only the first 20 offending groups are listed.
    print(f"Groups: {overlapping_seed2[:20]}...")

In [None]:
# Run the overlap scan on submission_fd.csv (fix direction output).
print("\nChecking submission_fd.csv for overlaps...")
df_fd = pd.read_csv('/home/code/experiments/003_ensemble/submission_fd.csv')
print(f"Total rows: {df_fd.shape[0]}")
overlapping_fd = []
for n in range(1, 201):
    trees = load_trees(n, df_fd)
    if len(trees) != n:
        # Wrong tree count: report it and do not run the overlap test.
        print(f"Group {n}: Expected {n} trees, got {len(trees)}")
    elif has_overlap(trees):
        overlapping_fd.append(n)

print(f"Overlapping groups in submission_fd: {len(overlapping_fd)}")
if overlapping_fd:
    # Only the first 20 offending groups are listed.
    print(f"Groups: {overlapping_fd[:20]}...")