# Loop 1 Analysis: Overlap Issue Investigation

The baseline submission failed with "Overlapping trees in group 002". We need to:
1. Understand why our local validation passed but Kaggle rejected the submission
2. Implement stricter validation
3. Find/create valid solutions

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
from decimal import Decimal, getcontext
import json

# Decimal arithmetic below uses 30 significant digits
getcontext().prec = 30
# Multiplier applied to coordinates before they are truncated to integers
SCALE = 10 ** 18

# Tree outline as (x, y) vertices: starts at the tip, walks down the right
# side to the trunk, then back up the left side.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]

# Tree polygon vertices, split into parallel x and y lists
TX, TY = (list(axis) for axis in zip(*_TREE_OUTLINE))

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` about the origin and moved to (x, y).

    The outline is built from the paired ``TX``/``TY`` vertex lists. The
    rotation is applied first, around (0, 0), and the translation second.
    """
    outline = Polygon(zip(TX, TY))
    turned = affinity.rotate(outline, deg, origin=(0, 0))
    placed = affinity.translate(turned, xoff=x, yoff=y)
    return placed

def parse_value(s):
    """Convert a value to float, dropping a single leading 's' from strings.

    Non-string inputs and strings without the prefix go straight to
    ``float``.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    if not has_prefix:
        return float(s)
    # Strip only the first character; the remainder must be a valid float
    return float(s[1:])

def load_submission(path):
    """Read a submission CSV and return it with numeric coordinates and a group column.

    The ``x``, ``y`` and ``deg`` columns are converted with ``parse_value``.
    An integer column ``n`` is added, taken from the part of ``id`` that
    precedes the first underscore.
    """
    frame = pd.read_csv(path)
    for column in ('x', 'y', 'deg'):
        frame[column] = frame[column].apply(parse_value)
    frame['n'] = frame['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))
    return frame

# Signal that the helper definitions in this cell were executed
print("Functions defined")

In [None]:
# Load the failed submission and inspect the N=2 group in detail
df = load_submission('/home/code/experiments/001_baseline/submission.csv')

# Keep only the rows belonging to N=2
n2_trees = df[df['n'] == 2]
print(f"N=2 has {len(n2_trees)} trees:")
for _, row in n2_trees.iterrows():
    print(f"  Tree {row['id']}: x={row['x']:.6f}, y={row['y']:.6f}, deg={row['deg']:.6f}")

# Build one polygon per tree, in row order
polys = []
for _, row in n2_trees.iterrows():
    polys.append(create_tree_polygon(row['x'], row['y'], row['deg']))

poly_a, poly_b = polys[0], polys[1]

print(f"\nPolygon 0 bounds: {poly_a.bounds}")
print(f"Polygon 1 bounds: {poly_b.bounds}")

# Report each of shapely's pairwise predicates plus the shared area
print(f"\nintersects: {poly_a.intersects(poly_b)}")
print(f"touches: {poly_a.touches(poly_b)}")
print(f"overlaps: {poly_a.overlaps(poly_b)}")
print(f"intersection area: {poly_a.intersection(poly_b).area}")

In [None]:
# Implement STRICT validation using integer scaling (Kaggle's method)
def validate_no_overlap_strict(trees_df, n):
    """Check the trees of group ``n`` for pairwise overlap on integer-scaled outlines.

    Returns a ``(ok, message)`` tuple. ``ok`` is True when the group is empty
    or no pair shares a positive area. Otherwise ``ok`` is False and the
    message names the first offending pair by position within the group.
    """
    group = trees_df[trees_df['n'] == n]
    if len(group) == 0:
        return True, "No trees"

    def scaled_outline(row):
        # Multiply each vertex by SCALE and truncate to int before rebuilding
        ring = list(create_tree_polygon(row['x'], row['y'], row['deg']).exterior.coords)
        return Polygon([
            (int(Decimal(str(px)) * SCALE), int(Decimal(str(py)) * SCALE))
            for px, py in ring
        ])

    scaled = [scaled_outline(row) for _, row in group.iterrows()]

    # Compare every unordered pair exactly once
    for i, first in enumerate(scaled):
        for j, second in enumerate(scaled[i + 1:], start=i + 1):
            # Disjoint or merely touching pairs cannot overlap
            if not first.intersects(second) or first.touches(second):
                continue
            shared = first.intersection(second)
            if shared.area > 0:
                # Divide by SCALE**2 to report the area in unscaled units
                return False, f"Trees {i} and {j} overlap with area {shared.area / (SCALE**2)}"
    return True, "OK"

# Run the strict check on the N=2 group and show its verdict
n2_result = validate_no_overlap_strict(df, 2)
ok, msg = n2_result
print(f"N=2 strict validation: {ok} - {msg}")

In [None]:
# Run the strict check for every N from 1 to 200 and collect the failures
print("Checking all N values for overlaps...")
overlap_issues = []

for n in range(1, 201):
    ok, msg = validate_no_overlap_strict(df, n)
    if ok:
        continue
    # Record the failing group together with the validator's message
    overlap_issues.append((n, msg))
    print(f"N={n}: OVERLAP - {msg}")

print(f"\nTotal N values with overlaps: {len(overlap_issues)}")

In [None]:
# Let's find a submission that passes strict validation
import glob

print("Searching for valid submissions...")
# glob returns matches in arbitrary order; sort so the 30-file sample below
# is the same on every run.
all_submissions = sorted(
    glob.glob('/home/nonroot/snapshots/santa-2025/**/submission.csv', recursive=True)
)

valid_submissions = []
for f in all_submissions[:30]:  # Check first 30
    try:
        test_df = load_submission(f)
        # Skip short files; presumably a complete submission has more rows
        # than this threshold -- TODO confirm the expected row count.
        if len(test_df) < 20000:
            continue

        # Check N=2 specifically (the failing case)
        ok, msg = validate_no_overlap_strict(test_df, 2)
        if ok:
            valid_submissions.append(f)
            print(f"Valid N=2: {f}")
    except Exception as e:
        # Best-effort scan: keep going, but report the failure so unreadable
        # or malformed files are not silently mistaken for invalid ones.
        print(f"Skipping {f}: {type(e).__name__}: {e}")

print(f"\nFound {len(valid_submissions)} submissions with valid N=2")