# Loop 5 Analysis: Fixing Overlap Issues

## Problem
Submission failed with 'Overlapping trees in group 031'
Our local overlap detection missed this overlap.

## Strategy
1. Use STRICTER overlap detection (Kaggle-compatible)
2. Only use N values from the better solution that pass STRICT validation
3. Fall back to the validated solution for any questionable N values

In [None]:
import pandas as pd
import numpy as np
import math
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import os

# All Decimal arithmetic in this notebook runs with 30 significant digits.
getcontext().prec = 30

# Multiplier applied to every coordinate before it is handed to shapely
# (original note: higher precision, like the saspav kernel).
scale_factor = Decimal('1e18')

# Tree geometry, written as (x, y) vertex pairs and then split into the
# parallel TX / TY lists used when the polygon is built.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = [vx for vx, _ in _TREE_OUTLINE]
TY = [vy for _, vy in _TREE_OUTLINE]

print('Setup complete')

In [None]:
def get_tree_polygon_strict(x, y, deg):
    """Return the tree outline rotated by ``deg`` and moved to ``(x, y)``.

    Every coordinate is multiplied by the module-level ``scale_factor``
    using ``Decimal`` arithmetic and converted to ``float`` only when it is
    handed to shapely, so the returned polygon is in scaled units.

    Args:
        x: Horizontal offset of the tree; any value whose ``str()`` is
            accepted by ``Decimal``.
        y: Vertical offset, same convention as ``x``.
        deg: Rotation about the tree's local origin, passed to
            ``affinity.rotate`` as ``float(deg)``.

    Returns:
        The rotated and translated shapely ``Polygon`` (scaled coordinates).
    """
    def _scaled(value):
        # str() first, so Decimal receives the printed digits rather than
        # the binary expansion of a float.
        return float(Decimal(str(value)) * scale_factor)

    # Parse the placement before building any geometry.
    x_dec = Decimal(str(x))
    y_dec = Decimal(str(y))

    # Outline around the local origin, already in scaled units.
    base_outline = Polygon([(_scaled(tx), _scaled(ty)) for tx, ty in zip(TX, TY)])

    # Rotate about the local origin first, then shift into position.
    spun = affinity.rotate(base_outline, float(deg), origin=(0, 0))
    x_shift = float(x_dec * scale_factor)
    y_shift = float(y_dec * scale_factor)
    return affinity.translate(spun, xoff=x_shift, yoff=y_shift)

def has_overlap_strict(trees_data):
    """Report whether any two trees in a configuration overlap.

    Two trees count as overlapping when their polygons ``intersects`` but do
    not merely ``touches``. The verdict rests on those predicates alone: the
    constructed intersection is computed only to describe the overlap in the
    message. Gating on ``intersection.area > 0`` would let a sliver overlap
    whose constructed area collapses to zero slip through as "clean".

    Args:
        trees_data: Sequence of ``(x, y, deg)`` triples, one per tree, in
            the form accepted by ``get_tree_polygon_strict``.

    Returns:
        ``(False, None)`` when no overlapping pair is found (always the case
        for fewer than two trees); otherwise ``(True, message)`` where
        ``message`` names the first offending pair and the intersection area
        converted back from scaled units.
    """
    if len(trees_data) <= 1:
        return False, None

    polygons = [get_tree_polygon_strict(x, y, deg) for x, y, deg in trees_data]
    tree_index = STRtree(polygons)
    # Polygons are in scaled units, so areas carry scale_factor squared.
    unit_area = float(scale_factor) ** 2

    for i, poly in enumerate(polygons):
        # NOTE(review): assumes shapely >= 2, where query() yields integer
        # positions into `polygons` rather than geometry objects.
        for idx in tree_index.query(poly):
            if idx == i:
                continue
            other = polygons[idx]
            if poly.intersects(other) and not poly.touches(other):
                # Diagnostic only; a zero here does not clear the pair.
                area = poly.intersection(other).area / unit_area
                return True, f"Overlap {i}-{idx}, area={area:.2e}"
    return False, None

def check_n_strict(df, n):
    """Run the strict overlap check on the ``n``-tree group of a submission.

    Rows belong to the group when their ``id`` starts with the zero-padded
    three-digit ``n`` followed by an underscore (e.g. ``031_``).

    Args:
        df: Submission frame with string columns ``id``, ``x``, ``y`` and
            ``deg``; the first character of each ``x``/``y``/``deg`` value
            is dropped before parsing.
        n: Group size to check.

    Returns:
        ``(True, message)`` when the group does not contain exactly ``n``
        rows, otherwise whatever ``has_overlap_strict`` returns for the
        group's ``(x, y, deg)`` triples.
    """
    group = df[df['id'].str.startswith(f'{n:03d}_')]

    if len(group) != n:
        return True, f"Wrong count: {len(group)} vs {n}"

    # Keep the values as text instead of converting to float here:
    # get_tree_polygon_strict feeds them to Decimal(str(...)), so this way
    # every submitted digit reaches the Decimal arithmetic rather than being
    # rounded to double precision first.
    xs = group['x'].str[1:].values
    ys = group['y'].str[1:].values
    degs = group['deg'].str[1:].values

    return has_overlap_strict(list(zip(xs, ys, degs)))

# Status line confirming the cell that defines the strict checks has run.
print('Strict overlap detection defined')

In [None]:
# Load the two candidate submissions: the "better" one under review and the
# previously validated one.
better_path = '/home/nonroot/snapshots/santa-2025/21328309254/code/experiments/003_valid_ensemble/submission.csv'
validated_path = '/home/code/experiments/004_sa_fast_v2_optimization/submission.csv'

# Both files are read before anything is reported.
df_better, df_validated = [
    pd.read_csv(csv_path) for csv_path in (better_path, validated_path)
]

print(f'Better solution loaded: {len(df_better)} rows')
print(f'Validated solution loaded: {len(df_validated)} rows')

In [None]:
# Check N=31 specifically in both solutions.
# Group 031 is the one named in the failed submission's error message, so it
# is the first thing to confirm with the strict check.
print('Checking N=31 in better solution:')
# check_n_strict returns (flag, message); flag is also True when the group
# has the wrong number of rows, not only when an overlap is found.
has_overlap, info = check_n_strict(df_better, 31)
print(f'  Better: overlap={has_overlap}, info={info}')

print('\nChecking N=31 in validated solution:')
# Same check on the fallback submission; has_overlap / info are overwritten.
has_overlap, info = check_n_strict(df_validated, 31)
print(f'  Validated: overlap={has_overlap}, info={info}')

In [None]:
# Run the strict check over every group size 1..200 of the better solution.
print('Re-checking all N values with STRICT detection...')

# (n, message) for every group that check_n_strict flags; this includes
# groups with a wrong row count as well as genuine overlaps.
better_overlaps = []
for n in range(1, 201):
    has_overlap, info = check_n_strict(df_better, n)
    if not has_overlap:
        continue
    better_overlaps.append((n, info))
    # Only the first 15 flagged groups are echoed while scanning; the
    # complete list is summarised after the loop.
    if len(better_overlaps) <= 15:
        print(f'  N={n:3d}: {info}')

print(f'\nTotal N values with overlaps in better solution: {len(better_overlaps)}')
print(f'Overlap N values: {[x[0] for x in better_overlaps]}')

In [None]:
# Confirm the validated solution is clean under the strict check.
print('\nVerifying validated solution...')

# (n, message) for every flagged group; each one is printed as it is found.
validated_overlaps = []
for n in range(1, 201):
    has_overlap, info = check_n_strict(df_validated, n)
    if not has_overlap:
        continue
    validated_overlaps.append((n, info))
    print(f'  N={n:3d}: {info}')

# Final verdict line.
if validated_overlaps:
    print(f'  \u2717 Validated solution has {len(validated_overlaps)} overlaps!')
else:
    print('  \u2713 Validated solution has NO overlaps!')