# Loop 2 Analysis: Debug Overlap Issue

The submission failed with 'Overlapping trees in group 002'. Let's investigate.

In [1]:
import sys
sys.path.insert(0, '/home/code')

import pandas as pd
import numpy as np
from utils import ChristmasTree, load_submission, load_trees_for_n, has_overlap, get_bounding_box_side
from shapely.geometry import Polygon
from shapely import affinity

# Load the failed submission (the CSV rejected with 'Overlapping trees in group 002')
failed_df = pd.read_csv('/home/submission/submission.csv')
print(f"Submission shape: {failed_df.shape}")

# Check N=2 specifically: keep only the rows whose id starts with '002_',
# i.e. the group named in the error message. `n2_rows` is reused by later cells.
n2_rows = failed_df[failed_df['id'].str.startswith('002_')]
print(f"\nN=2 rows:")
print(n2_rows)

Submission shape: (20100, 4)

N=2 rows:
      id                         x                          y  \
1  002_0   s0.15409700000000001174  s-0.038540999999999998871   
2  002_1  s-0.15409700000000001174   s-0.56145900000000004137   

                      deg  
1  s203.62937800000000266  
2  s23.629377999999999105  


In [2]:
# Parse the N=2 trees and check for overlap
def parse_value(s):
    """Return ``s`` as a string, without a leading 's' marker.

    Submission cells are stored as text such as ``'s0.154097'``; this drops
    that single leading character so the remainder can be parsed as a number.

    Args:
        s: A cell value. Strings beginning with 's' lose that first character;
            anything else is passed through ``str()`` unchanged.

    Returns:
        str: The textual value with at most one leading 's' removed.
    """
    # Non-string inputs are never stripped, only stringified.
    if not isinstance(s, str):
        return str(s)
    return s[1:] if s.startswith('s') else str(s)

# Build the tree objects for the N=2 group of the failed submission
trees_n2 = load_trees_for_n(failed_df, 2)
print(f"Loaded {len(trees_n2)} trees for N=2")

# Check overlap with the project's own detector; the result is unpacked into
# a flag and a collection of detected overlaps
has_ovlp, overlaps = has_overlap(trees_n2)
print(f"Has overlap: {has_ovlp}")
print(f"Overlaps: {overlaps}")

# Print each tree's placement and polygon bounds (text dump only, nothing is plotted)
for i, tree in enumerate(trees_n2):
    print(f"\nTree {i}:")
    print(f"  Center: ({tree.center_x}, {tree.center_y})")
    print(f"  Angle: {tree.angle}")
    print(f"  Bounds: {tree.polygon.bounds}")

Loaded 2 trees for N=2
Has overlap: False
Overlaps: []

Tree 0:
  Center: (0.15409700000000001174, -0.038540999999999998871)
  Angle: 203.62937800000000266
  Bounds: (-0.16655806655628944, -0.771466866414376, 0.47475207066119585, 0.1747518794780811)

Tree 1:
  Center: (-0.15409700000000001174, -0.56145900000000004137)
  Angle: 23.629377999999999105
  Bounds: (-0.47475207066119596, -0.7747518794780811, 0.16655806655628938, 0.1714668664143758)


In [3]:
# Check if the polygons actually intersect
from shapely.ops import unary_union

# Work directly on the two shapely polygons of the N=2 group, bypassing has_overlap
p0 = trees_n2[0].polygon
p1 = trees_n2[1].polygon

print(f"Polygon 0 area: {p0.area}")
print(f"Polygon 1 area: {p1.area}")
# intersects: the geometries share at least one point.
# touches: they share boundary points only (interiors do not meet).
# intersects=True together with touches=False therefore means the interiors overlap.
print(f"Intersects: {p0.intersects(p1)}")
print(f"Touches: {p0.touches(p1)}")

# Only measure the shared region when shapely reports contact at all
if p0.intersects(p1):
    intersection = p0.intersection(p1)
    print(f"Intersection area: {intersection.area}")
    print(f"Intersection type: {intersection.geom_type}")

Polygon 0 area: 0.24562499999999998
Polygon 1 area: 0.24562499999999998
Intersects: True
Touches: False
Intersection area: 7.019842217938716e-13
Intersection type: MultiPolygon


In [4]:
# Load the baseline N=2 that we know works
# NOTE(review): absolute snapshot path - only resolves on the machine that produced it
baseline_df = pd.read_csv('/home/nonroot/snapshots/santa-2025/21329069570/code/code/solutions/submission_70.627569.csv')
baseline_n2 = load_trees_for_n(baseline_df, 2)

# Print the baseline placements so they can be compared by eye with the submitted ones
print("Baseline N=2:")
for i, tree in enumerate(baseline_n2):
    print(f"  Tree {i}: center=({tree.center_x}, {tree.center_y}), angle={tree.angle}")

# Same detector as used on the failed submission; only the flag is kept here
has_ovlp_baseline, _ = has_overlap(baseline_n2)
print(f"\nBaseline has overlap: {has_ovlp_baseline}")

Baseline N=2:
  Tree 0: center=(0.154097069621000, -0.038540742695000), angle=203.629377730657012
  Tree 1: center=(-0.154097069621000, -0.561459257305000), angle=23.629377730657001

Baseline has overlap: False


In [5]:
# Compare the sources - where did the N=2 come from in the ensemble?
# Let's check if the ensemble picked a different source for N=2

# The ensemble notebook should have recorded which source was used
# Let's check the original baseline vs what was submitted

baseline_n2_rows = baseline_df[baseline_df['id'].str.startswith('002_')]
print("Baseline N=2 rows:")
print(baseline_n2_rows)

print("\nSubmitted N=2 rows:")
print(n2_rows)

# Are they the same? Compare every x/y/deg cell of the group rather than only
# the first x value, so a mismatch confined to y, deg or the second tree is
# still reported. This is an exact comparison of the raw CSV cell contents;
# np.array_equal returns False (instead of raising) if the shapes differ.
value_cols = ['x', 'y', 'deg']
same_values = np.array_equal(
    baseline_n2_rows[value_cols].to_numpy(),
    n2_rows[value_cols].to_numpy(),
)
print(f"\nSame values? {same_values}")

Baseline N=2 rows:
      id                    x                    y                   deg
1  002_0   s0.154097069621000  s-0.038540742695000  s203.629377730657012
2  002_1  s-0.154097069621000  s-0.561459257305000   s23.629377730657001

Submitted N=2 rows:
      id                         x                          y  \
1  002_0   s0.15409700000000001174  s-0.038540999999999998871   
2  002_1  s-0.15409700000000001174   s-0.56145900000000004137   

                      deg  
1  s203.62937800000000266  
2  s23.629377999999999105  

Same values? False


In [6]:
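# The issue is clear - the ensemble's N=2 configuration differs from the baseline
# and has overlapping trees (intersection area ~7e-13). The submitted values match
# the baseline rounded to 6 decimal places (e.g. x 0.154097069621 -> 0.154097), so the
# overlap likely comes from that precision loss. Our overlap detection must have failed.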

# Let's test with a stricter tolerance
def has_overlap_strict(trees, tolerance=1e-9):
    """Report every pair of trees whose polygons share more area than ``tolerance``.

    Args:
        trees: Sequence of objects exposing a ``polygon`` attribute that
            supports ``intersects``, ``touches`` and ``intersection``.
        tolerance: Shared area must be strictly greater than this value for a
            pair to be reported.

    Returns:
        tuple: ``(found_any, pairs)`` where ``pairs`` is a list of
        ``(i, j, area)`` tuples with ``i < j`` indexing into ``trees``.
    """
    # Fewer than two trees cannot overlap; return before touching any polygon.
    if len(trees) <= 1:
        return False, []

    shapes = [tree.polygon for tree in trees]
    found = []

    for first_idx, first in enumerate(shapes):
        for second_idx in range(first_idx + 1, len(shapes)):
            second = shapes[second_idx]
            # Skip pairs that are disjoint or that merely share boundary points.
            if not first.intersects(second):
                continue
            if first.touches(second):
                continue
            shared_area = first.intersection(second).area
            if shared_area > tolerance:
                found.append((first_idx, second_idx, shared_area))

    return bool(found), found

# Re-check the submitted N=2 trees with an area threshold of 1e-15, well below
# the ~7e-13 intersection area measured on these two polygons earlier
has_ovlp_strict, overlaps_strict = has_overlap_strict(trees_n2, tolerance=1e-15)
print(f"Strict overlap check: {has_ovlp_strict}")
print(f"Overlaps: {overlaps_strict}")

Strict overlap check: True
Overlaps: [(0, 1, 7.019842217938716e-13)]


In [7]:
# The problem is that our overlap detection tolerance was too loose (1e-12)
# Kaggle's validation is stricter.

# Let's fix this by using the baseline for N=2 and resubmitting
# First, let's verify the baseline N=2 passes strict overlap check

# Same 1e-15 area threshold as used for the submitted trees, so the two results are comparable
has_ovlp_baseline_strict, _ = has_overlap_strict(baseline_n2, tolerance=1e-15)
print(f"Baseline N=2 strict overlap: {has_ovlp_baseline_strict}")

# Calculate scores
# NOTE(review): mid-notebook import - consider moving it next to the other `utils` imports
from utils import calculate_score_for_n
score_baseline_n2 = calculate_score_for_n(baseline_n2, 2)
score_submitted_n2 = calculate_score_for_n(trees_n2, 2)

# Values are printed with 6 decimals, so any score difference below ~5e-7 shows as 0.000000
print(f"\nBaseline N=2 score: {score_baseline_n2:.6f}")
print(f"Submitted N=2 score: {score_submitted_n2:.6f}")
print(f"Difference: {score_baseline_n2 - score_submitted_n2:.6f}")

Baseline N=2 strict overlap: False

Baseline N=2 score: 0.450779
Submitted N=2 score: 0.450779
Difference: 0.000000


In [None]:
# FIX (step 1 of the fix): find every N whose trees overlap under the strict check.
# This cell only scans and reports; it does not write a corrected submission itself.
# We need to go back to the baseline for any N values where the ensemble has overlaps

# First, let's check ALL N values for overlaps with strict tolerance
print("Checking all N values for overlaps with strict tolerance...")

# Collects (n, overlaps) for every group that fails the strict check
overlapping_ns = []
# Groups N=1..200 inclusive
for n in range(1, 201):
    trees = load_trees_for_n(failed_df, n)
    # NOTE(review): rebinds `has_ovlp` / `overlaps` from the earlier N=2 cell;
    # after this loop they hold the result for the last N, not for N=2
    has_ovlp, overlaps = has_overlap_strict(trees, tolerance=1e-15)
    if has_ovlp:
        overlapping_ns.append((n, overlaps))
        print(f"N={n}: OVERLAP detected - {overlaps}")

print(f"\nTotal overlapping N values: {len(overlapping_ns)}")