# Loop 6 Analysis: Understanding Overlap Failures

exp_004 and exp_005 both failed with 'Overlapping trees in group 002'.
We need to understand why local validation passes but Kaggle rejects the submission.

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from decimal import Decimal, getcontext
import json

# Set the decimal module's context precision to 30 significant digits.
# NOTE(review): the cells shown here parse coordinates with float() and never
# construct a Decimal, so this setting has no visible effect on them.
getcontext().prec = 30
print('Imports done')

Imports done


In [2]:
# Read the rejected exp_005 submission into a DataFrame
submission_path = '/home/code/experiments/005_strict_ensemble/submission.csv'
df = pd.read_csv(submission_path)
row_count = len(df)
print(f'Loaded {row_count} rows')
print(df.head())

Loaded 20100 rows
      id                     x                    y                  deg
0  001_0  s-48.196086194214246  s58.770984615214225                s45.0
1  002_0       s0.154097069621     s-0.038540742695    s203.629377730657
2  002_1      s-0.154097069621     s-0.561459257305     s23.629377730657
3  003_0    s0.254937643697833  s-0.233436061549416  s113.56326044172948
4  003_1    s0.357722754471247   s0.250360566787394     s66.370622269343


In [3]:
# Extract N=2 data (the failing group)
def parse_s(val):
    """Convert a submission cell to float.

    Submission values are stored as strings with a leading 's'
    (e.g. 's0.154'). That prefix is dropped when present; anything else is
    handed to float() unchanged.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    numeric_part = val[1:] if has_prefix else val
    return float(numeric_part)

# Rows whose id starts with '002_' belong to the N=2 group
is_n2_row = df['id'].str.startswith('002_')
n2_df = df[is_n2_row]
n2_row_count = len(n2_df)
print(f'N=2 has {n2_row_count} rows')
print(n2_df)

N=2 has 2 rows
      id                 x                 y                deg
1  002_0   s0.154097069621  s-0.038540742695  s203.629377730657
2  002_1  s-0.154097069621  s-0.561459257305   s23.629377730657


In [4]:
# Parse the N=2 position and angle columns into plain Python floats
xs = list(map(parse_s, n2_df['x'].values))
ys = list(map(parse_s, n2_df['y'].values))
degs = list(map(parse_s, n2_df['deg'].values))

# One line per tree: x, y, rotation
for idx, label in enumerate(('Tree 0:', 'Tree 1:')):
    print(label, xs[idx], ys[idx], degs[idx])

Tree 0: 0.154097069621 -0.038540742695 203.629377730657
Tree 1: -0.154097069621 -0.561459257305 23.629377730657


In [5]:
# Create tree polygon using EXACT same method as Kaggle
def get_tree_polygon():
    """Build the unrotated, unplaced tree outline as a shapely Polygon.

    The outline is symmetric about the y-axis, so only the tip and the
    right-hand edge (tip side down to the trunk base) are written out. The
    left-hand edge is the right-hand edge mirrored in x and walked in
    reverse, which yields a 15-vertex ring: tip, down the right side, across
    the trunk base, and back up the left side.
    """
    tip = (0.0, 0.8)
    right_edge = [
        (0.125, 0.5), (0.0625, 0.5),                # top tier
        (0.2, 0.25), (0.1, 0.25),                   # middle tier
        (0.35, 0.0), (0.075, 0.0), (0.075, -0.2),   # bottom tier and trunk
    ]
    left_edge = [(-x, y) for x, y in reversed(right_edge)]
    return Polygon([tip, *right_edge, *left_edge])

# Module-level template polygon; create_tree() rotates and translates this
# shape for each placed tree.
TREE = get_tree_polygon()
# Sanity-check the template: its area and its (minx, miny, maxx, maxy) bounds.
print(f'Tree area: {TREE.area}')
print(f'Tree bounds: {TREE.bounds}')

Tree area: 0.24562500000000004
Tree bounds: (-0.35, -0.2, 0.35, 0.8)


In [6]:
# Create the two trees for N=2
def create_tree(x, y, deg):
    """Return the TREE template rotated by `deg` and then moved to (x, y).

    The rotation is applied first, about the origin (0, 0), with `deg`
    passed straight to shapely's rotate(); the rotated shape is then shifted
    by `x` along the x-axis and `y` along the y-axis.
    """
    rotated = rotate(TREE, deg, origin=(0, 0))
    placed = translate(rotated, x, y)
    return placed

# Place both N=2 trees from their parsed (x, y, deg) values
placement0 = (xs[0], ys[0], degs[0])
placement1 = (xs[1], ys[1], degs[1])
tree0 = create_tree(*placement0)
tree1 = create_tree(*placement1)

# Axis-aligned bounds of each placed tree
for label, placed_tree in (('Tree 0 bounds:', tree0), ('Tree 1 bounds:', tree1)):
    print(label, placed_tree.bounds)

Tree 0 bounds: (-0.16655799759476575, -0.7714666106167503, 0.47475213683676853, 0.17475213683691587)
Tree 1 bounds: (-0.4747521368367686, -0.7747521368369159, 0.16655799759476575, 0.17146661061675028)


In [7]:
# Evaluate each spatial predicate once, report them, then inspect the
# intersection geometry when the trees share more than boundary points.
n2_intersects = tree0.intersects(tree1)
n2_touches = tree0.touches(tree1)
n2_overlaps = tree0.overlaps(tree1)

print('\nOverlap check:')
print(f'Intersects: {n2_intersects}')
print(f'Touches: {n2_touches}')
print(f'Overlaps: {n2_overlaps}')

if n2_intersects and not n2_touches:
    intersection = tree0.intersection(tree1)
    print(f'Intersection type: {intersection.geom_type}')
    print(f'Intersection area: {intersection.area}')
    print(f'Intersection: {intersection}')


Overlap check:
Intersects: True
Touches: False
Overlaps: True
Intersection type: MultiPolygon
Intersection area: 1.777433712564009e-24
Intersection: MULTIPOLYGON (((0.1665579975921483 -0.421172665397077, 0.1665579975947658 -0.4211726653981013, 0.1665579975939464 -0.4211726653984598, 0.1665579975921483 -0.421172665397077)), ((-0.1665579975947658 -0.1788273346018987, -0.1665579975939464 -0.1788273346015402, -0.1665579975921484 -0.1788273346029228, -0.1665579975947658 -0.1788273346018987)))


In [8]:
# Load the known-valid baseline submission and pull out its N=2 rows
valid_baseline_path = '/home/nonroot/snapshots/santa-2025/21328309254/submission/submission.csv'
valid_baseline = pd.read_csv(valid_baseline_path)
is_n2_valid_row = valid_baseline['id'].str.startswith('002_')
n2_valid = valid_baseline[is_n2_valid_row]
print('Valid baseline N=2:')
print(n2_valid)

Valid baseline N=2:
      id                     x                      y                  deg
1  002_0   s0.1540970696213643  s-0.03854074269478543  s203.62937773065684
2  002_1  s-0.1540970696213643   s-0.5614592573052146  s23.629377730656792


In [9]:
# Parse the baseline's N=2 values so they can be compared with ours
xs_valid = list(map(parse_s, n2_valid['x'].values))
ys_valid = list(map(parse_s, n2_valid['y'].values))
degs_valid = list(map(parse_s, n2_valid['deg'].values))

# Per-tree deltas (ours minus baseline) for x, y and rotation
delta0 = (xs[0] - xs_valid[0], ys[0] - ys_valid[0], degs[0] - degs_valid[0])
delta1 = (xs[1] - xs_valid[1], ys[1] - ys_valid[1], degs[1] - degs_valid[1])

print('\nValid baseline:')
for idx, label in enumerate(('Tree 0:', 'Tree 1:')):
    print(label, xs_valid[idx], ys_valid[idx], degs_valid[idx])

print('\nOur submission:')
for idx, label in enumerate(('Tree 0:', 'Tree 1:')):
    print(label, xs[idx], ys[idx], degs[idx])

print('\nDifferences:')
print('dx0:', delta0[0], 'dy0:', delta0[1], 'ddeg0:', delta0[2])
print('dx1:', delta1[0], 'dy1:', delta1[1], 'ddeg1:', delta1[2])


Valid baseline:
Tree 0: 0.1540970696213643 -0.03854074269478543 203.62937773065684
Tree 1: -0.1540970696213643 -0.5614592573052146 23.629377730656792

Our submission:
Tree 0: 0.154097069621 -0.038540742695 203.629377730657
Tree 1: -0.154097069621 -0.561459257305 23.629377730657

Differences:
dx0: -3.642919299551295e-13 dy0: -2.1456447729661932e-13 ddeg0: 1.7053025658242404e-13
dx1: 3.642919299551295e-13 dy1: 2.1460611066004276e-13 ddeg1: 2.0961010704922955e-13


In [10]:
# Build the baseline's two N=2 trees and run the same overlap predicates
tree0_valid = create_tree(xs_valid[0], ys_valid[0], degs_valid[0])
tree1_valid = create_tree(xs_valid[1], ys_valid[1], degs_valid[1])

valid_intersects = tree0_valid.intersects(tree1_valid)
valid_touches = tree0_valid.touches(tree1_valid)

print('Valid baseline overlap check:')
print(f'Intersects: {valid_intersects}')
print(f'Touches: {valid_touches}')

# Disjoint or merely touching trees do not count as overlapping
if not valid_intersects or valid_touches:
    print('No overlap!')
else:
    intersection = tree0_valid.intersection(tree1_valid)
    print(f'Intersection area: {intersection.area}')

Valid baseline overlap check:
Intersects: False
Touches: False
No overlap!


In [11]:
# The issue is clear: our N=2 solution has overlap, valid baseline doesn't
# Let's check which snapshot contributed the N=2 solution

# Find all snapshots and check their N=2 solutions
import glob

# glob returns paths in arbitrary order; sort so that the first match (and
# therefore this cell's output) is reproducible across runs and machines.
csv_files = sorted(glob.glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv'))
print(f'Found {len(csv_files)} snapshot submissions')

# Find which one matches our N=2
# NOTE(review): only tree 0's x and y are compared, and the 1e-10 tolerance
# is looser than the ~4e-13 gap between our submission and the valid
# baseline printed earlier in this notebook. A "match" therefore means
# "same solution up to rounding", not "bit-identical source file".
for csv_path in csv_files:
    try:
        df_snap = pd.read_csv(csv_path)
        n2_snap = df_snap[df_snap['id'].str.startswith('002_')]
        if len(n2_snap) != 2:
            continue
        xs_snap = [parse_s(v) for v in n2_snap['x'].values]
        ys_snap = [parse_s(v) for v in n2_snap['y'].values]
    except (OSError, KeyError, ValueError, AttributeError) as exc:
        # Unreadable or malformed snapshot: keep scanning, but report it
        # instead of hiding the failure (pandas parser/empty-data errors
        # are ValueError subclasses).
        print(f'Skipping {csv_path}: {type(exc).__name__}: {exc}')
        continue

    if abs(xs_snap[0] - xs[0]) < 1e-10 and abs(ys_snap[0] - ys[0]) < 1e-10:
        print(f'\nMatch found: {csv_path}')
        print(n2_snap)
        break
else:
    # Runs only when the loop finished without hitting `break`
    print('\nNo snapshot matched our N=2 solution')

Found 88 snapshot submissions

Match found: /home/nonroot/snapshots/santa-2025/21116303805/submission/submission.csv
      id                       x                       y  \
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   

                       deg  
1  s203.629377730656841550  
2   s23.629377730656791812  


In [None]:
# The solution is clear: we need to use the VALID baseline for N=2
# Let's create a fixed submission that uses valid baseline for any N with overlaps

# First, identify ALL N values with overlaps in our submission


def _first_overlap(trees):
    """Return (i, j, area) for the first overlapping pair, or None.

    Pairs are visited in (i, j) order with i < j. A pair counts as
    overlapping when the polygons intersect, do not merely touch, and the
    computed intersection area is greater than zero.

    NOTE(review): a pair that satisfies intersects-and-not-touches but whose
    computed intersection area is exactly 0 is NOT reported here; a checker
    that relies on the predicates alone could still reject such a pair.
    """
    for i in range(len(trees)):
        for j in range(i + 1, len(trees)):
            if trees[i].intersects(trees[j]) and not trees[i].touches(trees[j]):
                area = trees[i].intersection(trees[j]).area
                if area > 0:  # ANY overlap, no matter how small
                    return i, j, area
    return None


overlapping_ns = []
skipped_ns = []  # groups whose row count is not N, so they were not checked

for n in range(1, 201):
    n_df = df[df['id'].str.startswith(f'{n:03d}_')]
    if len(n_df) != n:
        # An incomplete group cannot be validated; record it rather than
        # letting it silently pass as "no overlap".
        skipped_ns.append(n)
        print(f'N={n}: expected {n} rows, found {len(n_df)} - skipped')
        continue

    xs_n = [parse_s(v) for v in n_df['x'].values]
    ys_n = [parse_s(v) for v in n_df['y'].values]
    degs_n = [parse_s(v) for v in n_df['deg'].values]

    trees = [create_tree(x, y, deg) for x, y, deg in zip(xs_n, ys_n, degs_n)]

    first_hit = _first_overlap(trees)
    if first_hit is not None:
        i, j, area = first_hit
        print(f'N={n}: trees {i},{j} overlap (area={area:.2e})')
        overlapping_ns.append(n)

print(f'\nTotal N values with overlaps: {len(overlapping_ns)}')
print(f'Overlapping N values: {overlapping_ns}')
if skipped_ns:
    print(f'N values skipped (unexpected row count): {skipped_ns}')