# Loop 7 Analysis: Overlap Validation Issue

exp_006 failed with "Overlapping trees in group 002" despite passing local format validation.

The issue is that we're combining solutions from different snapshots that may have overlaps when validated with Kaggle's integer-scaling method.

**Key insight from kernel**: Kaggle uses integer scaling (1e18) for overlap detection.

In [1]:
import os
import json
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree

getcontext().prec = 30
SCALE = Decimal('1e18')

# Tree polygon vertices
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

print('Setup complete')

Setup complete


In [2]:
def create_tree_polygon_kaggle(x, y, angle):
    """Create tree polygon using Kaggle's integer scaling method."""
    # Use Decimal for high precision
    x_dec = Decimal(str(x))
    y_dec = Decimal(str(y))
    angle_dec = Decimal(str(angle))
    
    # Create base polygon with integer scaling
    pts = [(Decimal(str(tx)) * SCALE, Decimal(str(ty)) * SCALE) for tx, ty in zip(TX, TY)]
    poly = Polygon([(float(px), float(py)) for px, py in pts])
    
    # Rotate and translate
    poly = affinity.rotate(poly, float(angle_dec), origin=(0, 0))
    poly = affinity.translate(poly, float(x_dec * SCALE), float(y_dec * SCALE))
    
    return poly

def has_overlap_kaggle(trees):
    """Check for overlaps using Kaggle's method."""
    if len(trees) < 2:
        return False, []
    
    polys = [create_tree_polygon_kaggle(*t) for t in trees]
    overlapping_pairs = []
    
    for i in range(len(polys)):
        for j in range(i+1, len(polys)):
            if polys[i].intersects(polys[j]) and not polys[i].touches(polys[j]):
                overlapping_pairs.append((i, j))
    
    return len(overlapping_pairs) > 0, overlapping_pairs

print('Kaggle validation functions defined')

Kaggle validation functions defined


In [3]:
# Load the failed submission and check N=2
failed_path = '/home/submission/submission.csv'

def load_n_from_csv(path, n):
    """Load trees for a specific N value."""
    trees = []
    with open(path, 'r') as f:
        next(f)  # Skip header
        for line in f:
            parts = line.strip().split(',')
            if len(parts) != 4:
                continue
            id_val, x, y, deg = parts
            n_str = id_val.split('_')[0]
            if int(n_str) == n:
                x_val = float(x[1:] if x.startswith('s') else x)
                y_val = float(y[1:] if y.startswith('s') else y)
                deg_val = float(deg[1:] if deg.startswith('s') else deg)
                trees.append((x_val, y_val, deg_val))
    return trees

# Check N=2 in the failed submission
trees_n2 = load_n_from_csv(failed_path, 2)
print(f'N=2 trees: {trees_n2}')

has_overlap, pairs = has_overlap_kaggle(trees_n2)
print(f'N=2 has overlap (Kaggle method): {has_overlap}')
if pairs:
    print(f'Overlapping pairs: {pairs}')

N=2 trees: [(0.1540970696213559, -0.03854074269479465, 144.27276086312358), (-0.15409706962137285, -0.5614592573052241, 324.27276086312355)]
N=2 has overlap (Kaggle method): True
Overlapping pairs: [(0, 1)]


In [4]:
# Check which N values have overlaps in the failed submission
overlapping_ns = []
for n in range(1, 201):
    trees = load_n_from_csv(failed_path, n)
    has_overlap, pairs = has_overlap_kaggle(trees)
    if has_overlap:
        overlapping_ns.append(n)
        if len(overlapping_ns) <= 10:
            print(f'N={n}: OVERLAP detected, pairs: {pairs}')

print(f'\nTotal N values with overlaps: {len(overlapping_ns)}')
print(f'Overlapping N values: {overlapping_ns[:20]}...')

N=2: OVERLAP detected, pairs: [(0, 1)]
N=4: OVERLAP detected, pairs: [(0, 1), (0, 3), (1, 2), (1, 3)]
N=5: OVERLAP detected, pairs: [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]
N=16: OVERLAP detected, pairs: [(0, 3), (0, 4), (0, 12), (0, 13), (1, 5), (1, 7), (1, 9), (1, 14), (3, 4), (3, 11), (4, 11), (4, 12), (5, 10), (5, 14), (5, 15), (6, 12), (6, 13), (7, 9), (9, 14), (12, 13), (14, 15)]


N=40: OVERLAP detected, pairs: [(0, 20), (1, 11), (1, 22), (2, 21), (2, 38), (3, 10), (3, 21), (4, 18), (4, 29), (5, 32), (6, 32), (6, 37), (7, 29), (7, 37), (8, 14), (8, 38), (9, 31), (10, 17), (11, 12), (11, 19), (12, 34), (13, 26), (13, 36), (14, 25), (15, 16), (15, 19), (16, 31), (17, 39), (18, 30), (19, 33), (20, 27), (23, 25), (27, 34), (27, 38), (28, 36), (28, 39), (30, 33)]
N=46: OVERLAP detected, pairs: [(0, 8), (0, 33), (1, 9), (1, 37), (3, 15), (3, 24), (3, 33), (4, 25), (5, 28), (5, 39), (6, 27), (7, 32), (7, 44), (9, 21), (9, 36), (10, 18), (11, 12), (12, 20), (13, 40), (14, 23), (14, 30), (15, 22), (15, 33), (16, 44), (17, 23), (17, 35), (19, 38), (19, 40), (20, 32), (21, 39), (22, 45), (24, 29), (25, 29), (27, 30), (28, 41), (31, 35), (34, 38), (36, 39), (37, 43)]
N=47: OVERLAP detected, pairs: [(0, 8), (0, 33), (1, 9), (1, 37), (3, 24), (3, 33), (4, 25), (5, 28), (5, 39), (6, 27), (7, 32), (7, 45), (9, 21), (9, 36), (10, 18), (10, 43), (11, 12), (12, 20), (13, 40), (14,

N=53: OVERLAP detected, pairs: [(0, 1), (0, 6), (0, 28), (0, 42), (1, 11), (1, 36), (2, 20), (2, 26), (2, 48), (3, 7), (3, 23), (3, 45), (3, 46), (4, 17), (4, 18), (4, 32), (4, 44), (5, 11), (5, 29), (5, 45), (6, 7), (6, 24), (6, 50), (7, 42), (7, 44), (8, 24), (8, 37), (8, 39), (8, 50), (9, 20), (9, 21), (9, 27), (9, 37), (10, 35), (10, 51), (10, 52), (11, 42), (12, 20), (12, 37), (12, 39), (13, 20), (13, 26), (13, 27), (13, 47), (14, 43), (15, 40), (15, 49), (16, 35), (17, 21), (17, 40), (17, 49), (18, 23), (18, 25), (18, 49), (19, 21), (19, 27), (19, 30), (19, 40), (21, 32), (22, 33), (22, 39), (22, 50), (23, 41), (23, 44), (24, 32), (24, 44), (25, 41), (26, 43), (27, 31), (28, 33), (28, 36), (28, 50), (29, 35), (29, 51), (30, 31), (30, 38), (31, 47), (32, 37), (34, 39), (38, 40), (41, 46), (42, 45), (43, 47), (45, 51), (46, 51), (46, 52)]
N=54: OVERLAP detected, pairs: [(0, 1), (0, 6), (0, 28), (0, 43), (1, 11), (1, 37), (2, 20), (2, 26), (2, 34), (2, 49), (3, 7), (3, 23), (3, 46),


Total N values with overlaps: 57
Overlapping N values: [2, 4, 5, 16, 40, 46, 47, 48, 53, 54, 55, 56, 59, 62, 69, 70, 71, 77, 78, 79]...


In [5]:
# Load the baseline (exp_001 which passed Kaggle) and check N=2
baseline_path = '/home/nonroot/snapshots/santa-2025/21145966992/submission/submission.csv'

baseline_n2 = load_n_from_csv(baseline_path, 2)
print(f'Baseline N=2 trees: {baseline_n2}')

has_overlap_base, pairs_base = has_overlap_kaggle(baseline_n2)
print(f'Baseline N=2 has overlap (Kaggle method): {has_overlap_base}')

Baseline N=2 trees: [(0.1540970696213559, -0.03854074269479465, 144.27276086312358), (-0.15409706962137285, -0.5614592573052241, 324.27276086312355)]
Baseline N=2 has overlap (Kaggle method): True


In [6]:
# Compare N=2 between failed and baseline
print('Failed N=2:')
for i, t in enumerate(trees_n2):
    print(f'  Tree {i}: x={t[0]:.18f}, y={t[1]:.18f}, deg={t[2]:.18f}')

print('\nBaseline N=2:')
for i, t in enumerate(baseline_n2):
    print(f'  Tree {i}: x={t[0]:.18f}, y={t[1]:.18f}, deg={t[2]:.18f}')

print('\nAre they the same?')
for i in range(len(trees_n2)):
    same = (abs(trees_n2[i][0] - baseline_n2[i][0]) < 1e-15 and 
            abs(trees_n2[i][1] - baseline_n2[i][1]) < 1e-15 and
            abs(trees_n2[i][2] - baseline_n2[i][2]) < 1e-15)
    print(f'  Tree {i}: {same}')

Failed N=2:
  Tree 0: x=0.154097069621355887, y=-0.038540742694794648, deg=144.272760863123579611
  Tree 1: x=-0.154097069621372845, y=-0.561459257305224058, deg=324.272760863123551189

Baseline N=2:
  Tree 0: x=0.154097069621355887, y=-0.038540742694794648, deg=144.272760863123579611
  Tree 1: x=-0.154097069621372845, y=-0.561459257305224058, deg=324.272760863123551189

Are they the same?
  Tree 0: True
  Tree 1: True


In [7]:
# The solution: For any N with overlaps, fall back to baseline
# Let's create a safe ensemble that only uses improved N values that pass Kaggle validation

print('Creating safe ensemble...')

# Load baseline as raw strings
def load_snapshot_raw(path):
    rows_by_n = {}
    with open(path, 'r') as f:
        next(f)  # Skip header
        for line in f:
            parts = line.strip().split(',')
            if len(parts) != 4:
                continue
            id_val = parts[0]
            n = int(id_val.split('_')[0])
            if n not in rows_by_n:
                rows_by_n[n] = []
            rows_by_n[n].append(parts)
    return rows_by_n

baseline_raw = load_snapshot_raw(baseline_path)
print(f'Loaded baseline with {len(baseline_raw)} N values')

Creating safe ensemble...
Loaded baseline with 200 N values


In [8]:
# For each N, check if the ensemble version has overlaps
# If it does, use baseline instead

failed_raw = load_snapshot_raw(failed_path)

safe_per_n = {}
fallback_count = 0

for n in range(1, 201):
    # Get ensemble version
    ensemble_trees = load_n_from_csv(failed_path, n)
    
    # Check for overlaps using Kaggle method
    has_overlap, _ = has_overlap_kaggle(ensemble_trees)
    
    if has_overlap:
        # Fall back to baseline
        safe_per_n[n] = baseline_raw[n]
        fallback_count += 1
    else:
        # Use ensemble version
        safe_per_n[n] = failed_raw[n]

print(f'Fallback to baseline for {fallback_count} N values')
print(f'Using ensemble for {200 - fallback_count} N values')

Fallback to baseline for 57 N values
Using ensemble for 143 N values


In [9]:
# Calculate scores for safe ensemble
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, angle):
    poly = Polygon(zip(TX, TY))
    poly = affinity.rotate(poly, angle, origin=(0, 0))
    poly = affinity.translate(poly, x, y)
    return poly

def calculate_side(trees):
    polys = [create_tree_polygon(*t) for t in trees]
    union = unary_union(polys)
    bounds = union.bounds
    return max(bounds[2] - bounds[0], bounds[3] - bounds[1])

def parse_row(row):
    x = float(row[1][1:] if row[1].startswith('s') else row[1])
    y = float(row[2][1:] if row[2].startswith('s') else row[2])
    deg = float(row[3][1:] if row[3].startswith('s') else row[3])
    return (x, y, deg)

# Calculate total score
total_score = 0
for n in range(1, 201):
    trees = [parse_row(row) for row in safe_per_n[n]]
    side = calculate_side(trees)
    score = (side ** 2) / n
    total_score += score

print(f'Safe ensemble total score: {total_score:.6f}')

Safe ensemble total score: 70.522682


In [10]:
# Write safe ensemble
output_path = '/home/submission/submission.csv'

with open(output_path, 'w') as f:
    f.write('id,x,y,deg\n')
    for n in range(1, 201):
        for row in safe_per_n[n]:
            f.write(','.join(row) + '\n')

print(f'Saved safe ensemble to {output_path}')

# Verify no overlaps
print('\nVerifying no overlaps...')
overlap_count = 0
for n in range(1, 201):
    trees = load_n_from_csv(output_path, n)
    has_overlap, _ = has_overlap_kaggle(trees)
    if has_overlap:
        overlap_count += 1
        print(f'N={n}: STILL HAS OVERLAP!')

print(f'\nTotal N values with overlaps after fix: {overlap_count}')

Saved safe ensemble to /home/submission/submission.csv

Verifying no overlaps...
N=2: STILL HAS OVERLAP!
N=4: STILL HAS OVERLAP!
N=5: STILL HAS OVERLAP!
N=16: STILL HAS OVERLAP!


N=40: STILL HAS OVERLAP!
N=46: STILL HAS OVERLAP!
N=47: STILL HAS OVERLAP!
N=48: STILL HAS OVERLAP!


N=53: STILL HAS OVERLAP!
N=54: STILL HAS OVERLAP!
N=55: STILL HAS OVERLAP!
N=56: STILL HAS OVERLAP!
N=59: STILL HAS OVERLAP!


N=62: STILL HAS OVERLAP!


N=69: STILL HAS OVERLAP!
N=70: STILL HAS OVERLAP!
N=71: STILL HAS OVERLAP!


N=77: STILL HAS OVERLAP!
N=78: STILL HAS OVERLAP!
N=79: STILL HAS OVERLAP!
N=80: STILL HAS OVERLAP!


N=96: STILL HAS OVERLAP!
N=97: STILL HAS OVERLAP!
N=99: STILL HAS OVERLAP!


N=107: STILL HAS OVERLAP!
N=108: STILL HAS OVERLAP!
N=109: STILL HAS OVERLAP!
N=110: STILL HAS OVERLAP!


N=118: STILL HAS OVERLAP!
N=119: STILL HAS OVERLAP!
N=120: STILL HAS OVERLAP!


N=124: STILL HAS OVERLAP!
N=125: STILL HAS OVERLAP!
N=126: STILL HAS OVERLAP!


N=129: STILL HAS OVERLAP!
N=130: STILL HAS OVERLAP!
N=131: STILL HAS OVERLAP!


N=139: STILL HAS OVERLAP!
N=140: STILL HAS OVERLAP!


N=150: STILL HAS OVERLAP!
N=152: STILL HAS OVERLAP!


N=153: STILL HAS OVERLAP!
N=154: STILL HAS OVERLAP!
N=155: STILL HAS OVERLAP!


N=156: STILL HAS OVERLAP!


N=166: STILL HAS OVERLAP!
N=167: STILL HAS OVERLAP!


N=168: STILL HAS OVERLAP!


N=175: STILL HAS OVERLAP!
N=176: STILL HAS OVERLAP!


N=177: STILL HAS OVERLAP!
N=178: STILL HAS OVERLAP!


N=179: STILL HAS OVERLAP!


N=185: STILL HAS OVERLAP!


N=190: STILL HAS OVERLAP!
N=191: STILL HAS OVERLAP!


N=192: STILL HAS OVERLAP!



Total N values with overlaps after fix: 57


In [11]:
# CRITICAL INSIGHT: The baseline passed Kaggle but our validation says it has overlaps!
# This means our validation is WRONG - it's too strict.

# Let's check the actual overlap area for N=2 in baseline
from shapely.geometry import Polygon
from shapely import affinity

TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_simple(x, y, angle):
    poly = Polygon(zip(TX, TY))
    poly = affinity.rotate(poly, angle, origin=(0, 0))
    poly = affinity.translate(poly, x, y)
    return poly

# Check baseline N=2 with standard Shapely (no scaling)
t0 = create_tree_simple(*baseline_n2[0])
t1 = create_tree_simple(*baseline_n2[1])

print(f"Tree 0 area: {t0.area}")
print(f"Tree 1 area: {t1.area}")
print(f"Intersects: {t0.intersects(t1)}")
print(f"Touches: {t0.touches(t1)}")
print(f"Overlaps: {t0.overlaps(t1)}")

if t0.intersects(t1):
    intersection = t0.intersection(t1)
    print(f"Intersection area: {intersection.area}")
    print(f"Intersection type: {intersection.geom_type}")

Tree 0 area: 0.245625
Tree 1 area: 0.24562499999999998
Intersects: True
Touches: False
Overlaps: True
Intersection area: 0.14942653303750386
Intersection type: Polygon


In [12]:
# Let me check the raw coordinates from the baseline file
with open(baseline_path, 'r') as f:
    lines = f.readlines()

# Find N=2 rows
n2_rows = [l for l in lines if l.startswith('002_')]
print("Raw N=2 rows from baseline:")
for row in n2_rows:
    print(f"  {row.strip()}")

Raw N=2 rows from baseline:
  002_0,s0.154097069621355887,s-0.038540742694794648,s144.272760863123583164
  002_1,s-0.154097069621372845,s-0.561459257305224058,s324.272760863123533426
