# Loop 7 Analysis: Overlap Validation Issue

exp_006 failed with "Overlapping trees in group 002" despite passing local format validation.

The issue is that we're combining solutions from different snapshots that may have overlaps when validated with Kaggle's integer-scaling method.

**Key insight from kernel**: Kaggle uses integer scaling (1e18) for overlap detection.

In [None]:
import os
import json
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree

# Decimal arithmetic in this notebook uses 30 significant digits.
getcontext().prec = 30
# Factor applied to tree coordinates before polygons are built
# (used by create_tree_polygon_kaggle).
SCALE = Decimal('1e18')

# Tree polygon vertices
# TX holds the x coordinates and TY the y coordinates; the two lists are
# paired index-by-index to form the 15 vertices of the unrotated tree outline.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

print('Setup complete')

In [None]:
def create_tree_polygon_kaggle(x, y, angle):
    """Build the tree outline for one placement in SCALE-multiplied coordinates.

    Each input and each template vertex goes through ``Decimal(str(value))``
    and is multiplied by ``SCALE``; the product is then converted to ``float``
    because that is what the shapely polygon is constructed from.  The
    geometry itself is therefore float-based, not Decimal-based.

    Args:
        x: Horizontal position of the tree.
        y: Vertical position of the tree.
        angle: Rotation passed to ``affinity.rotate`` (about the origin).

    Returns:
        The scaled template polygon, rotated about ``(0, 0)`` and then
        translated by ``(x * SCALE, y * SCALE)``.
    """
    # Convert the placement first so bad input fails before any geometry work.
    shift_x = Decimal(str(x))
    shift_y = Decimal(str(y))
    theta = Decimal(str(angle))

    # Scaled template outline, vertex by vertex.
    scaled_vertices = []
    for vx, vy in zip(TX, TY):
        sx = Decimal(str(vx)) * SCALE
        sy = Decimal(str(vy)) * SCALE
        scaled_vertices.append((float(sx), float(sy)))
    outline = Polygon(scaled_vertices)

    # Rotation happens around the template origin, before the shift.
    rotated = affinity.rotate(outline, float(theta), origin=(0, 0))
    return affinity.translate(
        rotated,
        float(shift_x * SCALE),
        float(shift_y * SCALE),
    )

def has_overlap_kaggle(trees):
    """Report which pairs of trees overlap.

    Every tree is turned into a polygon with ``create_tree_polygon_kaggle``.
    Two trees count as overlapping when their polygons intersect and do not
    merely touch.

    Args:
        trees: Sequence of ``(x, y, angle)`` placements.

    Returns:
        ``(found, pairs)`` where ``pairs`` lists the ``(i, j)`` index pairs
        (``i < j``) that overlap and ``found`` is True when that list is
        non-empty.  Fewer than two trees always yields ``(False, [])``.
    """
    # Nothing can overlap with fewer than two trees.
    if len(trees) < 2:
        return False, []

    shapes = [create_tree_polygon_kaggle(*placement) for placement in trees]

    # Visit every unordered pair once, in ascending (i, j) order.
    colliding = [
        (first_idx, second_idx)
        for first_idx, first in enumerate(shapes)
        for second_idx, second in enumerate(shapes[first_idx + 1:], start=first_idx + 1)
        if first.intersects(second) and not first.touches(second)
    ]

    return bool(colliding), colliding

print('Kaggle validation functions defined')

In [None]:
# Load the failed submission and check N=2
# Path of the submission CSV under investigation (the one reported as failing).
# NOTE: the write-out cell later uses this same path as its output_path.
failed_path = '/home/submission/submission.csv'

def load_n_from_csv(path, n):
    """Load the trees belonging to group ``n`` from a submission CSV.

    The first line is treated as a header and skipped.  Every other line is
    split on commas, and lines without exactly four fields are ignored.  The
    group number is the integer before the first ``_`` in the id field.
    Numeric fields may carry a leading ``s``, which is removed before the
    value is converted to ``float``.

    Args:
        path: Path of the CSV file to read.
        n: Group number to select.

    Returns:
        A list of ``(x, y, deg)`` float tuples in file order.  The list is
        empty when no row belongs to ``n``, including for an empty file.

    Raises:
        ValueError: If an id prefix is not an integer, or a selected value is
            not a valid float.
    """
    def _to_float(field):
        # Values may be written as e.g. "s0.25"; drop the marker if present.
        return float(field[1:] if field.startswith('s') else field)

    trees = []
    with open(path, 'r') as f:
        # Skip header; the default keeps an empty file from raising StopIteration.
        next(f, None)
        for line in f:
            parts = line.strip().split(',')
            if len(parts) != 4:
                continue
            id_val, x, y, deg = parts
            if int(id_val.split('_')[0]) != n:
                continue
            trees.append((_to_float(x), _to_float(y), _to_float(deg)))
    return trees

# Check N=2 in the failed submission
# trees_n2 is kept at module level; the comparison cell further down reads it.
trees_n2 = load_n_from_csv(failed_path, 2)
print(f'N=2 trees: {trees_n2}')

# pairs holds the (i, j) indices of overlapping trees within the N=2 group.
has_overlap, pairs = has_overlap_kaggle(trees_n2)
print(f'N=2 has overlap (Kaggle method): {has_overlap}')
if pairs:
    print(f'Overlapping pairs: {pairs}')

In [None]:
# Check which N values have overlaps in the failed submission
# NOTE(review): load_n_from_csv re-reads the whole file on every iteration,
# so this loop scans the CSV 200 times.
overlapping_ns = []
for n in range(1, 201):
    trees = load_n_from_csv(failed_path, n)
    has_overlap, pairs = has_overlap_kaggle(trees)
    if has_overlap:
        overlapping_ns.append(n)
        # Only the first 10 offending groups are printed in detail.
        if len(overlapping_ns) <= 10:
            print(f'N={n}: OVERLAP detected, pairs: {pairs}')

print(f'\nTotal N values with overlaps: {len(overlapping_ns)}')
# Shows at most the first 20 groups; the trailing "..." is printed regardless
# of how many there are.
print(f'Overlapping N values: {overlapping_ns[:20]}...')

In [None]:
# Load the baseline (exp_001 which passed Kaggle) and check N=2
# baseline_path is reused later to load the full baseline for the fallback.
baseline_path = '/home/nonroot/snapshots/santa-2025/21145966992/submission/submission.csv'

# baseline_n2 is kept at module level; the comparison cell reads it.
baseline_n2 = load_n_from_csv(baseline_path, 2)
print(f'Baseline N=2 trees: {baseline_n2}')

# Same overlap check as for the failed submission, applied to the baseline.
has_overlap_base, pairs_base = has_overlap_kaggle(baseline_n2)
print(f'Baseline N=2 has overlap (Kaggle method): {has_overlap_base}')

In [None]:
# Compare N=2 between failed and baseline
print('Failed N=2:')
for i, t in enumerate(trees_n2):
    print(f'  Tree {i}: x={t[0]:.18f}, y={t[1]:.18f}, deg={t[2]:.18f}')

print('\nBaseline N=2:')
for i, t in enumerate(baseline_n2):
    print(f'  Tree {i}: x={t[0]:.18f}, y={t[1]:.18f}, deg={t[2]:.18f}')

print('\nAre they the same?')
# Report a differing tree count explicitly: indexing the baseline by the
# failed list's positions would raise IndexError when the baseline is shorter
# and silently ignore the extra trees when it is longer.
if len(trees_n2) != len(baseline_n2):
    print(f'  Tree count differs: failed={len(trees_n2)}, baseline={len(baseline_n2)}')

# Compare position by position over the trees both lists have in common.
# Two trees are "the same" when x, y and deg each differ by less than 1e-15.
for i, (failed_tree, baseline_tree) in enumerate(zip(trees_n2, baseline_n2)):
    same = all(abs(a - b) < 1e-15 for a, b in zip(failed_tree, baseline_tree))
    print(f'  Tree {i}: {same}')

In [None]:
# The solution: For any N with overlaps, fall back to baseline
# Let's create a safe ensemble that only uses improved N values that pass Kaggle validation

print('Creating safe ensemble...')

# Load baseline as raw strings
def load_snapshot_raw(path):
    """Read a submission CSV and group its rows by N without parsing values.

    The first line is treated as a header and skipped.  Every other line is
    split on commas, and lines without exactly four fields are ignored.  The
    group number is the integer before the first ``_`` in the id field.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A plain ``dict`` mapping each group number to the list of its rows in
        file order.  Each row is the list of four string fields exactly as
        they appear in the file, so writing them back preserves the original
        text.  An empty file yields ``{}``.

    Raises:
        ValueError: If an id prefix is not an integer.
    """
    rows_by_n = {}
    with open(path, 'r') as f:
        # Skip header; the default keeps an empty file from raising StopIteration.
        next(f, None)
        for line in f:
            parts = line.strip().split(',')
            if len(parts) != 4:
                continue
            n = int(parts[0].split('_')[0])
            # setdefault keeps the result a plain dict, so a missing group
            # still raises KeyError for callers that index it.
            rows_by_n.setdefault(n, []).append(parts)
    return rows_by_n

# Baseline rows grouped by N, kept as raw strings so they can be written back
# unchanged when a group falls back to the baseline.
baseline_raw = load_snapshot_raw(baseline_path)
print(f'Loaded baseline with {len(baseline_raw)} N values')

In [None]:
# For each N, check if the ensemble version has overlaps
# If it does, use baseline instead

failed_raw = load_snapshot_raw(failed_path)


def _rows_to_trees(rows):
    """Convert raw (id, x, y, deg) string rows to (x, y, deg) float tuples.

    A leading 's' on a value is stripped before the float conversion.
    """
    return [
        tuple(float(v[1:] if v.startswith('s') else v) for v in row[1:])
        for row in rows
    ]


safe_per_n = {}
fallback_count = 0

for n in range(1, 201):
    # Get ensemble version from the rows already loaded above, instead of
    # re-reading and re-filtering the whole CSV for every N.  A group missing
    # from the file raises KeyError here.
    ensemble_rows = failed_raw[n]
    ensemble_trees = _rows_to_trees(ensemble_rows)

    # Check for overlaps using Kaggle method
    has_overlap, _ = has_overlap_kaggle(ensemble_trees)

    if has_overlap:
        # Fall back to baseline
        safe_per_n[n] = baseline_raw[n]
        fallback_count += 1
    else:
        # Use ensemble version
        safe_per_n[n] = ensemble_rows

print(f'Fallback to baseline for {fallback_count} N values')
# Derive the count from what was actually selected rather than a literal 200.
print(f'Using ensemble for {len(safe_per_n) - fallback_count} N values')

In [None]:
# Calculate scores for safe ensemble
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

# Tree outline vertices (x in TX, y in TY, paired index-by-index).
# These repeat the values assigned to TX/TY in the setup cell.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, angle):
    """Build the unscaled tree polygon for one placement.

    Args:
        x: Horizontal translation applied after rotating.
        y: Vertical translation applied after rotating.
        angle: Rotation passed to ``affinity.rotate`` (about the origin).

    Returns:
        The TX/TY template polygon rotated about ``(0, 0)`` and then moved by
        ``(x, y)``.
    """
    template = Polygon(zip(TX, TY))
    # Rotate around the template origin first, then shift into place.
    turned = affinity.rotate(template, angle, origin=(0, 0))
    return affinity.translate(turned, x, y)

def calculate_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Args:
        trees: Iterable of ``(x, y, angle)`` placements.

    Returns:
        ``max(width, height)`` of the bounds of the union of all tree
        polygons.
    """
    merged = unary_union([create_tree_polygon(*placement) for placement in trees])
    # bounds is (min_x, min_y, max_x, max_y).
    extent = merged.bounds
    width = extent[2] - extent[0]
    height = extent[3] - extent[1]
    return max(width, height)

def parse_row(row):
    """Convert one raw CSV row into an ``(x, y, deg)`` tuple of floats.

    Args:
        row: Sequence whose items 1, 2 and 3 are the x, y and deg strings;
            item 0 (the id) is not used.  Each value may carry a leading
            ``s``, which is dropped before conversion.

    Returns:
        ``(x, y, deg)`` as floats.
    """
    def _number(text):
        # Strip the optional 's' marker, then parse.
        if text.startswith('s'):
            return float(text[1:])
        return float(text)

    return (_number(row[1]), _number(row[2]), _number(row[3]))

# Calculate total score
# Each group contributes side^2 / n, where side is the longer edge of the
# bounding box around that group's trees; the total is the sum over N=1..200.
total_score = 0
for n in range(1, 201):
    # Rows in safe_per_n are raw strings, so parse them before measuring.
    trees = [parse_row(row) for row in safe_per_n[n]]
    side = calculate_side(trees)
    score = (side ** 2) / n
    total_score += score

print(f'Safe ensemble total score: {total_score:.6f}')

In [None]:
# Write safe ensemble
# NOTE: this is the same path as failed_path, so the file that was analysed
# above is overwritten by the safe ensemble.
output_path = '/home/submission/submission.csv'

with open(output_path, 'w') as f:
    f.write('id,x,y,deg\n')
    # Groups are written in ascending N.  Rows are joined back from their raw
    # string fields, so every value keeps the exact text it was loaded with.
    for n in range(1, 201):
        for row in safe_per_n[n]:
            f.write(','.join(row) + '\n')

print(f'Saved safe ensemble to {output_path}')

# Verify no overlaps
# Re-reads the file just written and repeats the overlap check per group.
# NOTE(review): load_n_from_csv scans the whole file on each of the 200 calls.
print('\nVerifying no overlaps...')
overlap_count = 0
for n in range(1, 201):
    trees = load_n_from_csv(output_path, n)
    has_overlap, _ = has_overlap_kaggle(trees)
    if has_overlap:
        overlap_count += 1
        print(f'N={n}: STILL HAS OVERLAP!')

print(f'\nTotal N values with overlaps after fix: {overlap_count}')