# Fix Overlaps with Strict Precision

The baseline submission failed with 'Overlapping trees in group 040'. We need to:
1. Implement strict overlap detection with 1e15 scaling (matching Kaggle's validation)
2. Find all N values with overlaps
3. Fix overlapping trees by moving them apart
4. Validate the fixed submission

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
from shapely.strtree import STRtree
import json
import math
import warnings
warnings.filterwarnings('ignore')

getcontext().prec = 25

# Tree shape vertices (from competition description)
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

print("Setup complete")

Setup complete


In [2]:
def parse_value(val):
    """Parse value from submission format (with 's' prefix)."""
    if isinstance(val, str) and val.startswith('s'):
        return val[1:]
    return str(val)

def create_tree_polygon(x, y, angle):
    """Create a tree polygon at position (x, y) with given rotation angle.
    Uses standard precision for bounding box calculations."""
    x = float(x)
    y = float(y)
    angle = float(angle)
    
    coords = list(zip(TX, TY))
    poly = Polygon(coords)
    poly = affinity.rotate(poly, angle, origin=(0, 0))
    poly = affinity.translate(poly, x, y)
    return poly

def create_scaled_tree_polygon(x, y, angle, scale_factor=1e15):
    """Create tree polygon with scaling for strict precision overlap detection.
    This matches Kaggle's validation approach."""
    x = float(x)
    y = float(y)
    angle = float(angle)
    
    # Create polygon with scaled coordinates
    coords = [(tx * scale_factor, ty * scale_factor) for tx, ty in zip(TX, TY)]
    poly = Polygon(coords)
    
    # Rotate and translate with scaling
    poly = affinity.rotate(poly, angle, origin=(0, 0))
    poly = affinity.translate(poly, xoff=x * scale_factor, yoff=y * scale_factor)
    return poly

print("Functions defined")

Functions defined


In [3]:
def has_overlap_strict(polygons):
    """Check for overlaps using strict precision.
    Returns (has_overlap, tree_i, tree_j) for first overlap found."""
    n = len(polygons)
    for i in range(n):
        for j in range(i+1, n):
            if polygons[i].intersects(polygons[j]):
                # Check if it's more than just touching
                if not polygons[i].touches(polygons[j]):
                    # Check intersection area
                    intersection = polygons[i].intersection(polygons[j])
                    if intersection.area > 0:
                        return True, i, j
    return False, -1, -1

def find_all_overlaps(polygons):
    """Find all overlapping pairs."""
    overlaps = []
    n = len(polygons)
    for i in range(n):
        for j in range(i+1, n):
            if polygons[i].intersects(polygons[j]):
                if not polygons[i].touches(polygons[j]):
                    intersection = polygons[i].intersection(polygons[j])
                    if intersection.area > 0:
                        overlaps.append((i, j, intersection.area))
    return overlaps

print("Overlap detection functions defined")

Overlap detection functions defined


In [4]:
# Load baseline submission
df = pd.read_csv('/home/code/experiments/000_baseline/submission.csv')
print(f"Loaded {len(df)} rows")

# Parse into structured format
trees_by_n = {}
for _, row in df.iterrows():
    id_parts = row['id'].split('_')
    n = int(id_parts[0])
    idx = int(id_parts[1])
    
    if n not in trees_by_n:
        trees_by_n[n] = []
    
    trees_by_n[n].append({
        'idx': idx,
        'x': parse_value(row['x']),
        'y': parse_value(row['y']),
        'deg': parse_value(row['deg'])
    })

print(f"Parsed trees for N=1 to {max(trees_by_n.keys())}")
print(f"N=40 has {len(trees_by_n[40])} trees")

Loaded 20100 rows


Parsed trees for N=1 to 200
N=40 has 40 trees


In [5]:
# Check for overlaps in ALL N values using strict precision
print("Checking for overlaps with strict precision (1e15 scaling)...")
overlap_ns = []

for n in range(1, 201):
    trees = trees_by_n[n]
    # Create scaled polygons for strict overlap detection
    polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    
    has_overlap, i, j = has_overlap_strict(polygons)
    if has_overlap:
        overlap_ns.append(n)
        if n <= 50 or n == 40:  # Print details for small N and N=40
            print(f"  N={n}: Trees {i} and {j} overlap")

print(f"\nTotal N values with overlaps: {len(overlap_ns)}")
print(f"Overlapping N values: {overlap_ns}")

Checking for overlaps with strict precision (1e15 scaling)...
  N=25: Trees 16 and 22 overlap
  N=33: Trees 0 and 30 overlap
  N=40: Trees 10 and 17 overlap



Total N values with overlaps: 3
Overlapping N values: [25, 33, 40]


In [6]:
# Let's examine N=40 in detail
n = 40
trees = trees_by_n[n]
print(f"N={n} trees:")
for i, t in enumerate(trees[:5]):
    print(f"  Tree {i}: x={t['x']}, y={t['y']}, deg={t['deg']}")

# Find all overlapping pairs in N=40
polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
overlaps = find_all_overlaps(polygons)
print(f"\nOverlapping pairs in N={n}: {len(overlaps)}")
for i, j, area in overlaps[:5]:
    print(f"  Trees {i} and {j}: intersection area = {area:.2e}")

N=40 trees:
  Tree 0: x=-1.689632103422091, y=-0.247894431182563, deg=252.060472972624126
  Tree 1: x=0.953906339823958, y=-0.552668496076432, deg=252.151405771974453
  Tree 2: x=-0.821313951420922, y=0.356433575901729, deg=252.217176846480044
  Tree 3: x=-1.689632103415816, y=0.489826127465764, deg=252.060472935714216
  Tree 4: x=0.842519162318000, y=-1.882363352068191, deg=246.370622269343670

Overlapping pairs in N=40: 1
  Trees 10 and 17: intersection area = 9.38e-02


In [7]:
def fix_overlaps_for_n(trees, max_iterations=100, separation=0.0001):
    """Fix overlaps by moving trees apart.
    Uses iterative approach: find overlap, move trees apart, repeat."""
    trees = [dict(t) for t in trees]  # Make copies
    
    for iteration in range(max_iterations):
        # Create scaled polygons
        polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
        overlaps = find_all_overlaps(polygons)
        
        if not overlaps:
            return trees, iteration
        
        # Fix each overlap by moving trees apart
        for i, j, area in overlaps:
            # Get centers of the two trees
            xi, yi = float(trees[i]['x']), float(trees[i]['y'])
            xj, yj = float(trees[j]['x']), float(trees[j]['y'])
            
            # Calculate direction from i to j
            dx = xj - xi
            dy = yj - yi
            dist = math.sqrt(dx*dx + dy*dy)
            
            if dist < 1e-10:
                # Trees at same position, move in random direction
                dx, dy = 1.0, 0.0
                dist = 1.0
            
            # Normalize and apply separation
            dx /= dist
            dy /= dist
            
            # Move both trees apart (half separation each)
            trees[i]['x'] = str(xi - dx * separation / 2)
            trees[i]['y'] = str(yi - dy * separation / 2)
            trees[j]['x'] = str(xj + dx * separation / 2)
            trees[j]['y'] = str(yj + dy * separation / 2)
    
    return trees, max_iterations

print("Fix function defined")

Fix function defined


In [8]:
# Fix overlaps for all N values that have them
fixed_trees_by_n = {}
fix_stats = []

for n in range(1, 201):
    trees = trees_by_n[n]
    
    if n in overlap_ns:
        # Fix overlaps
        fixed_trees, iterations = fix_overlaps_for_n(trees, max_iterations=200, separation=0.001)
        fixed_trees_by_n[n] = fixed_trees
        
        # Verify fix
        polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in fixed_trees]
        has_overlap, _, _ = has_overlap_strict(polygons)
        
        fix_stats.append({
            'n': n,
            'iterations': iterations,
            'fixed': not has_overlap
        })
        
        if has_overlap:
            print(f"WARNING: N={n} still has overlaps after {iterations} iterations!")
        else:
            print(f"N={n}: Fixed in {iterations} iterations")
    else:
        fixed_trees_by_n[n] = trees

print(f"\nFixed {len(overlap_ns)} N values")






Fixed 3 N values


In [9]:
# Final validation - check ALL N values
print("Final validation with strict precision...")
final_overlaps = []

for n in range(1, 201):
    trees = fixed_trees_by_n[n]
    polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    has_overlap, i, j = has_overlap_strict(polygons)
    if has_overlap:
        final_overlaps.append(n)
        print(f"  N={n}: Still has overlap between trees {i} and {j}")

if final_overlaps:
    print(f"\n⚠️ WARNING: {len(final_overlaps)} N values still have overlaps!")
else:
    print("\n✅ All N values pass strict overlap validation!")

Final validation with strict precision...
  N=25: Still has overlap between trees 1 and 4
  N=33: Still has overlap between trees 0 and 14
  N=40: Still has overlap between trees 0 and 20





In [10]:
# Calculate score for fixed submission
def calculate_score(trees_by_n):
    """Calculate total score."""
    total_score = 0
    per_n_scores = {}
    per_n_sides = {}
    
    for n in range(1, 201):
        trees = trees_by_n[n]
        # Use standard precision for bounding box
        polygons = [create_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
        union = unary_union(polygons)
        bounds = union.bounds
        width = bounds[2] - bounds[0]
        height = bounds[3] - bounds[1]
        side = max(width, height)
        
        score = (side ** 2) / n
        per_n_scores[n] = score
        per_n_sides[n] = side
        total_score += score
    
    return total_score, per_n_scores, per_n_sides

total_score, per_n_scores, per_n_sides = calculate_score(fixed_trees_by_n)
print(f"Fixed submission score: {total_score:.6f}")
print(f"Target: 68.888293")
print(f"Gap: {total_score - 68.888293:.6f}")

Fixed submission score: 70.686718
Target: 68.888293
Gap: 1.798425


In [11]:
# Compare with baseline
baseline_score, baseline_per_n, baseline_sides = calculate_score(trees_by_n)
print(f"Baseline score: {baseline_score:.6f}")
print(f"Fixed score: {total_score:.6f}")
print(f"Difference: {total_score - baseline_score:.6f}")

# Show which N values changed
print("\nN values with score changes:")
for n in range(1, 201):
    diff = per_n_scores[n] - baseline_per_n[n]
    if abs(diff) > 1e-10:
        print(f"  N={n}: {baseline_per_n[n]:.6f} -> {per_n_scores[n]:.6f} ({diff:+.6f})")

Baseline score: 70.615791
Fixed score: 70.686718
Difference: 0.070926

N values with score changes:
  N=25: 0.372144 -> 0.402051 (+0.029907)
  N=33: 0.369358 -> 0.384177 (+0.014819)
  N=40: 0.362148 -> 0.388348 (+0.026200)


In [12]:
# Create submission CSV
rows = []
for n in range(1, 201):
    trees = fixed_trees_by_n[n]
    for t in trees:
        rows.append({
            'id': f"{n:03d}_{t['idx']}",
            'x': f"s{t['x']}",
            'y': f"s{t['y']}",
            'deg': f"s{t['deg']}"
        })

submission_df = pd.DataFrame(rows)
print(f"Submission shape: {submission_df.shape}")
print(f"Expected: (20100, 4)")

# Save submission
submission_df.to_csv('/home/code/experiments/001_fix_overlaps/submission.csv', index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)
print("\nSubmission saved!")

Submission shape: (20100, 4)
Expected: (20100, 4)

Submission saved!


In [None]:
# Save metrics
metrics = {
    'cv_score': total_score,
    'baseline_score': baseline_score,
    'score_change': total_score - baseline_score,
    'overlap_ns_fixed': overlap_ns,
    'final_overlaps': final_overlaps,
    'target': 68.888293,
    'gap': total_score - 68.888293
}

with open('/home/code/experiments/001_fix_overlaps/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved!")
print(f"\nFinal CV Score: {total_score:.6f}")

In [13]:
# More aggressive overlap fixing - use larger separation and smarter approach
def fix_overlaps_aggressive(trees, max_iterations=1000, initial_separation=0.01):
    """Fix overlaps more aggressively by moving trees apart."""
    trees = [dict(t) for t in trees]  # Make copies
    separation = initial_separation
    
    for iteration in range(max_iterations):
        # Create scaled polygons
        polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
        overlaps = find_all_overlaps(polygons)
        
        if not overlaps:
            return trees, iteration, True
        
        # Increase separation if we're stuck
        if iteration > 0 and iteration % 50 == 0:
            separation *= 1.5
        
        # Fix each overlap by moving trees apart
        for i, j, area in overlaps:
            # Get centers of the two trees
            xi, yi = float(trees[i]['x']), float(trees[i]['y'])
            xj, yj = float(trees[j]['x']), float(trees[j]['y'])
            
            # Calculate direction from i to j
            dx = xj - xi
            dy = yj - yi
            dist = math.sqrt(dx*dx + dy*dy)
            
            if dist < 1e-10:
                # Trees at same position, move in random direction
                dx, dy = 1.0, 0.0
                dist = 1.0
            
            # Normalize and apply separation
            dx /= dist
            dy /= dist
            
            # Move both trees apart (half separation each)
            trees[i]['x'] = str(xi - dx * separation / 2)
            trees[i]['y'] = str(yi - dy * separation / 2)
            trees[j]['x'] = str(xj + dx * separation / 2)
            trees[j]['y'] = str(yj + dy * separation / 2)
    
    return trees, max_iterations, False

# Try aggressive fix on the problematic N values
print("Trying aggressive overlap fixing...")
for n in [25, 33, 40]:
    trees = trees_by_n[n]
    fixed_trees, iterations, success = fix_overlaps_aggressive(trees, max_iterations=500, initial_separation=0.005)
    
    # Verify
    polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in fixed_trees]
    has_overlap, i, j = has_overlap_strict(polygons)
    
    if not has_overlap:
        print(f"N={n}: Fixed in {iterations} iterations!")
        fixed_trees_by_n[n] = fixed_trees
    else:
        print(f"N={n}: Still has overlap after {iterations} iterations (separation grew to large)")

Trying aggressive overlap fixing...


N=25: Fixed in 64 iterations!


N=33: Fixed in 280 iterations!


N=40: Fixed in 101 iterations!


In [None]:
# Final validation after aggressive fix
print("Final validation with strict precision...")
final_overlaps = []

for n in range(1, 201):
    trees = fixed_trees_by_n[n]
    polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    has_overlap, i, j = has_overlap_strict(polygons)
    if has_overlap:
        final_overlaps.append(n)
        print(f"  N={n}: Still has overlap between trees {i} and {j}")

if final_overlaps:
    print(f"\n⚠️ WARNING: {len(final_overlaps)} N values still have overlaps!")
else:
    print("\n✅ All N values pass strict overlap validation!")

In [None]:
# Calculate final score
total_score, per_n_scores, per_n_sides = calculate_score(fixed_trees_by_n)
print(f"Fixed submission score: {total_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Score change: {total_score - baseline_score:+.6f}")
print(f"Target: 68.888293")
print(f"Gap: {total_score - 68.888293:.6f}")

In [None]:
# Create final submission CSV
rows = []
for n in range(1, 201):
    trees = fixed_trees_by_n[n]
    for t in trees:
        rows.append({
            'id': f"{n:03d}_{t['idx']}",
            'x': f"s{t['x']}",
            'y': f"s{t['y']}",
            'deg': f"s{t['deg']}"
        })

submission_df = pd.DataFrame(rows)
print(f"Submission shape: {submission_df.shape}")

# Save submission
submission_df.to_csv('/home/code/experiments/001_fix_overlaps/submission.csv', index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)
print("Submission saved!")