# Evolver Loop 5 Analysis: Understanding Overlap Failures

## Critical Issue
Our submissions keep failing Kaggle validation with overlap errors:
- exp_001: "Overlapping trees in group 002"
- exp_002: "Overlapping trees in group 003"
- exp_004: "Overlapping trees in group 060"

Our local validation passes with tolerance=1e-15, but Kaggle's validation is stricter.

## Key Questions
1. What is Kaggle's exact overlap detection method?
2. Why does a different N value (group) fail in each submission?
3. Can we use a more robust overlap-detection method in our local validation?

In [None]:
import sys
sys.path.insert(0, '/home/code')

import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from decimal import Decimal, getcontext
getcontext().prec = 50

# Locations of the two submissions being compared:
# the baseline (which passes Kaggle) and the submission that failed.
baseline_path = '/home/code/experiments/000_baseline/submission.csv'
failed_path = '/home/code/experiments/004_optimize_ensemble/submission.csv'

# Read both CSVs up front so the later cells can inspect them side by side.
baseline_df = pd.read_csv(baseline_path)
failed_df = pd.read_csv(failed_path)

print(f"Baseline shape: {baseline_df.shape}")
print(f"Failed submission shape: {failed_df.shape}")

In [None]:
# Christmas tree outline (15 vertices), placed by rotation then translation
def get_tree_polygon(x, y, angle):
    """Build the tree-shaped Shapely polygon for one placement.

    The outline is defined around the origin, rotated by ``angle`` about
    (0, 0) with ``affinity.rotate`` (degrees, Shapely's default unit), and
    then shifted by ``(x, y)``.

    Args:
        x: Horizontal offset applied after rotation.
        y: Vertical offset applied after rotation.
        angle: Rotation passed to ``affinity.rotate``.

    Returns:
        The rotated and translated ``Polygon``.
    """
    # Tree dimensions
    trunk_w, trunk_h = 0.15, 0.2
    base_w, mid_w, top_w = 0.7, 0.4, 0.25
    tip_y, tier_1_y, tier_2_y, base_y = 0.8, 0.5, 0.25, 0.0
    trunk_bottom_y = -trunk_h

    # Right-hand side, walking from just below the tip down to the
    # bottom-right corner of the trunk.
    right_side = [
        (top_w / 2, tier_1_y),
        (top_w / 4, tier_1_y),
        (mid_w / 2, tier_2_y),
        (mid_w / 4, tier_2_y),
        (base_w / 2, base_y),
        (trunk_w / 2, base_y),
        (trunk_w / 2, trunk_bottom_y),
    ]
    # The shape is symmetric about x = 0, so the left-hand side is the
    # mirrored right-hand side traversed in the opposite direction.
    left_side = [(-px, py) for px, py in reversed(right_side)]

    # 1 tip + 7 right + 7 left = 15 vertices
    outline = [(0.0, tip_y)] + right_side + left_side

    rotated = affinity.rotate(Polygon(outline), angle, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

print("Tree polygon function defined")

In [None]:
# Check the problematic N=60 in detail
def get_trees_for_n(df, n):
    """Return the ``(x, y, angle)`` placements stored in ``df`` for group ``n``.

    Rows are selected by the zero-padded ``NNN_`` prefix of their ``id``.
    Each ``x``, ``y`` and ``deg`` cell is converted to a string, stripped of
    any leading ``s`` characters and parsed as a float.

    Args:
        df: Submission frame with ``id``, ``x``, ``y`` and ``deg`` columns.
        n: Group number whose rows should be extracted.

    Returns:
        A list of ``(x, y, angle)`` float tuples in row order; empty when
        no id carries the group's prefix.
    """
    def as_number(cell):
        # Cells may be plain numbers or strings carrying an 's' prefix.
        return float(str(cell).lstrip('s'))

    group = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        (as_number(cell_x), as_number(cell_y), as_number(cell_deg))
        for cell_x, cell_y, cell_deg in zip(group['x'], group['y'], group['deg'])
    ]

# Pull the 60-tree group out of the baseline and the failed submission
baseline_n60, failed_n60 = (
    get_trees_for_n(frame, 60) for frame in (baseline_df, failed_df)
)

print(f"Baseline N=60: {len(baseline_n60)} trees")
print(f"Failed N=60: {len(failed_n60)} trees")

In [None]:
# Check if N=60 is different between baseline and failed
def compare_trees(trees1, trees2):
    """Report whether two tree lists are element-wise equal.

    Args:
        trees1: First sequence of tree records.
        trees2: Second sequence of tree records.

    Returns:
        ``(True, "Identical")`` when both sequences have the same length and
        every pair compares equal; otherwise ``(False, reason)`` where the
        reason names either the length mismatch or the first differing index.
    """
    if len(trees1) == len(trees2):
        # Locate the first position at which the two lists disagree, if any.
        mismatch = next(
            (
                (pos, left, right)
                for pos, (left, right) in enumerate(zip(trees1, trees2))
                if left != right
            ),
            None,
        )
        if mismatch is None:
            return True, "Identical"
        i, t1, t2 = mismatch
        return False, f"Tree {i} differs: {t1} vs {t2}"
    return False, "Different number of trees"

same, msg = compare_trees(baseline_n60, failed_n60)
print(f"N=60 same in both? {same}: {msg}")

# When the groups differ, list every position whose placement changed
if not same:
    changed = [
        (i, b, f)
        for i, (b, f) in enumerate(zip(baseline_n60, failed_n60))
        if b != f
    ]
    for i, b, f in changed:
        print(f"Tree {i}: baseline={b}, failed={f}")

In [None]:
# Check for overlaps in N=60 using different methods
def check_overlaps_shapely(trees, tolerance=0):
    """Find tree pairs whose intersection area exceeds ``tolerance``.

    Args:
        trees: Iterable of ``(x, y, angle)`` placements.
        tolerance: Area threshold; a pair is reported only when its
            intersection area is strictly greater than this value.

    Returns:
        A list of ``(i, j, area)`` tuples with ``i < j``, in pair order.
    """
    shapes = [get_tree_polygon(tx, ty, deg) for tx, ty, deg in trees]

    found = []
    for first, shape_a in enumerate(shapes):
        # Only look at later shapes so each unordered pair is tested once.
        for second, shape_b in enumerate(shapes[first + 1:], start=first + 1):
            if not shape_a.intersects(shape_b):
                continue
            shared_area = shape_a.intersection(shape_b).area
            if shared_area > tolerance:
                found.append((first, second, shared_area))
    return found

def check_overlaps_relate(trees):
    """Find tree pairs whose interiors share a 2D region, using ``relate()``.

    Args:
        trees: Iterable of ``(x, y, angle)`` placements.

    Returns:
        A list of ``(i, j, area, matrix)`` tuples with ``i < j``, where
        ``area`` is the intersection area and ``matrix`` is the string
        returned by ``relate()`` for the pair.
    """
    shapes = [get_tree_polygon(tx, ty, deg) for tx, ty, deg in trees]
    count = len(shapes)
    index_pairs = (
        (first, second)
        for first in range(count)
        for second in range(first + 1, count)
    )

    found = []
    for first, second in index_pairs:
        matrix = shapes[first].relate(shapes[second])
        # The first character is the interior/interior entry; '2' means the
        # two interiors intersect in a 2D region.
        if matrix[0] != '2':
            continue
        shared = shapes[first].intersection(shapes[second])
        found.append((first, second, shared.area, matrix))
    return found

# Baseline group: area-based check first, then the relate()-based check
print("Baseline N=60 overlaps (tolerance=0):")
overlaps_b = check_overlaps_shapely(baseline_n60, tolerance=0)
print(f"  Found: {len(overlaps_b)}")

print("\nBaseline N=60 overlaps (relate method):")
overlaps_b_relate = check_overlaps_relate(baseline_n60)
print(f"  Found: {len(overlaps_b_relate)}")

# Failed group: same two checks, plus a per-pair listing for relate()
print("\nFailed N=60 overlaps (tolerance=0):")
overlaps_f = check_overlaps_shapely(failed_n60, tolerance=0)
print(f"  Found: {len(overlaps_f)}")

print("\nFailed N=60 overlaps (relate method):")
overlaps_f_relate = check_overlaps_relate(failed_n60)
print(f"  Found: {len(overlaps_f_relate)}")
for first, second, area, matrix in overlaps_f_relate:
    print(f"  Trees {first}-{second}: area={area:.2e}, relate={matrix}")

In [None]:
# Run the relate()-based check on every group (N = 1..200) of the failed submission
print("Checking ALL N values for overlaps using relate() method...")

problematic_ns = []
for n in range(1, 201):
    overlaps = check_overlaps_relate(get_trees_for_n(failed_df, n))
    if not overlaps:
        continue
    problematic_ns.append((n, overlaps))
    print(f"N={n}: {len(overlaps)} overlaps")
    # Keep the log short: only the first three pairs per group
    for first, second, area, matrix in overlaps[:3]:
        print(f"  Trees {first}-{second}: area={area:.2e}, relate={matrix}")

print(f"\nTotal problematic N values: {len(problematic_ns)}")

In [None]:
# For each flagged group, check how the baseline fares and whether the
# failed submission actually uses a different configuration there
print("\nComparing problematic N values with baseline...")

for n, overlaps in problematic_ns:
    baseline_trees = get_trees_for_n(baseline_df, n)
    failed_trees = get_trees_for_n(failed_df, n)

    baseline_overlaps = check_overlaps_relate(baseline_trees)
    same, _ = compare_trees(baseline_trees, failed_trees)

    print(
        f"N={n}: Failed has {len(overlaps)} overlaps, "
        f"Baseline has {len(baseline_overlaps)} overlaps, Same config: {same}"
    )

In [None]:
# Key insight: the ensemble picks configurations from different sources,
# and some of them may contain tiny overlaps that pass our validation but
# fail Kaggle's.
#
# Remedy: fall back to the baseline rows for every group flagged by the
# relate()-based check.

print("Creating a truly safe submission...")
print("Strategy: For any N where the ensemble config has overlaps (via relate method),")
print("          replace with baseline configuration.")

# Start from the failed submission and swap out the flagged groups
fixed_df = failed_df.copy()

for n, _ in problematic_ns:
    print(f"Replacing N={n} with baseline...")

    prefix = f"{n:03d}_"
    # Rows to keep: everything that does not belong to group n
    kept_rows = fixed_df[~fixed_df['id'].str.startswith(prefix)]
    # Rows to add: the baseline's version of group n
    baseline_n_data = baseline_df[baseline_df['id'].str.startswith(prefix)]
    fixed_df = pd.concat([kept_rows, baseline_n_data], ignore_index=True)

# Restore id order: sort numerically on the two '_'-separated id fields,
# using temporary helper columns that are dropped afterwards
id_fields = [identifier.split('_') for identifier in fixed_df['id']]
fixed_df['n'] = [int(fields[0]) for fields in id_fields]
fixed_df['tree_idx'] = [int(fields[1]) for fields in id_fields]
ordered = fixed_df.sort_values(['n', 'tree_idx'])
fixed_df = ordered.drop(columns=['n', 'tree_idx']).reset_index(drop=True)

print(f"\nFixed submission shape: {fixed_df.shape}")

In [None]:
# Re-run the relate()-based check on every group of the fixed submission
print("Verifying fixed submission...")

final_problems = []
for n in range(1, 201):
    overlaps = check_overlaps_relate(get_trees_for_n(fixed_df, n))
    if overlaps:
        final_problems.append((n, overlaps))
        print(f"N={n}: Still has {len(overlaps)} overlaps!")

if final_problems:
    print(f"❌ Still have {len(final_problems)} problematic N values")
else:
    print("✅ No overlaps detected in fixed submission!")

In [None]:
# Score the fixed submission and the baseline with the project scorer.
# NOTE(review): score_submission comes from the project's utils module; it is
# assumed to return a 3-tuple whose first item is the total score — confirm.
from utils import score_submission

fixed_score, scores_by_n, _ = score_submission(fixed_df, check_overlaps=False)
baseline_score, _, _ = score_submission(baseline_df, check_overlaps=False)

# Reported as baseline minus fixed, so a positive value means the fixed
# score is the smaller of the two.
gain_over_baseline = baseline_score - fixed_score
target_score = 68.888293
gap_to_target = fixed_score - target_score

print(f"Fixed submission score: {fixed_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Improvement over baseline: {gain_over_baseline:.6f}")
print("\nTarget: 68.888293")
print(f"Gap to target: {gap_to_target:.6f}")

In [None]:
# Save the fixed submission, copy it to the submission directory, and
# record the run's metrics next to it
import shutil
import json
import os

work_dir = '/home/code/experiments/005_fixed_submission'
submission_dir = '/home/submission'

# shutil.copy does not create missing parent directories, so make sure both
# the experiment folder and the copy destination exist before writing.
os.makedirs(work_dir, exist_ok=True)
os.makedirs(submission_dir, exist_ok=True)

fixed_df.to_csv(f'{work_dir}/submission.csv', index=False)
shutil.copy(f'{work_dir}/submission.csv', f'{submission_dir}/submission.csv')

# Coerce to built-in types: json.dump rejects numpy scalar types that do not
# subclass Python's float/int, and the scorer's return types are not
# guaranteed here.
metrics = {
    'cv_score': float(fixed_score),
    'baseline_score': float(baseline_score),
    'improvement_over_baseline': float(baseline_score - fixed_score),
    'problematic_ns_fixed': [int(n) for n, _ in problematic_ns],
    # True only when the verification pass found no remaining overlaps
    'is_valid': len(final_problems) == 0
}

with open(f'{work_dir}/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"Saved to {work_dir}/submission.csv")
print(f"Metrics: {metrics}")