# Experiment 004: Fix Overlaps in Pre-Optimized Solutions

The pre-optimized baseline (score ≈ 70.6) contains subtle overlaps that cause Kaggle to reject the submission.
This experiment attempts to fix these overlaps by:
1. Detecting overlapping tree pairs with a strict polygon check on coordinates scaled by 1e15
2. Slightly separating overlapping trees
3. Re-validating and submitting

In [None]:
import math
import os
from decimal import Decimal, getcontext
import numpy as np
import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import json
from tqdm import tqdm

# Use higher precision
# (the setting applies to Decimal arithmetic performed through the current context)
getcontext().prec = 30
# Every coordinate is multiplied by this factor before being converted to float
# for shapely, and divided by it again when bounds are read back.
scale_factor = Decimal('1e15')  # Same as getting-started kernel

print(f"Decimal precision: {getcontext().prec}")
print(f"Scale factor: {scale_factor}")

In [None]:
# ChristmasTree class with polygon creation
class ChristmasTree:
    """One tree of the packing: a centre, a rotation and the resulting polygon.

    Attributes:
        center_x, center_y: Decimal position of the tree's local origin
            (the middle of the line where the trunk meets the lowest tier).
        angle: Decimal rotation passed to ``affinity.rotate`` (the
            submission's ``deg`` column).
        polygon: shapely Polygon in *scaled* coordinates, i.e. every value
            is multiplied by the module-level ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        # Route everything through str() so float inputs become their short
        # decimal repr instead of the exact binary expansion.
        self.center_x, self.center_y, self.angle = (
            Decimal(str(value)) for value in (center_x, center_y, angle)
        )
        self.polygon = self._create_polygon()

    def _create_polygon(self):
        """Build the 15-vertex outline, rotate it about the origin, then move it
        to the tree's centre. All coordinates are scaled by ``scale_factor``."""
        half = Decimal('2')
        quarter = Decimal('4')

        # Tree dimensions in unscaled units.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand side of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (top_w / half, tier_1_y),
            (top_w / quarter, tier_1_y),
            (mid_w / half, tier_2_y),
            (mid_w / quarter, tier_2_y),
            (base_w / half, base_y),
            (trunk_w / half, base_y),
            (trunk_w / half, trunk_bottom_y),
        ]
        # The left-hand side is the mirror image, walked bottom-up so the
        # ring continues from the trunk back towards the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal('0.0'), tip_y)] + right_side + left_side

        initial_polygon = Polygon([
            (float(x * scale_factor), float(y * scale_factor))
            for x, y in outline
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        return affinity.translate(rotated, xoff=shift_x, yoff=shift_y)

    def update_polygon(self):
        """Rebuild ``polygon`` after ``center_x``/``center_y``/``angle`` changed."""
        self.polygon = self._create_polygon()

# Progress marker: printed once the class definition above has executed.
print("ChristmasTree class defined")

In [None]:
def parse_value(val):
    """Return *val* as text, dropping one leading 's' when *val* is a string.

    Non-string inputs are converted with ``str()`` and never stripped.
    """
    has_prefix = isinstance(val, str) and val[:1] == 's'
    return val[1:] if has_prefix else str(val)

def load_submission(path):
    """Load a submission CSV and group its trees by puzzle size N.

    Args:
        path: Location of a CSV with columns ``id``, ``x``, ``y`` and ``deg``.
            ``id`` has the form ``<N>_<index>``; the value columns may carry
            an ``s`` prefix, which is removed via ``parse_value``.

    Returns:
        dict mapping N (int) to the list of ``ChristmasTree`` objects for that
        N, in the order the rows appear in the file.
    """
    # dtype=str keeps every cell as the exact text from the file. Without it,
    # value columns that lack the 's' prefix would be parsed as float64 and
    # lose digits before they ever reach Decimal.
    df = pd.read_csv(path, dtype=str)
    trees_by_n = {}

    for _, row in df.iterrows():
        n = int(row['id'].split('_')[0])
        tree = ChristmasTree(
            center_x=parse_value(row['x']),
            center_y=parse_value(row['y']),
            angle=parse_value(row['deg']),
        )
        trees_by_n.setdefault(n, []).append(tree)

    return trees_by_n

def check_overlap(trees):
    """List the index pairs ``(i, j)``, ``i < j``, of trees that overlap.

    Two trees count as overlapping when their polygons intersect and the
    contact is more than a mere touch. Pairs are returned in ascending
    ``(i, j)`` order.
    """
    shapes = [tree.polygon for tree in trees]
    return [
        (i, j)
        for i, first in enumerate(shapes)
        for j, second in enumerate(shapes[i + 1:], start=i + 1)
        if first.intersects(second) and not first.touches(second)
    ]

def calculate_side_length(trees):
    """Return the longer side of the joint axis-aligned bounding box.

    The polygons are stored in scaled coordinates, so each bound is divided
    by ``scale_factor`` to get back to submission units. The result is a
    Decimal.
    """
    merged_bounds = unary_union([tree.polygon for tree in trees]).bounds
    # bounds are ordered (minx, miny, maxx, maxy); go through str() so the
    # float's short repr, not its binary expansion, enters Decimal.
    minx, miny, maxx, maxy = [
        Decimal(str(merged_bounds[k])) / scale_factor for k in range(4)
    ]
    return max(maxx - minx, maxy - miny)

# Progress marker: printed once the helper functions above have been defined.
print("Helper functions defined")

In [None]:
# Load the pre-optimized baseline that was rejected
# (hard-coded snapshot location; change it when running in another environment)
baseline_path = '/home/nonroot/snapshots/santa-2025/21329067673/submission/submission.csv'
print(f"Loading baseline from: {baseline_path}")
# trees_by_n maps each N to its list of ChristmasTree objects (see load_submission)
trees_by_n = load_submission(baseline_path)
print(f"Loaded {len(trees_by_n)} N values")

In [None]:
# Scan N = 1..200 and remember which configurations contain overlapping pairs
print("Checking for overlaps in all N values...")
overlapping_n = []      # N values with at least one overlapping pair
overlap_details = {}    # N -> list of (i, j) index pairs reported by check_overlap

for n in tqdm(range(1, 201), desc="Checking overlaps"):
    if n not in trees_by_n:
        continue
    overlaps = check_overlap(trees_by_n[n])
    if not overlaps:
        continue
    overlapping_n.append(n)
    overlap_details[n] = overlaps

print(f"\nN values with overlaps: {len(overlapping_n)}")
if overlapping_n:
    # Keep the log readable: show at most the first 20 offenders.
    if len(overlapping_n) > 20:
        print(f"Overlapping N values: {overlapping_n[:20]}...")
    else:
        print(f"Overlapping N values: {overlapping_n}")

In [None]:
# Try to fix overlaps by slightly separating trees
def separate_overlapping_trees(trees, overlap_pairs, separation=1e-6):
    """Nudge the second tree of every overlapping pair away from the first.

    For each ``(i, j)`` in *overlap_pairs*, tree ``j`` is shifted by
    *separation* along the direction from the centre of tree ``i`` to the
    centre of tree ``j``, and its polygon is rebuilt. When the two centres
    (almost) coincide the shift is applied along +x instead.

    The trees are modified in place.

    Returns:
        ``(trees, count)`` where ``trees`` is the same list that was passed
        in and ``count`` is the number of pairs processed. This is not a
        guarantee that the overlaps are actually gone.
    """
    processed = 0
    for first, second in overlap_pairs:
        anchor = trees[first]
        mover = trees[second]

        # Centre-to-centre vector, anchor -> mover.
        dx = float(mover.center_x - anchor.center_x)
        dy = float(mover.center_y - anchor.center_y)
        dist = (dx**2 + dy**2)**0.5

        if dist > 1e-10:
            step_x = separation * dx / dist
            step_y = separation * dy / dist
        else:
            # No usable direction: fall back to a fixed push along +x.
            step_x, step_y = separation, 0

        mover.center_x = mover.center_x + Decimal(str(step_x))
        mover.center_y = mover.center_y + Decimal(str(step_y))
        mover.update_polygon()
        processed += 1

    return trees, processed

# Progress marker: printed once the separation helper above has been defined.
print("Separation function defined")

In [None]:
# Repair every N that has overlaps, escalating the push distance step by step
print("Attempting to fix overlaps...")
fixed_trees_by_n = {}
still_overlapping = []

for n in tqdm(range(1, 201), desc="Fixing overlaps"):
    if n not in trees_by_n:
        continue

    # Note: this is the same list object as in trees_by_n, and the separation
    # helper edits the trees in place.
    trees = trees_by_n[n]
    overlaps = check_overlap(trees)

    if overlaps:
        # Each attempt is applied on top of the previous ones and works on the
        # freshly detected set of overlapping pairs.
        for sep in (1e-6, 1e-5, 1e-4, 1e-3):
            trees, _ = separate_overlapping_trees(trees, overlaps, separation=sep)
            overlaps = check_overlap(trees)
            if not overlaps:
                break
        else:
            # Even the largest push left overlaps behind.
            still_overlapping.append(n)

    fixed_trees_by_n[n] = trees

print(f"\nN values still with overlaps after fix: {len(still_overlapping)}")
if still_overlapping:
    print(f"Still overlapping: {still_overlapping}")

In [None]:
# If there are still overlaps, use greedy solution for those N values
if still_overlapping:
    print(f"\nUsing greedy fallback for {len(still_overlapping)} N values with persistent overlaps...")
    
    # Load greedy solution
    # (output of experiment 003; only read when at least one N needs the fallback)
    greedy_path = '/home/code/experiments/003_greedy_from_scratch/submission.csv'
    greedy_trees = load_submission(greedy_path)
    
    # Replace the whole configuration for each failing N with the greedy one.
    for n in still_overlapping:
        fixed_trees_by_n[n] = greedy_trees[n]
        print(f"  N={n}: Using greedy fallback")

# Final validation: re-run the overlap check on whatever ended up in
# fixed_trees_by_n (separated baseline trees or greedy replacements).
print("\nFinal validation...")
final_overlapping = [
    n
    for n in range(1, 201)
    if n in fixed_trees_by_n and check_overlap(fixed_trees_by_n[n])
]

print(f"Final N values with overlaps: {len(final_overlapping)}")
if final_overlapping:
    print(f"STILL OVERLAPPING: {final_overlapping}")
else:
    # The check mark is written as the real character; a mis-decoded byte
    # sequence would show up as garbage in the log.
    print("✓ All N values validated - NO OVERLAPS!")

In [None]:
# Score = sum over N of (bounding-square side)^2 / N
total_score = 0
per_n_scores = {}

for n in range(1, 201):
    if n not in fixed_trees_by_n:
        continue
    side = calculate_side_length(fixed_trees_by_n[n])
    # side is a Decimal; square it there and only then drop to float.
    per_n_scores[n] = float(side ** 2) / n
    total_score += per_n_scores[n]

print(f"Total score: {total_score:.6f}")
print(f"\nComparison:")
# The two reference numbers below are constants recorded from earlier runs.
print(f"  Original baseline: 70.615745")
print(f"  Our fixed version: {total_score:.6f}")
print(f"  Greedy fallback:   169.458992")
print(f"  Difference from baseline: {total_score - 70.615745:.6f}")

In [None]:
# Create submission DataFrame
# One row per tree, ids of the form '<N zero-padded to 3>_<tree index>'.
index = [f'{n:03d}_{t}' for n in range(1, 201) for t in range(n)]
cols = ['x', 'y', 'deg']

# Write every Decimal at full precision in plain fixed-point notation, with
# the 's' prefix that forces string format. The values must NOT be rounded
# here: the overlap validation ran on the unrounded coordinates, and rounding
# to 6 decimals can shift a tree by up to 5e-7. That is enough to cancel a
# 1e-6 separation and reintroduce the very overlaps this experiment removes.
tree_data = [
    [f's{tree.center_x:f}', f's{tree.center_y:f}', f's{tree.angle:f}']
    for n in range(1, 201)
    for tree in fixed_trees_by_n[n]
]

# pandas raises if the number of rows does not match the expected index,
# which catches any N that does not hold exactly N trees.
submission = pd.DataFrame(index=index, columns=cols, data=tree_data).rename_axis('id')

print(f"Submission shape: {submission.shape}")
print(f"\nFirst 5 rows:")
print(submission.head())

In [None]:
# Save submission
# The CSV is written to both locations; each directory is created first so a
# missing experiment folder does not abort the run after the first write.
experiment_dir = '/home/code/experiments/004_fix_overlaps'
for out_dir in ('/home/submission', experiment_dir):
    os.makedirs(out_dir, exist_ok=True)
    submission.to_csv(os.path.join(out_dir, 'submission.csv'))

# N values that had overlaps initially and were repaired by separation alone.
fixed_n_count = len(overlapping_n) - len(still_overlapping)

# Save metrics
metrics = {
    'cv_score': total_score,
    'overlapping_n_count': len(final_overlapping),
    'fixed_n_count': fixed_n_count,
    'greedy_fallback_count': len(still_overlapping),
    # JSON object keys must be strings.
    'per_n_scores': {str(k): v for k, v in per_n_scores.items()}
}

with open(os.path.join(experiment_dir, 'metrics.json'), 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nSubmission saved!")
print(f"CV Score: {total_score:.6f}")
print(f"Overlapping N values: {len(final_overlapping)}")
print(f"Fixed N values: {fixed_n_count}")
print(f"Greedy fallback N values: {len(still_overlapping)}")