# Loop 1 Analysis: Fix Overlapping Trees

The baseline submission failed with 'Overlapping trees in group 004'. We need to:
1. Validate all groups for overlaps
2. Fix overlapping groups using valid configurations
3. Create a valid submission

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import json

# Decimal arithmetic in this notebook is carried out with 30 significant digits.
getcontext().prec = 30
# Multiplier applied to every tree coordinate (outline vertices and centre
# offsets) before it is converted to float for shapely; 1 keeps the geometry
# in the same units as the submission values.
scale_factor = 1

class ChristmasTree:
    """A single tree: a fixed 15-vertex outline placed at a position and rotation.

    Attributes:
        center_x, center_y: translation of the outline, stored as Decimal.
        angle: rotation about the outline's own origin, stored as Decimal and
            handed to ``affinity.rotate`` as a float.
        polygon: the shapely Polygon after rotation and translation.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        # Inputs go through str() first so floats and strings are parsed alike.
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        two = Decimal('2')
        four = Decimal('4')

        # Outline dimensions in the tree's local frame (origin at the base centre).
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, walking from below the tip down to
        # the bottom of the trunk.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up to the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal('0.0'), tip_y)] + right_side + left_side

        # Scaling happens in Decimal; only the final value is turned into a float.
        vertices = [
            (float(x * scale_factor), float(y * scale_factor))
            for x, y in outline
        ]

        local_shape = Polygon(vertices)
        turned = affinity.rotate(local_shape, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

print('ChristmasTree class defined')

ChristmasTree class defined


In [2]:
def parse_value(val):
    """Return a submission value as a plain numeric string.

    String values that begin with 's' have that single leading character
    removed; anything else is returned as ``str(val)``.
    """
    if not isinstance(val, str) or not val.startswith('s'):
        return str(val)
    return val[1:]

def load_trees_for_n(df, n):
    """Build the ChristmasTree objects for the group of size ``n``.

    Rows are selected by an ``id`` starting with the zero-padded three-digit
    group number followed by an underscore; the ``x``, ``y`` and ``deg`` values
    of each row are passed through ``parse_value`` before constructing the tree.
    Trees are returned in row order.
    """
    group_rows = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTree(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(group_rows['x'], group_rows['y'], group_rows['deg'])
    ]

def has_overlap(trees, tolerance=1e-10, strict=False):
    """Report which pairs of trees overlap.

    A pair is a candidate when the two polygons intersect and shapely does not
    classify the contact as ``touches``. By default a candidate is only
    reported when its intersection area exceeds ``tolerance``, which absorbs
    floating-point noise but also hides candidates whose intersection has zero
    area (for example a MultiPoint contact). Pass ``strict=True`` to report
    every candidate regardless of area.

    Args:
        trees: sequence of objects exposing a ``polygon`` attribute.
        tolerance: minimum intersection area for a pair to count (ignored when
            ``strict`` is true).
        strict: when true, any intersecting, non-touching pair is an overlap.

    Returns:
        ``(found, overlaps)`` where ``overlaps`` is a list of
        ``(i, j, intersection_area)`` tuples with ``i < j``.
    """
    if len(trees) <= 1:
        return False, []
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    overlaps = []

    for i, poly in enumerate(polygons):
        # The spatial index only narrows down candidates; the exact predicates
        # below make the decision.
        for idx in tree_index.query(poly):
            if idx <= i:  # Only check each pair once
                continue
            other = polygons[idx]
            if not poly.intersects(other) or poly.touches(other):
                continue
            area = poly.intersection(other).area
            if strict or area > tolerance:
                overlaps.append((i, idx, area))
    return len(overlaps) > 0, overlaps

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Every exterior vertex of every tree polygon is pooled; the result is the
    larger of the pooled x-extent and y-extent.
    """
    vertices = np.vstack(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )
    lower = vertices.min(axis=0)
    upper = vertices.max(axis=0)
    width, height = upper - lower
    return max(width, height)

print('Helper functions defined')

Helper functions defined


In [3]:
# Load the baseline submission
# (load_trees_for_n reads the 'id', 'x', 'y' and 'deg' columns of this frame).
baseline_path = '/home/code/experiments/001_baseline/submission.csv'
df = pd.read_csv(baseline_path)
print(f'Loaded submission with {len(df)} rows')

# Check all groups for overlaps
# Group sizes 1..200 are scanned. has_overlap is called with its default area
# tolerance, so a pair is only reported when its intersection area exceeds it;
# contacts with zero intersection area are not listed by this scan.
print('\nChecking all groups for overlaps...')
overlapping_groups = []  # group sizes with at least one reported pair
for n in range(1, 201):
    trees = load_trees_for_n(df, n)
    has_ovlp, overlaps = has_overlap(trees)
    if has_ovlp:
        overlapping_groups.append(n)
        print(f'  N={n}: OVERLAP! {len(overlaps)} pairs')
        for i, j, area in overlaps[:3]:  # Show first 3
            print(f'    Trees {i} and {j}: area={area:.2e}')

print(f'\nTotal overlapping groups: {len(overlapping_groups)}')
print(f'Groups: {overlapping_groups}')

Loaded submission with 20100 rows

Checking all groups for overlaps...



Total overlapping groups: 0
Groups: []


In [4]:
# Load sample submission as donor for valid configurations
sample_path = '/home/data/sample_submission.csv'
df_sample = pd.read_csv(sample_path)
print(f'Loaded sample submission with {len(df_sample)} rows')

# Check if sample submission has overlaps in the problematic groups
# Only the groups flagged in overlapping_groups are examined; when that list
# is empty the loop body never runs and nothing is printed below the heading.
print('\nChecking sample submission for overlaps in problematic groups...')
for n in overlapping_groups:
    trees = load_trees_for_n(df_sample, n)
    has_ovlp, overlaps = has_overlap(trees)
    if has_ovlp:
        print(f'  N={n}: OVERLAP in sample too!')
    else:
        print(f'  N={n}: Sample is valid')

Loaded sample submission with 20100 rows

Checking sample submission for overlaps in problematic groups...


In [5]:
# Function to replace a group in the submission
def replace_group(df_target, df_donor, n):
    """Return a copy of ``df_target`` whose group ``n`` rows come from ``df_donor``.

    Rows belong to group ``n`` when their ``id`` starts with the zero-padded
    three-digit group number and an underscore. The result is ordered
    numerically by (group, index within group) and carries a fresh 0..N-1
    index; neither input frame is modified.
    """
    prefix = f"{n:03d}_"

    def _numeric_order(row_id):
        # "012_10" -> (12, 10), so ids sort by number rather than by text.
        parts = row_id.split('_')
        return (int(parts[0]), int(parts[1]))

    kept = df_target.loc[~df_target['id'].str.startswith(prefix)]
    donated = df_donor.loc[df_donor['id'].str.startswith(prefix)]

    merged = pd.concat([kept, donated], ignore_index=True)
    merged['sort_key'] = merged['id'].map(_numeric_order)
    ordered = merged.sort_values('sort_key')
    return ordered.drop(columns='sort_key').reset_index(drop=True)

# Fix overlapping groups
# Start from a copy of the baseline and swap in the sample submission's rows
# for every flagged group. With an empty overlapping_groups list the loop does
# nothing and df_fixed is simply a copy of df.
df_fixed = df.copy()
for n in overlapping_groups:
    print(f'Replacing group {n}...')
    df_fixed = replace_group(df_fixed, df_sample, n)

print(f'\nFixed submission has {len(df_fixed)} rows')


Fixed submission has 20100 rows


In [6]:
# Verify no overlaps in fixed submission
# This re-runs the same has_overlap scan (default area tolerance) over group
# sizes 1..200, so it can only confirm what that check is able to detect.
print('Verifying fixed submission...')
still_overlapping = []  # group sizes that still report an overlapping pair
for n in range(1, 201):
    trees = load_trees_for_n(df_fixed, n)
    has_ovlp, overlaps = has_overlap(trees)
    if has_ovlp:
        still_overlapping.append(n)
        print(f'  N={n}: Still has overlap!')

if not still_overlapping:
    print('\u2705 No overlaps in fixed submission!')
else:
    print(f'\u274c Still have overlaps in: {still_overlapping}')

Verifying fixed submission...


✅ No overlaps in fixed submission!


In [7]:
# Calculate score for fixed submission
def calculate_score(df):
    """Sum ``side**2 / n`` over group sizes 1..200 of a submission frame.

    ``side`` is the bounding-square side of the group's trees. A group whose
    row count differs from ``n`` triggers a warning line and contributes
    nothing to the total.

    Returns:
        ``(total_score, scores_by_n)`` where ``scores_by_n`` holds one dict per
        scored group with the keys ``'n'``, ``'side'`` and ``'contribution'``.
    """
    running_total = 0
    per_group = []

    for n in range(1, 201):
        group = load_trees_for_n(df, n)
        if len(group) == n:
            side = get_bounding_box_side(group)
            contribution = side ** 2 / n
            running_total += contribution
            per_group.append({
                'n': n,
                'side': side,
                'contribution': contribution
            })
        else:
            print(f'Warning: N={n} has {len(group)} trees instead of {n}')

    return running_total, per_group

# Score df_fixed and compare it with the hard-coded target value 68.919154;
# the gap is printed both as an absolute difference and as a percentage of
# the target.
print('Calculating score for fixed submission...')
total_score, scores_by_n = calculate_score(df_fixed)
print(f'\nTotal Score: {total_score:.6f}')
print(f'Target Score: 68.919154')
print(f'Gap: {total_score - 68.919154:.6f} ({(total_score - 68.919154) / 68.919154 * 100:.2f}%)')

Calculating score for fixed submission...



Total Score: 70.676102
Target Score: 68.919154
Gap: 1.756948 (2.55%)


In [None]:
# Save fixed submission
# NOTE(review): this writes whatever df_fixed holds when the cell runs. If no
# groups were flagged by the overlap scan, that is an unchanged copy of the
# baseline - confirm this is the frame you intend to submit.
import os

# Create both output folders up front so neither to_csv call can fail on a
# missing directory.
os.makedirs('/home/code/experiments/002_fixed_baseline', exist_ok=True)
os.makedirs('/home/submission', exist_ok=True)
df_fixed.to_csv('/home/code/experiments/002_fixed_baseline/submission.csv', index=False)
df_fixed.to_csv('/home/submission/submission.csv', index=False)
print('Saved fixed submission to /home/submission/submission.csv')

# Save metrics
# The score of df_fixed is stored next to the experiment's submission file.
metrics = {'cv_score': total_score}
with open('/home/code/experiments/002_fixed_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f'Metrics: {metrics}')

In [8]:
# Investigate group 004 more closely - Kaggle detected overlap there
trees_4 = load_trees_for_n(df, 4)
print(f"N=4 has {len(trees_4)} trees")

# Check each pair with ZERO tolerance
# Unlike has_overlap, no area threshold is applied here: every intersecting
# pair is printed together with shapely's touches verdict and the area, type
# and length of the intersection geometry.
from itertools import combinations
for i, j in combinations(range(len(trees_4)), 2):
    poly_i = trees_4[i].polygon
    poly_j = trees_4[j].polygon
    if poly_i.intersects(poly_j):
        intersection = poly_i.intersection(poly_j)
        print(f"Trees {i} and {j}:")
        print(f"  intersects: True")
        print(f"  touches: {poly_i.touches(poly_j)}")
        print(f"  intersection area: {intersection.area}")
        print(f"  intersection type: {intersection.geom_type}")
        if hasattr(intersection, 'length'):
            print(f"  intersection length: {intersection.length}")

N=4 has 4 trees
Trees 1 and 2:
  intersects: True
  touches: False
  intersection area: 0.0
  intersection type: MultiPoint
  intersection length: 0.0


In [9]:
# Check sample submission's N=4 configuration\ntrees_4_sample = load_trees_for_n(df_sample, 4)\nprint(f\"Sample N=4 has {len(trees_4_sample)} trees\")\n\nfor i, j in combinations(range(len(trees_4_sample)), 2):\n    poly_i = trees_4_sample[i].polygon\n    poly_j = trees_4_sample[j].polygon\n    if poly_i.intersects(poly_j):\n        intersection = poly_i.intersection(poly_j)\n        print(f\"Trees {i} and {j}:\")\n        print(f\"  intersects: True\")\n        print(f\"  touches: {poly_i.touches(poly_j)}\")\n        print(f\"  intersection type: {intersection.geom_type}\")\n\n# Compare bounding boxes\nside_baseline = get_bounding_box_side(trees_4)\nside_sample = get_bounding_box_side(trees_4_sample)\nprint(f\"\\nBaseline N=4 side: {side_baseline:.6f}\")\nprint(f\"Sample N=4 side: {side_sample:.6f}\")\nprint(f\"Difference: {side_sample - side_baseline:.6f}\")

In [10]:
# Check sample submission's N=4 configuration
trees_4_sample = load_trees_for_n(df_sample, 4)
print(f"Sample N=4 has {len(trees_4_sample)} trees")

# Print every pair of sample trees whose polygons intersect, together with
# shapely's touches verdict and the type of the intersection geometry.
for i, j in combinations(range(len(trees_4_sample)), 2):
    poly_i = trees_4_sample[i].polygon
    poly_j = trees_4_sample[j].polygon
    if poly_i.intersects(poly_j):
        intersection = poly_i.intersection(poly_j)
        print(f"Trees {i} and {j}:")
        print(f"  intersects: True")
        print(f"  touches: {poly_i.touches(poly_j)}")
        print(f"  intersection type: {intersection.geom_type}")

# Compare bounding boxes
# A positive difference means the sample layout needs a larger square than
# the baseline layout for N=4.
side_baseline = get_bounding_box_side(trees_4)
side_sample = get_bounding_box_side(trees_4_sample)
# A single backslash gives the intended blank line; the doubled backslash
# used previously printed the two characters "\n" literally.
print(f"\nBaseline N=4 side: {side_baseline:.6f}")
print(f"Sample N=4 side: {side_sample:.6f}")
print(f"Difference: {side_sample - side_baseline:.6f}")

Sample N=4 has 4 trees
\nBaseline N=4 side: 1.290806
Sample N=4 side: 2.039257
Difference: 0.748451


In [11]:
# Let's look at other pre-optimized submissions in snapshots\nimport os\nimport glob\n\n# Find all CSV files in snapshots\ncsv_files = glob.glob('/home/nonroot/snapshots/santa-2025/**/*.csv', recursive=True)\nprint(f\"Found {len(csv_files)} CSV files in snapshots\")\n\n# Check a few for N=4 configurations\nvalid_n4_files = []\nfor csv_file in csv_files[:50]:  # Check first 50\n    try:\n        df_test = pd.read_csv(csv_file)\n        if len(df_test) == 20100 and 'id' in df_test.columns:\n            trees_4_test = load_trees_for_n(df_test, 4)\n            if len(trees_4_test) == 4:\n                # Check for any intersection\n                has_intersection = False\n                for i, j in combinations(range(4), 2):\n                    if trees_4_test[i].polygon.intersects(trees_4_test[j].polygon):\n                        if not trees_4_test[i].polygon.touches(trees_4_test[j].polygon):\n                            has_intersection = True\n                            break\n                if not has_intersection:\n                    side = get_bounding_box_side(trees_4_test)\n                    valid_n4_files.append((csv_file, side))\n    except Exception as e:\n        pass\n\nprint(f\"\\nFound {len(valid_n4_files)} files with valid N=4 (no intersections)\")\nif valid_n4_files:\n    valid_n4_files.sort(key=lambda x: x[1])\n    print(\"\\nTop 5 by smallest N=4 side:\")\n    for f, s in valid_n4_files[:5]:\n        print(f\"  {s:.6f}: {f}\")

In [13]:
# Try to fix N=4 by slightly perturbing the trees to eliminate point intersection
# First, let's see the current N=4 configuration
# (trees_4 holds the baseline N=4 trees; values are printed as stored Decimals).
print("Current N=4 configuration:")
for i, tree in enumerate(trees_4):
    print(f"  Tree {i}: x={tree.center_x}, y={tree.center_y}, angle={tree.angle}")

# The intersection is between trees 1 and 2
# Let's try moving tree 1 slightly
from decimal import Decimal

# Create a new N=4 configuration with a small perturbation
def create_perturbed_n4(df, perturbation=0.0001):
    """Search small shifts of tree 1 in the N=4 group for a collision-free layout.

    Tree 1 is moved by every combination of ``-perturbation``, ``0`` and
    ``+perturbation`` in x and y (nine candidates, including the unshifted
    one). A candidate is accepted when no pair of its trees intersects without
    merely touching; among accepted candidates the one with the smallest
    bounding-square side wins.

    Returns:
        ``(best_config, best_side)``. ``best_config`` is a list of
        ``(x, y, angle)`` float tuples, or ``None`` (with ``best_side`` equal
        to infinity) when every candidate collides.
    """
    base_trees = load_trees_for_n(df, 4)
    offsets = (-perturbation, 0, perturbation)

    def _shifted(dx, dy):
        # Rebuild all four trees; only the tree at position 1 is displaced.
        candidate = []
        for position, tree in enumerate(base_trees):
            if position != 1:
                x_text = str(tree.center_x)
                y_text = str(tree.center_y)
            else:
                x_text = str(float(tree.center_x) + dx)
                y_text = str(float(tree.center_y) + dy)
            candidate.append(ChristmasTree(x_text, y_text, str(tree.angle)))
        return candidate

    def _collides(candidate):
        # True as soon as one pair intersects in a way shapely does not call "touches".
        return any(
            candidate[a].polygon.intersects(candidate[b].polygon)
            and not candidate[a].polygon.touches(candidate[b].polygon)
            for a, b in combinations(range(4), 2)
        )

    best_config = None
    best_side = float('inf')
    for dx in offsets:
        for dy in offsets:
            candidate = _shifted(dx, dy)
            if _collides(candidate):
                continue
            side = get_bounding_box_side(candidate)
            if side < best_side:
                best_side = side
                best_config = [
                    (float(t.center_x), float(t.center_y), float(t.angle))
                    for t in candidate
                ]

    return best_config, best_side

# Run the search on the baseline frame with the default perturbation size and
# report how much the N=4 bounding side changes relative to side_baseline.
# best_config is None when none of the tried shifts is collision-free.
best_config, best_side = create_perturbed_n4(df)
if best_config:
    print(f"\nFound valid N=4 configuration with side={best_side:.6f}")
    print(f"Original side: {side_baseline:.6f}")
    print(f"Difference: {best_side - side_baseline:.6f}")
else:
    print("\nCould not find valid configuration with small perturbation")

Current N=4 configuration:
  Tree 0: x=-0.324747789589372, y=0.132109978088185, angle=156.370622145636389
  Tree 1: x=0.315354346242638, y=0.132109978063475, angle=156.370622269264089
  Tree 2: x=0.324747789592379, y=-0.732109978069476, angle=336.370622269264004
  Tree 3: x=-0.315354348134818, y=-0.732109978094186, angle=336.370622145636446

Found valid N=4 configuration with side=1.290906
Original side: 1.290806
Difference: 0.000100


In [None]:
# Create the fixed submission with perturbed N=4
def update_n4_in_df(df, new_config):
    """Return a copy of ``df`` with the N=4 rows overwritten from ``new_config``.

    ``new_config`` is a sequence of ``(x, y, angle)`` values; entry ``i`` is
    written to the row whose ``id`` equals ``004_<i>``. Each value is stored as
    text with an ``s`` prefix. The input frame is left untouched.
    """
    updated = df.copy()

    for position, values in enumerate(new_config):
        x, y, angle = values
        selector = updated['id'].eq(f"004_{position}")
        for column, value in (('x', x), ('y', y), ('deg', angle)):
            updated.loc[selector, column] = f"s{value}"

    return updated

# Update the submission
# df_fixed is rebuilt from the baseline df with only the N=4 rows replaced.
# NOTE(review): best_config is None when the perturbation search found nothing,
# in which case this call fails while iterating it - confirm a result exists.
# NOTE(review): no to_csv call follows this reassignment in the visible cells,
# so the perturbed frame is presumably not yet written to disk - TODO confirm.
df_fixed = update_n4_in_df(df, best_config)

# Verify the fix
trees_4_fixed = load_trees_for_n(df_fixed, 4)
print("Fixed N=4 configuration:")
for i, tree in enumerate(trees_4_fixed):
    print(f"  Tree {i}: x={tree.center_x}, y={tree.center_y}, angle={tree.angle}")

# Check for intersections
# Same criterion as the perturbation search: a pair counts when the polygons
# intersect and shapely does not report the contact as touches.
has_intersection = False
for i, j in combinations(range(4), 2):
    if trees_4_fixed[i].polygon.intersects(trees_4_fixed[j].polygon):
        if not trees_4_fixed[i].polygon.touches(trees_4_fixed[j].polygon):
            has_intersection = True
            print(f"  Trees {i} and {j} still intersect!")

if not has_intersection:
    print("\n✅ No intersections in fixed N=4!")
    
# Calculate new score
# total_score is the value computed earlier for the previous df_fixed.
print("\nCalculating new total score...")
total_score_fixed, _ = calculate_score(df_fixed)
print(f"New Total Score: {total_score_fixed:.6f}")
print(f"Original Score: {total_score:.6f}")
print(f"Difference: {total_score_fixed - total_score:.6f}")