# Loop 3 Analysis: Fix Submission Overlap Issue

The previous submission was rejected with the error 'Overlapping trees in group 002'. This analysis needs to:
1. Validate the correct submission file
2. Understand why the previous submission had overlaps
3. Identify a path forward to beat the target

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import warnings
warnings.filterwarnings('ignore')

# Tree shape definition
# Outline of one tree in its local frame, stored as two parallel lists:
# vertex i is (TX[i], TY[i]). Each list has 16 entries and the last vertex
# repeats the first one (0, 0.8), so the ring is explicitly closed.
# The x values are mirrored around 0 (TX[i] == -TX[15 - i]) while the y values
# are palindromic, i.e. the outline is symmetric about the vertical axis.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125, 0]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5, 0.8]

def get_tree_polygon(x, y, angle_deg):
    """Return the tree outline rotated by ``angle_deg`` and placed at ``(x, y)``.

    The base outline is built from the module-level ``TX``/``TY`` vertex
    lists. It is rotated about its local origin ``(0, 0)`` first and only
    then shifted by ``x`` / ``y``, so the rotation never moves the anchor.

    Args:
        x: Horizontal offset applied after rotation.
        y: Vertical offset applied after rotation.
        angle_deg: Rotation angle handed to ``affinity.rotate``.

    Returns:
        The transformed polygon.
    """
    base_outline = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(base_outline, angle_deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def has_overlap(poly1, poly2, tolerance=1e-9):
    """Tell whether two shapes share an area strictly larger than ``tolerance``.

    Args:
        poly1: Shape exposing ``intersects`` and ``intersection``.
        poly2: Shape passed to those two methods of ``poly1``.
        tolerance: Area threshold; shared areas at or below it count as
            "no overlap".

    Returns:
        ``False`` when ``poly1.intersects(poly2)`` is false (the intersection
        is then never computed); otherwise whether the intersection's
        ``area`` exceeds ``tolerance``.
    """
    if poly1.intersects(poly2):
        shared_area = poly1.intersection(poly2).area
        return shared_area > tolerance
    return False

def get_side_length(polys):
    """Return the side of the axis-aligned square that bounds all ``polys``.

    The bounding box of a set of shapes is just the min/max over the
    individual bounding boxes, so the boxes are combined directly instead of
    building a geometric union first (which is far more expensive and is not
    needed to obtain the extent).

    Args:
        polys: Iterable of shapes exposing ``bounds`` as
            ``(minx, miny, maxx, maxy)``.

    Returns:
        ``max(width, height)`` of the combined bounding box, or NaN when
        there is no non-empty shape to measure.
    """
    # Empty geometries report no usable box (an empty tuple or NaNs,
    # depending on the shapely version); leave them out, as a union would.
    boxes = [
        box
        for box in (poly.bounds for poly in polys)
        if len(box) == 4 and all(value == value for value in box)
    ]
    if not boxes:
        return float('nan')

    min_x = min(box[0] for box in boxes)
    min_y = min(box[1] for box in boxes)
    max_x = max(box[2] for box in boxes)
    max_y = max(box[3] for box in boxes)
    return max(max_x - min_x, max_y - min_y)

def check_overlaps(polys):
    """Return True as soon as any unordered pair in ``polys`` overlaps.

    Every pair ``(i, j)`` with ``i < j`` is tested with ``has_overlap`` using
    its default tolerance; evaluation stops at the first overlapping pair.

    Args:
        polys: Indexable sequence of shapes.

    Returns:
        ``True`` if at least one pair overlaps, otherwise ``False``.
    """
    count = len(polys)
    return any(
        has_overlap(polys[first], polys[second])
        for first in range(count)
        for second in range(first + 1, count)
    )

# Marker output so the cell visibly confirms that it ran to the end.
print('Functions defined')

Functions defined


In [2]:
# Load the CORRECT submission file (restored from snapshot)
df = pd.read_csv('/home/submission/submission.csv')

# x, y and deg are handled as text here: every 's' character is removed and
# the remainder is parsed as a float into a separate *_val column.
for raw_col, parsed_col in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
    df[parsed_col] = df[raw_col].astype(str).str.replace('s', '').astype(float)

# The group size N is the integer before the first underscore of the id.
df['n'] = df['id'].map(lambda tree_id: int(str(tree_id).split('_')[0]))

print(f'Loaded {len(df)} rows')
print(f'N values: {df["n"].min()} to {df["n"].max()}')

Loaded 20100 rows
N values: 1 to 200


In [3]:
# Validate N=2 specifically (the group that had overlaps)
n2_group = df.loc[df['n'] == 2]
print(f'N=2 has {len(n2_group)} trees')
for _, row in n2_group.iterrows():
    print(f"  {row['id']}: x={row['x_val']:.6f}, y={row['y_val']:.6f}, deg={row['deg_val']:.6f}")

# Build one polygon per tree and run the pairwise overlap check on the group.
polys = [
    get_tree_polygon(x_pos, y_pos, angle)
    for x_pos, y_pos, angle in zip(n2_group['x_val'], n2_group['y_val'], n2_group['deg_val'])
]
print(f'\nOverlap check for N=2: {check_overlaps(polys)}')

# Report the raw intersection area as well: check_overlaps only flags areas
# above has_overlap's default tolerance (1e-9), so a tiny positive area still
# prints False above.
# NOTE(review): confirm the official scorer applies the same tolerance.
if len(polys) == 2:
    intersection = polys[0].intersection(polys[1])
    print(f'Intersection area: {intersection.area}')

N=2 has 2 trees
  002_0: x=0.154097, y=-0.038541, deg=203.629378
  002_1: x=-0.154097, y=-0.561459, deg=23.629378

Overlap check for N=2: False
Intersection area: 1.777433712564009e-24


In [4]:
# Full validation of all N groups
print('Validating all N groups...')
overlap_groups = []
total_score = 0

for n in range(1, 201):
    group = df.loc[df['n'] == n]
    polys = [
        get_tree_polygon(x_pos, y_pos, angle)
        for x_pos, y_pos, angle in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]

    # Remember every group in which at least one pair of trees overlaps.
    has_overlap_flag = check_overlaps(polys)
    if has_overlap_flag:
        overlap_groups.append(n)

    # Per-group score is the bounding square's area divided by the group size.
    side = get_side_length(polys)
    score = side**2 / n
    total_score += score

print(f'\nTotal groups with overlaps: {len(overlap_groups)}')
if overlap_groups:
    print(f'Groups with overlaps: {overlap_groups[:10]}...')
print(f'\nTotal score: {total_score:.6f}')

Validating all N groups...



Total groups with overlaps: 0

Total score: 70.627569


In [5]:
# Summarise the validation: report the gap to the target when no group
# overlaps, otherwise report how many groups are invalid.
if overlap_groups:
    print(f'INVALID: {len(overlap_groups)} groups have overlaps')
else:
    print('Submission is VALID!')
    print(f'Score: {total_score:.6f}')
    print(f'Target: 68.894234')
    print(f'Gap: {total_score - 68.894234:.6f}')

Submission is VALID!
Score: 70.627569
Target: 68.894234
Gap: 1.733335


In [6]:
# Check the 70.559 solution for overlaps
df_559 = pd.read_csv('/home/nonroot/snapshots/santa-2025/21156852373/code/ensemble_70.559.csv')

# Strip every 's' character from the text columns and parse them as floats.
for raw_col, parsed_col in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
    df_559[parsed_col] = df_559[raw_col].astype(str).str.replace('s', '').astype(float)

# The group size N is the integer before the first underscore of the id.
df_559['n'] = df_559['id'].map(lambda tree_id: int(str(tree_id).split('_')[0]))

print(f'Loaded {len(df_559)} rows')

# Validate all N groups
print('Validating 70.559 solution...')
overlap_groups_559 = []
total_score_559 = 0

for n in range(1, 201):
    group = df_559.loc[df_559['n'] == n]
    polys = [
        get_tree_polygon(x_pos, y_pos, angle)
        for x_pos, y_pos, angle in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]

    # Remember every group in which at least one pair of trees overlaps.
    has_overlap_flag = check_overlaps(polys)
    if has_overlap_flag:
        overlap_groups_559.append(n)

    # Per-group score is the bounding square's area divided by the group size.
    side = get_side_length(polys)
    score = side**2 / n
    total_score_559 += score

print(f'\nTotal groups with overlaps: {len(overlap_groups_559)}')
if overlap_groups_559:
    print(f'Groups with overlaps: {overlap_groups_559[:20]}')
print(f'\nTotal score: {total_score_559:.6f}')

Loaded 20100 rows
Validating 70.559 solution...



Total groups with overlaps: 67
Groups with overlaps: [2, 4, 5, 16, 35, 36, 40, 46, 47, 48, 53, 54, 55, 56, 59, 62, 63, 64, 69, 70]

Total score: 70.559048


In [8]:
# Create ensemble: for each N, take the best VALID configuration from multiple sources
import os
import glob

# Hand-picked sources are listed first. Order matters: when two sources reach
# the same score for a group, the selection step keeps the one seen first.
sources = [
    '/home/submission/submission.csv',  # Current best valid (70.627569)
    '/home/nonroot/snapshots/santa-2025/21329069570/code/code/solutions/submission_70.627569.csv',
    '/home/nonroot/snapshots/santa-2025/21156852373/code/ensemble_70.559.csv',
]

# Add more sources from snapshots. glob returns matches in arbitrary order,
# so each batch is sorted to keep the source order reproducible.
for pattern in ['/home/nonroot/snapshots/santa-2025/*/code/*.csv',
                '/home/nonroot/snapshots/santa-2025/*/code/code/*.csv',
                '/home/nonroot/snapshots/santa-2025/*/code/solutions/*.csv']:
    sources.extend(sorted(glob.glob(pattern)))

# De-duplicate while keeping first-seen order. (A set would drop the order
# above, and its iteration order for strings can differ between runs.)
sources = list(dict.fromkeys(sources))
print(f'Found {len(sources)} potential sources')

# Load all sources. Files that cannot be read or parsed are skipped on
# purpose (best effort), but the reason is kept in `skipped_sources` so it can
# be inspected instead of being silently discarded.
required_columns = {'id', 'x', 'y', 'deg'}
all_dfs = {}
skipped_sources = {}  # path -> short description of why it was not loaded
for src in sources:
    try:
        df_src = pd.read_csv(src)
        missing_columns = required_columns - set(df_src.columns)
        if missing_columns:
            skipped_sources[src] = f'missing columns: {sorted(missing_columns)}'
            continue
        df_src['x_val'] = df_src['x'].astype(str).str.replace('s', '').astype(float)
        df_src['y_val'] = df_src['y'].astype(str).str.replace('s', '').astype(float)
        df_src['deg_val'] = df_src['deg'].astype(str).str.replace('s', '').astype(float)
        df_src['n'] = df_src['id'].apply(lambda x: int(str(x).split('_')[0]))
        all_dfs[src] = df_src
    except Exception as exc:
        # Only ordinary errors are tolerated; KeyboardInterrupt / SystemExit
        # still propagate so the cell can be interrupted.
        skipped_sources[src] = repr(exc)

print(f'Loaded {len(all_dfs)} valid sources')

Found 106 potential sources


Loaded 104 valid sources


In [None]:
# For each N, find the best VALID configuration across all sources
best_per_n = {}
best_score_per_n = {}

for n in range(1, 201):
    best_score = float('inf')
    best_rows = None
    best_src = None

    for src, df_src in all_dfs.items():
        group = df_src.loc[df_src['n'] == n]
        # A usable group must contain exactly n trees.
        if len(group) != n:
            continue

        polys = [
            get_tree_polygon(x_pos, y_pos, angle)
            for x_pos, y_pos, angle in zip(group['x_val'], group['y_val'], group['deg_val'])
        ]

        # Configurations with any overlapping pair are not candidates.
        if check_overlaps(polys):
            continue

        # Score the group; a strictly smaller score replaces the current best,
        # so on ties the source seen first is kept.
        side = get_side_length(polys)
        score = side**2 / n

        if score < best_score:
            best_score = score
            best_rows = group[['id', 'x', 'y', 'deg']].copy()
            best_src = src

    # Groups for which no valid configuration was found are left out.
    if best_rows is not None:
        best_per_n[n] = best_rows
        best_score_per_n[n] = best_score

    if n % 20 == 0:
        print(f'Processed N={n}')