# Loop 2 Analysis: Overlapping Trees Issue

The submission failed with the error 'Overlapping trees in group 002'. To fix this, we need to:
1. Understand why overlaps occurred
2. Add overlap checking to our ensemble code
3. Find valid configurations only

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity

# Tree polygon vertices
# TX/TY are parallel lists holding the 15 outline vertices of one tree.
# The outline starts at the tip (0, 0.8), walks down the right-hand side
# (x >= 0) to the trunk bottom at y = -0.2, then back up the left-hand side.
# The widest tier spans x = -0.35 .. 0.35 at y = 0.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
# Un-rotated, un-translated template; create_tree() derives every placed tree from it.
BASE_TREE = Polygon(zip(TX, TY))

def parse_value(val):
    """Convert one submission cell to a float.

    String cells may carry a leading 's' marker (e.g. 's0.154'); that single
    character is dropped before conversion. Every other value, string or
    not, is handed to float() unchanged.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_marker else val)

def create_tree(x, y, deg):
    """Build the polygon for one placed tree.

    BASE_TREE is first rotated by `deg` about the origin (0, 0) and the
    rotated shape is then shifted by (x, y).
    """
    rotated = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def check_overlap(tree1, tree2):
    """Return True when the two trees intersect in more than a mere touch.

    Pairs that do not intersect at all, and pairs for which
    tree1.touches(tree2) holds, are reported as non-overlapping.
    """
    if not tree1.intersects(tree2):
        return False
    # They intersect; only a pure touch is tolerated.
    return not tree1.touches(tree2)

def check_group_overlaps(df, n):
    """Report whether any two trees of group `n` overlap.

    Rows belonging to the group are selected by the zero-padded id prefix
    (for example '002_' when n == 2).

    Returns:
        (True, (i, j)) for the first overlapping pair found, where i < j are
        positions within the group's rows in DataFrame order, or
        (False, None) when no pair overlaps.
    """
    in_group = df['id'].str.startswith(f"{n:03d}_")
    polygons = [
        create_tree(parse_value(row['x']), parse_value(row['y']), parse_value(row['deg']))
        for _, row in df[in_group].iterrows()
    ]

    # Exhaustive pairwise scan; bail out on the first hit.
    for i, first in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            if check_overlap(first, polygons[j]):
                return True, (i, j)
    return False, None

print("Functions defined")

Functions defined


In [2]:
# Load the failed submission
# `df` is the frame every later cell in this notebook checks against.
df = pd.read_csv('/home/submission/submission.csv')
print(f"Loaded {len(df)} rows")

# Check group 002 specifically
# `pair` holds the positions (within the group's rows) of the first
# overlapping pair, or None when the group is clean.
has_overlap, pair = check_group_overlaps(df, 2)
print(f"Group 002 has overlap: {has_overlap}, pair: {pair}")

# Show the trees in group 002
# `group_002` is reused by the comparison and plotting cells further down.
group_002 = df[df['id'].str.startswith('002_')]
print("\nGroup 002 trees:")
print(group_002)

Loaded 20100 rows
Group 002 has overlap: True, pair: (0, 1)

Group 002 trees:
      id                       x                       y  \
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   

                       deg  
1  s144.272760863123583164  
2  s324.272760863123533426  


In [3]:
# Check all groups for overlaps
# Collect (group number, first overlapping pair) for groups 1..200 and echo
# only the first ten hits while scanning.
overlapping_groups = []
for n in range(1, 201):
    has_overlap, pair = check_group_overlaps(df, n)
    if not has_overlap:
        continue
    overlapping_groups.append((n, pair))
    if len(overlapping_groups) <= 10:
        print(f"Group {n:03d} has overlap between trees {pair}")

print(f"\nTotal groups with overlaps: {len(overlapping_groups)}")
# Short lists are shown in full; long ones as head and tail.
if len(overlapping_groups) <= 10:
    print(f"All overlapping groups: {overlapping_groups}")
else:
    print(f"First 10: {overlapping_groups[:10]}")
    print(f"Last 10: {overlapping_groups[-10:]}")

Group 002 has overlap between trees (0, 1)
Group 004 has overlap between trees (0, 1)
Group 005 has overlap between trees (0, 1)
Group 008 has overlap between trees (2, 6)
Group 011 has overlap between trees (3, 5)
Group 016 has overlap between trees (0, 3)
Group 017 has overlap between trees (2, 15)
Group 021 has overlap between trees (13, 15)
Group 026 has overlap between trees (5, 22)
Group 027 has overlap between trees (10, 25)



Total groups with overlaps: 80
First 10: [(2, (0, 1)), (4, (0, 1)), (5, (0, 1)), (8, (2, 6)), (11, (3, 5)), (16, (0, 3)), (17, (2, 15)), (21, (13, 15)), (26, (5, 22)), (27, (10, 25))]
Last 10: [(168, (0, 5)), (175, (0, 33)), (176, (1, 39)), (177, (1, 39)), (178, (1, 39)), (179, (1, 39)), (185, (0, 32)), (190, (0, 12)), (191, (0, 22)), (192, (0, 22))]


In [None]:
# Let's check which snapshot the N=2 came from and if it has overlaps
# NOTE(review): Path is imported here rather than in the first cell; the
# snapshot-scanning cells below rely on this cell having been run.
from pathlib import Path

# According to the log, N=2 came from 21145966992
snapshot_path = Path('/home/nonroot/snapshots/santa-2025/21145966992/submission/submission.csv')
df_snapshot = pd.read_csv(snapshot_path)

# Same check as for the ensemble submission, now on the source snapshot.
# This overwrites the has_overlap / pair values from the earlier cells.
has_overlap, pair = check_group_overlaps(df_snapshot, 2)
print(f"Snapshot 21145966992 group 002 has overlap: {has_overlap}")

# Show the trees
# `group_002_snap` is reused by the comparison and plotting cells below.
group_002_snap = df_snapshot[df_snapshot['id'].str.startswith('002_')]
print("\nSnapshot group 002 trees:")
print(group_002_snap)

In [None]:
# Compare the two - maybe the 's' prefix conversion is causing issues?
# Let's check if the values are the same

# Print the parsed x / y / deg of every group-002 row, first for the ensemble
# submission and then for the snapshot, to 15 decimal places.
for heading, rows in (
    ("Ensemble submission values:", group_002),
    ("\nSnapshot values:", group_002_snap),
):
    print(heading)
    for _, row in rows.iterrows():
        print(f"  {row['id']}: x={parse_value(row['x']):.15f}, y={parse_value(row['y']):.15f}, deg={parse_value(row['deg']):.15f}")

In [None]:
# Visualize the overlap
import matplotlib.pyplot as plt


def _plot_group(ax, rows, title):
    """Draw every tree described by `rows` on `ax`.

    Each row's x / y / deg are parsed with parse_value and turned into a
    polygon with create_tree; the polygon is drawn as a translucent fill with
    a thin black outline. The axes get a title, axis labels, an equal aspect
    ratio and a grid.
    """
    ax.set_title(title)
    for _, row in rows.iterrows():
        tree = create_tree(
            parse_value(row['x']),
            parse_value(row['y']),
            parse_value(row['deg']),
        )
        xs, ys = tree.exterior.xy
        ax.fill(xs, ys, alpha=0.5)
        ax.plot(xs, ys, 'k-', linewidth=0.5)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    # Equal aspect so the tree shapes are not distorted.
    ax.set_aspect('equal')
    ax.grid(True)


# Left panel: the failed ensemble submission; right panel: the snapshot that
# group 002 was taken from.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
_plot_group(axes[0], group_002, 'Ensemble Submission - Group 002')
_plot_group(axes[1], group_002_snap, 'Snapshot 21145966992 - Group 002')

plt.tight_layout()
plt.savefig('/home/code/exploration/loop2_overlap_analysis.png', dpi=100)
plt.show()
print("Saved visualization")

In [None]:
# Check if the snapshot itself has overlaps
print("Checking all snapshots for overlaps in group 002...")

snapshot_dir = Path('/home/nonroot/snapshots/santa-2025')

# Keep only snapshot directories that actually contain a submission file
# *before* taking the first 10. Slicing the raw directory listing first would
# let stray files or incomplete snapshots eat into the sample, so fewer than
# 10 snapshots (possibly none) would be checked.
candidate_csvs = [
    entry / 'submission' / 'submission.csv'
    for entry in sorted(snapshot_dir.iterdir())
    if entry.is_dir()
]
submission_paths = [csv_path for csv_path in candidate_csvs if csv_path.exists()][:10]  # Check first 10

for submission_path in submission_paths:
    # <snapshot>/submission/submission.csv -> <snapshot>
    snapshot_name = submission_path.parent.parent.name
    try:
        df_snap = pd.read_csv(submission_path)
        has_overlap, _ = check_group_overlaps(df_snap, 2)
        if has_overlap:
            print(f"{snapshot_name}: OVERLAP in group 002")
        else:
            print(f"{snapshot_name}: OK")
    except Exception as e:
        # Best effort: one unreadable snapshot should not stop the scan.
        print(f"{snapshot_name}: Error - {e}")

In [None]:
# The issue might be that our local scoring doesn't check overlaps!
# Let's find a snapshot that has NO overlaps in any group

def check_all_overlaps(df):
    """Return the group numbers in 1..200 that contain an overlapping pair.

    Each group is tested with check_group_overlaps; the result is a list of
    the offending group numbers in ascending order (empty when all are clean).
    """
    return [n for n in range(1, 201) if check_group_overlaps(df, n)[0]]

print("Finding snapshots with no overlaps...")
clean_snapshots = []

# Walk every snapshot directory in name order; entries that are not
# directories or that lack submission/submission.csv are skipped silently.
for snapshot_path in sorted(snapshot_dir.iterdir()):
    if not snapshot_path.is_dir():
        continue
    submission_path = snapshot_path / 'submission' / 'submission.csv'
    if not submission_path.exists():
        continue
    try:
        df_snap = pd.read_csv(submission_path)
        overlapping = check_all_overlaps(df_snap)
        if overlapping:
            print(f"{snapshot_path.name}: {len(overlapping)} overlapping groups")
        else:
            clean_snapshots.append(snapshot_path.name)
            print(f"{snapshot_path.name}: CLEAN (no overlaps)")
    except Exception as e:
        # Report and move on so one bad snapshot does not abort the scan.
        print(f"{snapshot_path.name}: Error - {e}")

print(f"\nFound {len(clean_snapshots)} clean snapshots")