# Loop 1 Analysis: Fix Overlapping Trees Issue

The baseline submission was rejected with the error 'Overlapping trees in group 070'. To fix this, we need to:
1. Verify overlap detection
2. Find and fix overlapping trees
3. Create a valid submission

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely import STRtree
import os

# Switch the working directory so that the relative paths used later in this
# notebook (e.g. 'experiments/001_baseline/baseline.csv') resolve against /home/code.
os.chdir('/home/code')

# Tree vertices: (x, y) pairs in the tree's local frame, listed in the order
# in which they are joined to form the outline polygon.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
# Per-axis coordinate arrays: (TX[i], TY[i]) is the i-th outline vertex.
TX, TY = (np.array(axis) for axis in zip(*_TREE_OUTLINE))

def get_tree_polygon(x, y, deg):
    """Build the tree outline as a shapely Polygon placed at (x, y).

    The outline vertices (TX, TY) are rotated by `deg` degrees about the
    local origin using the standard 2-D rotation matrix, then translated
    by (x, y).
    """
    theta = np.radians(deg)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Rotate all vertices at once, then shift them to the tree position.
    rotated_x = TX * cos_t - TY * sin_t + x
    rotated_y = TX * sin_t + TY * cos_t + y
    return Polygon(list(zip(rotated_x, rotated_y)))

def check_overlaps(df, n, min_area=1e-10):
    """Find pairs of trees in group `n` whose polygons overlap.

    Parameters
    ----------
    df : DataFrame with columns 'n', 'x_val', 'y_val' and 'deg_val'.
    n : group to check; the group is expected to contain exactly n trees.
    min_area : intersection area above which a pair counts as overlapping.
        Pairs that merely touch have zero intersection area and are ignored.
        NOTE(review): the baseline was rejected for an overlap in group 070
        that the default tolerance does not flag; pass a smaller value
        (e.g. 0) for a stricter check and confirm it against the validator.

    Returns
    -------
    list of (i, j, area) tuples, where i < j are positions within the group,
    or a message string if the group does not contain exactly n trees.
    """
    group = df[df['n'] == n]
    if len(group) != n:
        return f"Wrong number of trees: {len(group)} vs {n}"

    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]

    # Check all pairs for overlap
    overlaps = []
    for i, first in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            second = polygons[j]
            # Cheap predicate first; only compute the intersection geometry
            # for pairs that actually meet.
            if not first.intersects(second):
                continue
            area = first.intersection(second).area
            if area > min_area:  # Non-trivial overlap, not just touching
                overlaps.append((i, j, area))

    return overlaps

print("Loading baseline submission...")
# The x, y and deg columns are stored as strings with a one-character prefix,
# which .str[1:] strips before the float conversion; the group size n is the
# part of the id before the underscore.
df = pd.read_csv('experiments/001_baseline/baseline.csv').assign(
    x_val=lambda frame: frame['x'].str[1:].astype(float),
    y_val=lambda frame: frame['y'].str[1:].astype(float),
    deg_val=lambda frame: frame['deg'].str[1:].astype(float),
    n=lambda frame: frame['id'].str.split('_').str[0].astype(int),
)
print(f"Loaded {len(df)} rows")

Loading baseline submission...
Loaded 20100 rows


In [2]:
# Check N=70 for overlaps
print("Checking N=70 for overlaps...")
overlaps_70 = check_overlaps(df, 70)
if isinstance(overlaps_70, str):
    # check_overlaps returns a message string (not a list) when the group
    # does not contain the expected number of trees.
    print(f"Could not check N=70: {overlaps_70}")
elif overlaps_70:
    print(f"Found {len(overlaps_70)} overlapping pairs in N=70:")
    for i, j, area in overlaps_70[:10]:
        print(f"  Trees {i} and {j}: overlap area = {area:.10f}")
else:
    print("No overlaps found in N=70")

Checking N=70 for overlaps...
No overlaps found in N=70


In [3]:
# Check all N values for overlaps
print("\nChecking all N values for overlaps...")
all_overlaps = {}
malformed_groups = {}  # n -> message for groups check_overlaps could not check
for n in range(1, 201):
    overlaps = check_overlaps(df, n)
    if isinstance(overlaps, str):
        # A string result means the group has the wrong number of trees;
        # record it instead of silently treating the group as overlap-free.
        malformed_groups[n] = overlaps
    elif overlaps:
        all_overlaps[n] = overlaps

print(f"\nFound overlaps in {len(all_overlaps)} groups:")
for n, overlaps in sorted(all_overlaps.items()):
    print(f"  N={n}: {len(overlaps)} overlapping pairs")

if malformed_groups:
    print(f"\nCould not check {len(malformed_groups)} groups:")
    for n, message in sorted(malformed_groups.items()):
        print(f"  N={n}: {message}")


Checking all N values for overlaps...



Found overlaps in 0 groups:


In [4]:
# Let's look at the snapshots to find one without overlaps
import os

snapshot_dir = '/home/nonroot/snapshots/santa-2025'
# Sort the directory names in ascending order so the tail of the list can be
# printed as the "latest" entries.
snapshots = os.listdir(snapshot_dir)
snapshots.sort()
latest_five = snapshots[-5:]
print(f"Found {len(snapshots)} snapshots")
print(f"Latest 5: {latest_five}")

Found 100 snapshots
Latest 5: ['21328308881', '21328309254', '21328309666', '21328310048', '21328310479']


In [5]:
# Check a few snapshots for valid submissions
def check_submission_validity(filepath, n_values=(70, 100, 150, 200)):
    """Quick validity check of a submission CSV on a sample of groups.

    Parameters
    ----------
    filepath : path to a submission CSV with 'id', 'x', 'y' and 'deg' columns.
    n_values : group sizes to check. The default is a small sample, so a
        (True, None) result does not prove the other groups are valid.

    Returns
    -------
    (True, None) if every checked group has the right number of trees and no
    overlaps; (False, n) for the first checked group n that fails either
    test; (False, message) if the file could not be read or parsed.
    """
    try:
        submission = pd.read_csv(filepath)
        # Values carry a one-character prefix that is dropped before parsing.
        for column in ('x', 'y', 'deg'):
            submission[f'{column}_val'] = submission[column].str[1:].astype(float)
        submission['n'] = submission['id'].str.split('_').str[0].astype(int)

        # Quick check - just check a few N values
        for n in n_values:
            result = check_overlaps(submission, n)
            # A non-empty list means overlapping pairs; a (non-empty) string
            # means the group has the wrong number of trees. Both make the
            # submission invalid.
            if result:
                return False, n
        return True, None
    except Exception as e:
        return False, str(e)

# Check latest snapshots
for snapshot in snapshots[-10:]:
    filepath = f"{snapshot_dir}/{snapshot}/submission/submission.csv"
    # Some snapshot directories have no submission file; report and move on.
    if not os.path.exists(filepath):
        print(f"{snapshot}: No submission.csv")
        continue
    valid, issue = check_submission_validity(filepath)
    status = 'VALID' if valid else f'INVALID (N={issue})'
    print(f"{snapshot}: {status}")

21322576451: VALID


21322576827: VALID


21322577324: VALID
21322577814: No submission.csv


21322578388: VALID


21328308881: VALID


21328309254: VALID


21328309666: VALID


21328310048: VALID
21328310479: INVALID (N=70)


In [7]:
# The issue might be precision-related. Let's check the sample submission format
# and look at how the kernels handle overlap detection

# First, let's find a valid snapshot and score it
def score_submission(filepath):
    """Calculate total score for a submission.

    For each group n in 1..200 the trees are placed, the side of the smallest
    axis-aligned square containing all of their vertices is measured, and
    side**2 / n is added to the total. This function does not check for
    overlapping trees.

    Parameters
    ----------
    filepath : path to a submission CSV with 'id', 'x', 'y' and 'deg' columns.

    Returns
    -------
    The summed score over all 200 groups.

    Raises
    ------
    ValueError
        If a group does not contain exactly n trees (previously extra rows
        were silently ignored and missing rows surfaced as an IndexError).
    """
    df = pd.read_csv(filepath)
    df['x_val'] = df['x'].str[1:].astype(float)
    df['y_val'] = df['y'].str[1:].astype(float)
    df['deg_val'] = df['deg'].str[1:].astype(float)
    df['n'] = df['id'].str.split('_').str[0].astype(int)

    total_score = 0
    for n in range(1, 201):
        group = df[df['n'] == n]
        if len(group) != n:
            raise ValueError(f"Group {n} has {len(group)} trees, expected {n}")

        xs = group['x_val'].to_numpy()[:, None]
        ys = group['y_val'].to_numpy()[:, None]
        rad = np.radians(group['deg_val'].to_numpy())
        c, s = np.cos(rad)[:, None], np.sin(rad)[:, None]

        # Rotate and translate every outline vertex of every tree at once:
        # rows are trees, columns are the outline vertices.
        all_x = TX[None, :] * c - TY[None, :] * s + xs
        all_y = TX[None, :] * s + TY[None, :] * c + ys

        side = max(all_x.max() - all_x.min(), all_y.max() - all_y.min())
        total_score += side * side / n

    return total_score

# Score the valid snapshots
# Note: valid_snapshots ends up holding every snapshot that could be scored;
# score_submission does not check for overlaps, so a low score here does not
# imply the submission is valid.
print("Scoring valid snapshots...")
valid_snapshots = []
failed_snapshots = []  # (snapshot, error message) for files that could not be scored
for snapshot in snapshots:
    filepath = f"{snapshot_dir}/{snapshot}/submission/submission.csv"
    if not os.path.exists(filepath):
        continue
    try:
        score = score_submission(filepath)
    except Exception as e:
        # Keep going, but remember the failure instead of dropping it silently.
        failed_snapshots.append((snapshot, str(e)))
        continue
    valid_snapshots.append((snapshot, score, filepath))

if failed_snapshots:
    print(f"\nCould not score {len(failed_snapshots)} snapshots:")
    for snapshot, reason in failed_snapshots:
        print(f"  {snapshot}: {reason}")

valid_snapshots.sort(key=lambda x: x[1])
print(f"\nTop 10 best scoring snapshots:")
for snapshot, score, filepath in valid_snapshots[:10]:
    print(f"  {snapshot}: {score:.6f}")

Scoring valid snapshots...



Top 10 best scoring snapshots:
  21145965159: 38.909987
  21328310479: 70.523320
  21145966992: 70.572798
  21322576827: 70.616145
  21322576451: 70.619825
  21328309666: 70.619825
  21198925328: 70.624381
  21198927060: 70.624381
  21222373488: 70.624381
  21222375510: 70.624381


In [8]:
# Check the best snapshot more carefully
# NOTE(review): the quick validity check earlier in this notebook printed this
# snapshot as INVALID (N=70), and 21145965159 ranked first by score; confirm
# that this is the snapshot intended here.
best_snapshot = '21328310479'
best_filepath = f"{snapshot_dir}/{best_snapshot}/submission/submission.csv"

# Load and check
df_best = pd.read_csv(best_filepath)
print(f"Shape: {df_best.shape}")
print(f"Expected: 20100 rows (1+2+...+200)")
print(df_best.head())

# Check for proper format: show the raw string values of each column
print()
for column in ('x', 'y', 'deg'):
    print(f"First few {column} values: {df_best[column].head().tolist()}")

Shape: (20100, 4)
Expected: 20100 rows (1+2+...+200)
      id                       x                        y  \
0  001_0  s43.591192092102147626  s-31.783267068741778871   
1  002_0   s0.154097069621355887   s-0.038540742694794648   
2  002_1  s-0.154097069621372845   s-0.561459257305224058   
3  003_0      s0.254937643697833      s-0.233436061549416   
4  003_1      s0.357722754471247       s0.250360566787394   

                       deg  
0   s44.999999999999978684  
1  s144.272760863123583164  
2  s324.272760863123533426  
3      s113.56326044172948  
4         s66.370622269343  

First few x values: ['s43.591192092102147626', 's0.154097069621355887', 's-0.154097069621372845', 's0.254937643697833', 's0.357722754471247']
First few y values: ['s-31.783267068741778871', 's-0.038540742694794648', 's-0.561459257305224058', 's-0.233436061549416', 's0.250360566787394']
First few deg values: ['s44.999999999999978684', 's144.272760863123583164', 's324.272760863123533426', 's113.563260441

In [None]:
# Check all N values in this submission for overlaps (full check)
df_best['x_val'] = df_best['x'].str[1:].astype(float)
df_best['y_val'] = df_best['y'].str[1:].astype(float)
df_best['deg_val'] = df_best['deg'].str[1:].astype(float)
df_best['n'] = df_best['id'].str.split('_').str[0].astype(int)

print("Checking ALL N values for overlaps in best snapshot...")
all_overlaps_best = {}
malformed_best = {}  # n -> message for groups check_overlaps could not check
for n in range(1, 201):
    overlaps = check_overlaps(df_best, n)
    if isinstance(overlaps, str):
        # A string result means the group has the wrong number of trees;
        # such a group must not be counted as overlap-free.
        malformed_best[n] = overlaps
    elif overlaps:
        all_overlaps_best[n] = overlaps

if malformed_best:
    print(f"\nCould not check {len(malformed_best)} groups:")
    for n, message in sorted(malformed_best.items()):
        print(f"  N={n}: {message}")

if all_overlaps_best:
    print(f"\nFound overlaps in {len(all_overlaps_best)} groups:")
    for n, overlaps in sorted(all_overlaps_best.items()):
        print(f"  N={n}: {len(overlaps)} overlapping pairs")
elif not malformed_best:
    # Only claim validity when every group was actually checked.
    print("\nNo overlaps found! This submission should be valid.")