# Loop 3 Analysis: Fix Overlapping Trees Issue

The submission failed with 'Overlapping trees in group 193'. We need to:
1. Identify and fix the overlap issue
2. Validate the submission properly before submitting
3. Analyze worst-performing N values for targeted optimization

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import os

getcontext().prec = 25
scale_factor = Decimal("1e18")

print("Libraries loaded")

Libraries loaded


In [2]:
# ChristmasTree class for validation
class ChristmasTree:
    """Tree-shaped polygon positioned by a centre point and a rotation.

    The 15-vertex outline is computed with ``Decimal`` arithmetic, multiplied
    by the module-level ``scale_factor``, rotated about the origin by
    ``angle`` (passed to ``shapely.affinity.rotate`` with its default angle
    unit) and finally translated to ``(center_x, center_y)``. The resulting
    geometry is stored in ``self.polygon`` in scaled coordinates.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Parse the pose and build ``self.polygon``.

        Each argument is converted with ``Decimal(str(value))``, so strings
        and numbers are both accepted.
        """
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        two = Decimal('2')
        four = Decimal('4')

        # Unscaled dimensions of the tree.
        trunk_width = Decimal('0.15')
        trunk_height = Decimal('0.2')
        base_width = Decimal('0.7')
        mid_width = Decimal('0.4')
        top_width = Decimal('0.25')

        # Scaled y levels, from the tip down to the bottom of the trunk.
        y_tip = Decimal('0.8') * scale_factor
        y_tier_1 = Decimal('0.5') * scale_factor
        y_tier_2 = Decimal('0.25') * scale_factor
        y_base = Decimal('0.0') * scale_factor
        y_trunk_bottom = -trunk_height * scale_factor

        # Right-hand half of the outline, walking downwards from just below the tip.
        right_half = [
            (top_width / two * scale_factor, y_tier_1),
            (top_width / four * scale_factor, y_tier_1),
            (mid_width / two * scale_factor, y_tier_2),
            (mid_width / four * scale_factor, y_tier_2),
            (base_width / two * scale_factor, y_base),
            (trunk_width / two * scale_factor, y_base),
            (trunk_width / two * scale_factor, y_trunk_bottom),
        ]
        # The left half is the mirror image, walked back up towards the tip.
        left_half = [(-x, y) for x, y in reversed(right_half)]

        outline = [(Decimal('0.0') * scale_factor, y_tip)] + right_half + left_half

        spun = affinity.rotate(Polygon(outline), float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            spun,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

def strip_s(val):
    """Return ``str(val)`` with a single leading ``'s'`` removed, if present.

    Only the first character is dropped; any further ``'s'`` characters stay.
    """
    text = str(val)
    if text.startswith('s'):
        return text[1:]
    return text

def load_trees(n, df):
    """Build the ``ChristmasTree`` objects for group ``n`` of a submission frame.

    Rows are selected by an ``id`` that starts with the zero-padded group
    number followed by an underscore (e.g. ``"007_"``). The ``x``, ``y`` and
    ``deg`` cells are passed through ``strip_s``; a row is skipped when any of
    the three resulting strings is empty.

    Returns a list of trees in row order.
    """
    id_prefix = f"{n:03d}_"
    in_group = df["id"].str.startswith(id_prefix)

    loaded = []
    for _, record in df[in_group].iterrows():
        pose = [strip_s(record[column]) for column in ("x", "y", "deg")]
        if all(pose):
            loaded.append(ChristmasTree(*pose))
    return loaded

def has_overlap(trees):
    """Return True if any two trees intersect without merely touching.

    Fewer than two trees can never overlap. Otherwise an ``STRtree`` built
    over the tree polygons supplies candidates for each polygon; the values
    returned by ``query`` are used as positions into the polygon list. A pair
    counts as overlapping when ``intersects`` is true and ``touches`` is false.
    """
    if len(trees) < 2:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    for position, shape in enumerate(shapes):
        for candidate in spatial_index.query(shape):
            if candidate == position:
                # A polygon always matches itself in the index.
                continue
            neighbour = shapes[candidate]
            if shape.intersects(neighbour) and not shape.touches(neighbour):
                return True
    return False

print("Validation functions defined")

Validation functions defined


In [3]:
# Load the current submission and show its size, columns and first rows
print("Checking /home/submission/submission.csv for overlaps...")
df = pd.read_csv('/home/submission/submission.csv')
print("Total rows: {}".format(len(df)))
print("Columns: {}".format(df.columns.tolist()))
print("Sample rows:")
print(df.head(10))

Checking /home/submission/submission.csv for overlaps...
Total rows: 20100
Columns: ['id', 'x', 'y', 'deg']
Sample rows:
      id                       x                      y                  deg
0  001_0       s0.21838174559571     s1.782235032116043                s45.0
1  002_0     s-0.369007661957598    s-0.223935163032579   s23.62938066690714
2  002_1     s-0.060813611847548     s0.298983537378646  s203.62938681914625
3  003_0   s-0.12000454220459456   s0.41575650663067487  s156.35666464132777
4  003_1     s0.3575811725991934     s0.326871291674461     s205.25702762583
5  003_2  s-0.022787710235948055  s-0.17274483902103246  s244.82706504194107
6  004_0      s0.920907830499314     s-0.03662860074967    s24.1023816202337
7  004_1      s0.277666850733957    s-0.036627144109313   s23.67937006197792
8  004_2      s0.921314919121653     s0.829668909974559  s204.19009554669015
9  004_3      s0.284529671382951     s0.829496153568729  s203.73863354151138


In [4]:
# Scan every group (N = 1..200) of the submission for overlapping trees
overlapping_groups = []
for n in range(1, 201):
    trees = load_trees(n, df)
    if len(trees) != n:
        # An incomplete group is reported and not checked for overlaps.
        print(f"Group {n}: Expected {n} trees, got {len(trees)}")
    elif has_overlap(trees):
        overlapping_groups.append(n)
        if n == 193:
            print(f"Group {n}: OVERLAP DETECTED (this is the error group)")

print(f"\nTotal overlapping groups: {len(overlapping_groups)}")
if len(overlapping_groups) > 0:
    print(f"Overlapping groups: {overlapping_groups[:20]}...")

Group 193: OVERLAP DETECTED (this is the error group)



Total overlapping groups: 1
Overlapping groups: [193]...


In [5]:
# Run the same overlap scan on the ensemble baseline
print("\nChecking ensemble_baseline.csv for overlaps...")
df_baseline = pd.read_csv('/home/code/experiments/003_ensemble/ensemble_baseline.csv')
overlapping_baseline = []
for n in range(1, 201):
    trees = load_trees(n, df_baseline)
    # Groups with the wrong number of trees are skipped without a message.
    if len(trees) == n and has_overlap(trees):
        overlapping_baseline.append(n)

print(f"Overlapping groups in baseline: {len(overlapping_baseline)}")
if len(overlapping_baseline) > 0:
    print(f"Groups: {overlapping_baseline[:20]}...")


Checking ensemble_baseline.csv for overlaps...


Overlapping groups in baseline: 1
Groups: [193]...


In [6]:
# Run the overlap scan on best_seed2.csv (the best C++ optimizer output)
print("\nChecking best_seed2.csv for overlaps...")
df_seed2 = pd.read_csv('/home/code/experiments/003_ensemble/best_seed2.csv')
overlapping_seed2 = []
for n in range(1, 201):
    trees = load_trees(n, df_seed2)
    # Groups with the wrong number of trees are skipped without a message.
    if len(trees) == n and has_overlap(trees):
        overlapping_seed2.append(n)

print(f"Overlapping groups in best_seed2: {len(overlapping_seed2)}")
if len(overlapping_seed2) > 0:
    print(f"Groups: {overlapping_seed2[:20]}...")


Checking best_seed2.csv for overlaps...


Overlapping groups in best_seed2: 1
Groups: [193]...


In [None]:
# Run the overlap scan on submission_fd.csv (fix direction output)
print("\nChecking submission_fd.csv for overlaps...")
df_fd = pd.read_csv('/home/code/experiments/003_ensemble/submission_fd.csv')
print("Total rows: {}".format(len(df_fd)))
overlapping_fd = []
for n in range(1, 201):
    trees = load_trees(n, df_fd)
    if len(trees) != n:
        # An incomplete group is reported and not checked for overlaps.
        print(f"Group {n}: Expected {n} trees, got {len(trees)}")
    elif has_overlap(trees):
        overlapping_fd.append(n)

print(f"Overlapping groups in submission_fd: {len(overlapping_fd)}")
if len(overlapping_fd) > 0:
    print(f"Groups: {overlapping_fd[:20]}...")

In [7]:
# Find the source of the overlap in group 193
import glob

# Score all CSVs and find which one has the best score for N=193 without overlap
# Unscaled tree outline: x and y coordinates of the 15 vertices, in order.
TX = np.array([0,0.125,0.0625,0.2,0.1,0.35,0.075,0.075,-0.075,-0.075,-0.35,-0.1,-0.2,-0.0625,-0.125])
TY = np.array([0.8,0.5,0.5,0.25,0.25,0,0,-0.2,-0.2,0,0,0.25,0.25,0.5,0.5])

def score_group_fast(xs, ys, degs):
    """Score one group as (side of its bounding square) squared / tree count.

    Each tree's outline (``TX``, ``TY``) is rotated by ``degs[i]`` degrees and
    shifted to ``(xs[i], ys[i])``. The side is the larger of the x-extent and
    the y-extent of all vertices taken together. An empty group scores
    ``inf``. No overlap checking is done here.
    """
    count = len(xs)
    if count == 0:
        return float('inf')

    x_blocks = []
    y_blocks = []
    for k in range(count):
        theta = np.radians(degs[k])
        cos_t = np.cos(theta)
        sin_t = np.sin(theta)
        # Standard 2-D rotation followed by the translation to the tree centre.
        x_blocks.append(TX * cos_t - TY * sin_t + xs[k])
        y_blocks.append(TX * sin_t + TY * cos_t + ys[k])

    vertex_x = np.concatenate(x_blocks)
    vertex_y = np.concatenate(y_blocks)

    width = vertex_x.max() - vertex_x.min()
    height = vertex_y.max() - vertex_y.min()
    side = max(width, height)
    return side * side / count

# Find all CSV files
snapshot_csvs = glob.glob('/home/nonroot/snapshots/santa-2025/**/*.csv', recursive=True)
our_csvs = glob.glob('/home/code/submission_candidates/*.csv')
our_csvs += glob.glob('/home/code/experiments/*/submission*.csv')
our_csvs += glob.glob('/home/code/experiments/*/*.csv')
# De-duplicate, then sort so the scan order (and tie-breaking between equal
# scores) is the same on every run; set iteration order is not guaranteed.
all_csvs = sorted(set(snapshot_csvs + our_csvs))

print(f"Checking {len(all_csvs)} CSV files for N=193 without overlap...")

best_193_score = float('inf')
best_193_source = None
best_193_has_overlap = True
skipped_csvs = []  # (path, reason) for files that raised while being checked

for csv_path in all_csvs:
    try:
        df_check = pd.read_csv(csv_path)
        if not {'id', 'x', 'y', 'deg'}.issubset(df_check.columns):
            continue

        trees = load_trees(193, df_check)
        if len(trees) != 193:
            continue

        has_ov = has_overlap(trees)
        if has_ov:
            # Overlapping candidates can never be selected, so skip scoring them.
            continue

        # Calculate score
        xs = [float(t.center_x) for t in trees]
        ys = [float(t.center_y) for t in trees]
        degs = [float(t.angle) for t in trees]
        score = score_group_fast(np.array(xs), np.array(ys), np.array(degs))
    except Exception as exc:
        # Best-effort scan: an unreadable or malformed file must not stop the
        # search, but it is recorded instead of being silently ignored.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        skipped_csvs.append((csv_path, f"{type(exc).__name__}: {exc}"))
        continue

    if score < best_193_score:
        best_193_score = score
        best_193_source = csv_path
        best_193_has_overlap = has_ov
        print(f"  Found valid N=193: score={score:.6f} from {csv_path.split('/')[-1]}")

if skipped_csvs:
    print(f"\nSkipped {len(skipped_csvs)} CSV files that could not be checked:")
    for skipped_path, reason in skipped_csvs[:10]:
        print(f"  {skipped_path}: {reason}")

if best_193_source is None:
    print("\nNo CSV contains a valid N=193 group without overlap")
else:
    print(f"\nBest N=193 without overlap: score={best_193_score:.6f} from {best_193_source}")

Checking 143 CSV files for N=193 without overlap...
  Found valid N=193: score=0.643878 from best_seed1.csv


  Found valid N=193: score=0.419689 from candidate_001.csv



Best N=193 without overlap: score=0.419689 from /home/nonroot/snapshots/santa-2025/20992150197/code/submission_candidates/candidate_001.csv


In [8]:
# Create a fixed submission by replacing group 193 with the valid one
print("Creating fixed submission...")

# Load the best submission (best_seed2.csv)
df_best = pd.read_csv('/home/code/experiments/003_ensemble/best_seed2.csv')

# Load the donor file with valid N=193
df_donor = pd.read_csv('/home/nonroot/snapshots/santa-2025/20992150197/code/submission_candidates/candidate_001.csv')

# The group number is the part of the id before the underscore ("193_7" -> 193).
# It is kept in separate Series so the loaded frames are not modified and the
# cell gives the same result when re-run.
best_group = df_best['id'].str.split('_').str[0].astype(int)
donor_group = df_donor['id'].str.split('_').str[0].astype(int)

# Remove group 193 from best submission
df_without_193 = df_best[best_group != 193]

# Get group 193 from donor
df_193_donor = df_donor[donor_group == 193]

# Combine
df_fixed = pd.concat([df_without_193, df_193_donor], ignore_index=True)

# Sort by id: numerically by group, then by tree index within the group
id_parts = df_fixed['id'].str.split('_')
df_fixed['N'] = id_parts.str[0].astype(int)
df_fixed['tree_idx'] = id_parts.str[1].astype(int)
df_fixed = df_fixed.sort_values(['N', 'tree_idx']).drop(columns=['N', 'tree_idx'])

print(f"Fixed submission has {len(df_fixed)} rows")

# Verify no overlaps
# ("\n" here is a real newline; the previous "\\n" printed a literal backslash-n.)
print("\nVerifying no overlaps in fixed submission...")
overlapping_fixed = []
for n in range(1, 201):
    trees = load_trees(n, df_fixed)
    if len(trees) != n:
        print(f"Group {n}: Expected {n} trees, got {len(trees)}")
        continue
    if has_overlap(trees):
        overlapping_fixed.append(n)

print(f"Overlapping groups in fixed submission: {len(overlapping_fixed)}")
if overlapping_fixed:
    print(f"Groups: {overlapping_fixed}")

Creating fixed submission...
Fixed submission has 20100 rows
\nVerifying no overlaps in fixed submission...


Overlapping groups in fixed submission: 0


In [9]:
# Calculate the score of the fixed submission
print("Calculating score of fixed submission...")

# Score of the earlier submission that still contained the overlap
PREVIOUS_SCORE = 84.894026

total_score = 0.0
for n in range(1, 201):
    trees = load_trees(n, df_fixed)
    if len(trees) != n:
        # An incomplete group makes the whole submission invalid.
        print(f"Group {n}: Expected {n} trees, got {len(trees)}")
        total_score = float('inf')
        break

    xs = [float(t.center_x) for t in trees]
    ys = [float(t.center_y) for t in trees]
    degs = [float(t.angle) for t in trees]
    score = score_group_fast(np.array(xs), np.array(ys), np.array(degs))
    total_score += score

print(f"Fixed submission score: {total_score:.6f}")
print(f"Previous score (with overlap): {PREVIOUS_SCORE:.6f}")
print(f"Difference: {total_score - PREVIOUS_SCORE:.6f}")

# Save the fixed submission, but only when it is complete (finite score) and
# the overlap check on df_fixed found nothing; otherwise the existing
# submission file is left untouched.
if np.isfinite(total_score) and not overlapping_fixed:
    df_fixed.to_csv('/home/submission/submission.csv', index=False)
    # "\n" is a real newline; the previous "\\n" printed a literal backslash-n.
    print("\nSaved to /home/submission/submission.csv")
else:
    print("\nNOT saved: fixed submission is incomplete or still has overlapping groups")

Calculating score of fixed submission...


Fixed submission score: 84.901044
Previous score (with overlap): 84.894026
Difference: 0.007018
\nSaved to /home/submission/submission.csv


In [10]:
# NOTE(review): the line below was saved with escaped "\n" sequences instead of real newlines, so Python reads the whole cell as a single comment and nothing executes (no output was recorded). The following cell contains the same code with real newlines.
# Analyze worst-performing N values\nprint("Analyzing worst-performing N values...")\n\nscores_per_n = []\nfor n in range(1, 201):\n    trees = load_trees(n, df_fixed)\n    if len(trees) != n:\n        continue\n    \n    xs = [float(t.center_x) for t in trees]\n    ys = [float(t.center_y) for t in trees]\n    degs = [float(t.angle) for t in trees]\n    score = score_group_fast(np.array(xs), np.array(ys), np.array(degs))\n    scores_per_n.append({'N': n, 'score': score, 'contribution': score / total_score * 100})\n\ndf_scores = pd.DataFrame(scores_per_n)\ndf_scores = df_scores.sort_values('score', ascending=False)\n\nprint("\\nTop 20 worst-performing N values:")\nprint(df_scores.head(20).to_string())\n\nprint(f"\\nTop 20 contribute {df_scores.head(20)['contribution'].sum():.2f}% of total score")

In [None]:
# Rank the groups by their per-group score to find the worst-performing N values
print("Analyzing worst-performing N values...")

scores_per_n = []
for n in range(1, 201):
    trees = load_trees(n, df_fixed)
    if len(trees) == n:
        xs = [float(t.center_x) for t in trees]
        ys = [float(t.center_y) for t in trees]
        degs = [float(t.angle) for t in trees]
        score = score_group_fast(np.array(xs), np.array(ys), np.array(degs))
        # Contribution is this group's share of the total score, in percent.
        share = score / total_score * 100
        scores_per_n.append({'N': n, 'score': score, 'contribution': share})

# Highest score first: these groups cost the most.
df_scores = pd.DataFrame(scores_per_n).sort_values('score', ascending=False)

print("\nTop 20 worst-performing N values:")
print(df_scores.head(20).to_string())

print(f"\nTop 20 contribute {df_scores.head(20)['contribution'].sum():.2f}% of total score")