# Loop 1 Analysis: Investigating Overlap Issue in Group 040

The submission was rejected with the error 'Overlapping trees in group 040'. To diagnose this, we need to:
1. Verify if best_ensemble.csv has overlaps
2. Check if precision loss during save caused the issue
3. Find a valid submission source

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from shapely import affinity
from decimal import Decimal, getcontext
# Decimal arithmetic in this notebook is carried out with 30 significant digits.
getcontext().prec = 30

# Tree geometry - EXACT from kernel
# Every outline coordinate built in create_tree_polygon_decimal is multiplied
# by this factor; a value of "1" leaves the outline at its literal dimensions.
scale_factor = Decimal("1")

def create_tree_polygon_decimal(center_x, center_y, angle):
    """Build the 15-vertex tree outline, rotated and moved into place.

    The outline dimensions are held as Decimal values, multiplied by the
    module-level ``scale_factor`` and converted to float before being handed
    to Shapely.

    Args:
        center_x: x offset applied after rotation (converted with ``float``).
        center_y: y offset applied after rotation (converted with ``float``).
        angle: rotation about the origin, passed to ``affinity.rotate``
            (which interprets it as degrees by default).

    Returns:
        The rotated and translated Shapely polygon.
    """
    two = Decimal("2")
    four = Decimal("4")

    trunk_w, trunk_h = Decimal("0.15"), Decimal("0.2")
    base_w, mid_w, top_w = Decimal("0.7"), Decimal("0.4"), Decimal("0.25")
    tip_y, tier_1_y, tier_2_y, base_y = (
        Decimal("0.8"),
        Decimal("0.5"),
        Decimal("0.25"),
        Decimal("0.0"),
    )
    trunk_bottom_y = -trunk_h

    # Right-hand side of the outline, walked from just below the tip down to
    # the bottom of the trunk.
    right_side = [
        (top_w / two, tier_1_y),
        (top_w / four, tier_1_y),
        (mid_w / two, tier_2_y),
        (mid_w / four, tier_2_y),
        (base_w / two, base_y),
        (trunk_w / two, base_y),
        (trunk_w / two, trunk_bottom_y),
    ]
    # The left-hand side is the mirror image, walked back up towards the tip.
    left_side = [(-x, y) for x, y in reversed(right_side)]

    outline = [(Decimal("0.0"), tip_y), *right_side, *left_side]
    vertices = [
        (float(x * scale_factor), float(y * scale_factor)) for x, y in outline
    ]

    template = Polygon(vertices)
    turned = affinity.rotate(template, float(angle), origin=(0, 0))
    return affinity.translate(turned, xoff=float(center_x), yoff=float(center_y))

def has_overlap(polygons):
    """Report whether any two polygons intersect beyond merely touching.

    Args:
        polygons: sequence of Shapely geometries.

    Returns:
        True as soon as one pair is found that intersects without just
        touching; False otherwise, including when fewer than two polygons
        are supplied.
    """
    if len(polygons) <= 1:
        return False

    spatial_index = STRtree(polygons)
    for position, current in enumerate(polygons):
        for hit in spatial_index.query(current):
            # The query result is handled both as an integer position into
            # `polygons` and as a geometry object (presumably to support
            # different Shapely versions - confirm against the installed one).
            if isinstance(hit, (int, np.integer)):
                hit_position = int(hit)
                if hit_position == position:
                    continue
                neighbour = polygons[hit_position]
            elif hit is current:
                continue
            else:
                neighbour = hit

            if current.intersects(neighbour) and not current.touches(neighbour):
                return True

    return False

# Marker output showing that this cell ran to completion.
print("Functions defined")

In [None]:
# Load best_ensemble.csv and check group 040
# Every column is read as text (dtype=str): coordinates that are not protected
# by the 's' prefix would otherwise be parsed to float64 by pandas before
# parse_s sees them, and the exact digits written in the file would be lost.
df = pd.read_csv(
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv',
    dtype=str,
)

# Parse values - keep full precision
def parse_s(s):
    """Return *s* as text, dropping a single leading 's' when *s* is a string.

    Non-string values, and strings without the leading 's', are returned as
    ``str(s)``.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return s[1:] if has_marker else str(s)

# Add text versions of the coordinate columns with the 's' marker removed.
for source_column in ('x', 'y', 'deg'):
    df[f'{source_column}_str'] = df[source_column].apply(parse_s)

# Check group 040
# Rows belonging to group 040 are those whose id starts with '040_'.
is_group_040 = df['id'].str.startswith('040_')
group_040 = df[is_group_040].copy()
print(f"Group 040 has {len(group_040)} trees")
preview_columns = ['id', 'x_str', 'y_str', 'deg_str']
print(group_040[preview_columns].head(10))

In [None]:
# Check for overlaps in group 040 using full precision
# One polygon per row, built from the Decimal value of each text column.
polygons_040 = [
    create_tree_polygon_decimal(Decimal(x_text), Decimal(y_text), Decimal(deg_text))
    for x_text, y_text, deg_text in zip(
        group_040['x_str'], group_040['y_str'], group_040['deg_str']
    )
]

overlap_040 = has_overlap(polygons_040)
print(f"Group 040 has overlap: {overlap_040}")

In [None]:
# Check ALL groups for overlaps
print("Checking all groups for overlaps...")
overlapping_groups = []

# Groups are identified by a zero-padded three-digit id prefix, 001_ .. 200_.
for n in range(1, 201):
    group = df[df['id'].str.startswith(f'{n:03d}_')]

    polygons = [
        create_tree_polygon_decimal(Decimal(x_text), Decimal(y_text), Decimal(deg_text))
        for x_text, y_text, deg_text in zip(
            group['x_str'], group['y_str'], group['deg_str']
        )
    ]

    if not has_overlap(polygons):
        continue
    overlapping_groups.append(n)
    print(f"N={n:3d}: OVERLAP!")

print(f"\nTotal overlapping groups: {len(overlapping_groups)}")
print(f"Groups: {overlapping_groups}")

In [None]:
# Check other pre-optimized solutions for overlaps
print("\n=== Checking bucket-of-chump/submission.csv ===")
# Every column is read as text (dtype=str): coordinates that are not protected
# by the 's' prefix would otherwise be parsed to float64 by pandas before
# parse_s sees them, and the exact digits written in the file would be lost.
df_boc = pd.read_csv(
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv',
    dtype=str,
)
df_boc['x_str'] = df_boc['x'].apply(parse_s)
df_boc['y_str'] = df_boc['y'].apply(parse_s)
df_boc['deg_str'] = df_boc['deg'].apply(parse_s)

# Collect the group numbers (001_ .. 200_ id prefixes) that contain an overlap.
overlapping_boc = []
for n in range(1, 201):
    prefix = f'{n:03d}_'
    group = df_boc[df_boc['id'].str.startswith(prefix)]

    polygons = [
        create_tree_polygon_decimal(Decimal(x_text), Decimal(y_text), Decimal(deg_text))
        for x_text, y_text, deg_text in zip(
            group['x_str'], group['y_str'], group['deg_str']
        )
    ]

    if has_overlap(polygons):
        overlapping_boc.append(n)

print(f"Overlapping groups in bucket-of-chump: {overlapping_boc}")
if not overlapping_boc:
    print("bucket-of-chump is overlap-free!")

In [None]:
# Calculate score for bucket-of-chump
from shapely.ops import unary_union

def calculate_score_decimal(df):
    """Sum ``side**2 / n`` over the groups 1..200 present in *df*.

    For each group (rows whose ``id`` starts with the zero-padded three-digit
    prefix ``NNN_``) the tree polygons are built, the bounding box of their
    union is taken, and ``side`` is the longer edge of that box.

    Groups with no rows are skipped: they add nothing to the total and get no
    entry in the per-group dictionary, so a caller can detect an incomplete
    submission by checking which keys are present. Without this guard the
    bounds of an empty union are not usable (NaN values or an empty tuple,
    depending on the Shapely version).

    Args:
        df: DataFrame with an ``id`` column and the text columns ``x_str``,
            ``y_str`` and ``deg_str``.

    Returns:
        A tuple ``(total_score, scores_by_n)`` where ``total_score`` is a
        float and ``scores_by_n`` maps ``n`` to
        ``{'side': float, 'score': float}``.
    """
    total_score = Decimal('0')
    scores_by_n = {}

    for n in range(1, 201):
        prefix = f'{n:03d}_'
        group = df[df['id'].str.startswith(prefix)]
        if group.empty:
            # Nothing to measure for this n.
            continue

        polygons = [
            create_tree_polygon_decimal(Decimal(x_text), Decimal(y_text), Decimal(deg_text))
            for x_text, y_text, deg_text in zip(
                group['x_str'], group['y_str'], group['deg_str']
            )
        ]

        min_x, min_y, max_x, max_y = unary_union(polygons).bounds
        # Go through str() so the Decimal holds the float's printed digits
        # rather than its full binary expansion.
        side = Decimal(str(max(max_x - min_x, max_y - min_y)))
        score_n = side ** 2 / Decimal(n)
        scores_by_n[n] = {'side': float(side), 'score': float(score_n)}
        total_score += score_n

    return float(total_score), scores_by_n

# Score the bucket-of-chump submission and report it next to the target value.
print("Calculating score for bucket-of-chump...")
boc_score, boc_scores_by_n = calculate_score_decimal(df_boc)
print(f"bucket-of-chump score: {boc_score:.6f}")
# 68.919154 is a hard-coded target; the gap below is this submission's score
# minus that target, so it is positive when the score exceeds the target.
print(f"Target: 68.919154")
print(f"Gap: {boc_score - 68.919154:.6f}")

In [None]:
# Check santa25-public submissions
print("\n=== Checking santa25-public submissions ===")
import os

santa_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public'
for fname in sorted(os.listdir(santa_dir)):
    if not fname.endswith('.csv'):
        continue

    fpath = os.path.join(santa_dir, fname)
    # Every column is read as text (dtype=str): coordinates that are not
    # protected by the 's' prefix would otherwise be parsed to float64 by
    # pandas before parse_s sees them, losing the exact digits in the file.
    df_s = pd.read_csv(fpath, dtype=str)
    df_s['x_str'] = df_s['x'].apply(parse_s)
    df_s['y_str'] = df_s['y'].apply(parse_s)
    df_s['deg_str'] = df_s['deg'].apply(parse_s)

    # Collect the group numbers (001_ .. 200_ id prefixes) with an overlap.
    overlapping = []
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        group = df_s[df_s['id'].str.startswith(prefix)]
        if len(group) == 0:
            # This file has no rows for group n.
            continue

        polygons = [
            create_tree_polygon_decimal(Decimal(x_text), Decimal(y_text), Decimal(deg_text))
            for x_text, y_text, deg_text in zip(
                group['x_str'], group['y_str'], group['deg_str']
            )
        ]

        if has_overlap(polygons):
            overlapping.append(n)

    if not overlapping:
        # Calculate score
        score, _ = calculate_score_decimal(df_s)
        print(f"{fname}: overlap-free, score={score:.6f}")
    else:
        shown = overlapping[:5]
        # Only hint at further groups when the list was actually truncated.
        suffix = "..." if len(overlapping) > len(shown) else ""
        print(f"{fname}: overlaps in groups {shown}{suffix}")