# Loop 1 Analysis: Investigating Overlap Issue in Group 040

The submission failed with the error 'Overlapping trees in group 040'. We need to:
1. Verify if best_ensemble.csv has overlaps
2. Check if precision loss during save caused the issue
3. Find a valid submission source

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from shapely import affinity
from decimal import Decimal, getcontext
getcontext().prec = 30

# Tree geometry - EXACT from kernel
# NOTE(review): with scale_factor == 1 every `* scale_factor` applied to the
# tree outline is a no-op; confirm this is the value the reference kernel uses
# before trusting the overlap results computed from this geometry.
scale_factor = Decimal("1")

def create_tree_polygon_decimal(center_x, center_y, angle):
    """Build the 15-vertex tree outline, rotate it, then move it into place.

    The outline dimensions are computed with Decimal arithmetic and multiplied
    by the module-level ``scale_factor``, but every coordinate is converted to
    ``float`` before it is handed to shapely, as are ``angle``, ``center_x``
    and ``center_y``. The returned geometry therefore carries float precision.

    Args:
        center_x: x offset applied after rotation (anything ``float()`` accepts).
        center_y: y offset applied after rotation (anything ``float()`` accepts).
        angle: rotation passed to ``affinity.rotate`` about the origin (0, 0).

    Returns:
        The rotated and translated shapely Polygon.
    """
    two = Decimal("2")
    four = Decimal("4")

    trunk_w, trunk_h = Decimal("0.15"), Decimal("0.2")
    base_w, mid_w, top_w = Decimal("0.7"), Decimal("0.4"), Decimal("0.25")
    tip_y, tier_1_y, tier_2_y, base_y = (
        Decimal("0.8"),
        Decimal("0.5"),
        Decimal("0.25"),
        Decimal("0.0"),
    )
    trunk_bottom_y = -trunk_h

    def scaled(value):
        # Apply the global scale, then drop to float for shapely.
        return float(value * scale_factor)

    # Right-hand half of the outline, listed from just below the tip down to
    # the bottom-right corner of the trunk.
    right_half = [
        (top_w / two, tier_1_y),
        (top_w / four, tier_1_y),
        (mid_w / two, tier_2_y),
        (mid_w / four, tier_2_y),
        (base_w / two, base_y),
        (trunk_w / two, base_y),
        (trunk_w / two, trunk_bottom_y),
    ]

    outline = [(scaled(Decimal("0.0")), scaled(tip_y))]
    outline.extend((scaled(dx), scaled(dy)) for dx, dy in right_half)
    # The left half is the mirror image, walked back up towards the tip.
    outline.extend((scaled(-dx), scaled(dy)) for dx, dy in reversed(right_half))

    upright = Polygon(outline)
    turned = affinity.rotate(upright, float(angle), origin=(0, 0))
    return affinity.translate(turned, xoff=float(center_x), yoff=float(center_y))

def has_overlap(polygons):
    """Report whether any two polygons in the list overlap.

    Two polygons count as overlapping when they intersect and do not merely
    touch. Candidate pairs come from an STRtree query; the query results are
    accepted either as integer indices into ``polygons`` or as geometry
    objects (presumably covering different shapely versions - verify).

    Args:
        polygons: sequence of shapely geometries.

    Returns:
        True as soon as one overlapping pair is found, otherwise False.
        Sequences with fewer than two entries always give False.
    """
    if len(polygons) < 2:
        return False

    spatial_index = STRtree(polygons)
    for idx, shape in enumerate(polygons):
        for hit in spatial_index.query(shape):
            if isinstance(hit, (int, np.integer)):
                # Index-style result: skip the polygon's own entry.
                position = int(hit)
                if position == idx:
                    continue
                neighbour = polygons[position]
            elif hit is shape:
                # Geometry-style result: skip the polygon itself.
                continue
            else:
                neighbour = hit

            if not shape.intersects(neighbour):
                continue
            if not shape.touches(neighbour):
                return True
    return False

print("Functions defined")

Functions defined


In [2]:
# Load best_ensemble.csv and check group 040
# The path is an absolute location inside a snapshot directory, and the CSV is
# read with pandas' default dtype inference (no dtype argument is given).
df = pd.read_csv('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv')

# Turn a CSV cell into plain text without going through float
def parse_s(s):
    """Return ``s`` as text, dropping one leading 's' when ``s`` is a string.

    Strings that start with 's' lose exactly that first character; any other
    value (including non-strings) is returned as ``str(s)``.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return s[1:] if has_marker else str(s)

# Add a text version of each coordinate column (x_str, y_str, deg_str).
for source_col in ('x', 'y', 'deg'):
    df[source_col + '_str'] = df[source_col].apply(parse_s)

# Rows whose id starts with "040_" make up group 040.
in_group_040 = df['id'].str.startswith('040_')
group_040 = df[in_group_040].copy()
print(f"Group 040 has {len(group_040)} trees")
preview_columns = ['id', 'x_str', 'y_str', 'deg_str']
print(group_040[preview_columns].head(10))

Group 040 has 40 trees
        id                  x_str                  y_str  \
780  040_0  -1.689632103422090692  -0.247894431182562769   
781  040_1   0.953906339823958005  -0.552668496076432425   
782  040_2  -0.821313951420922450   0.356433575901729194   
783  040_3  -1.689632103415815934   0.489826127465763816   
784  040_4   0.842519162317999837  -1.882363352068190610   
785  040_5  -1.690954908480060803  -1.834206437330584194   
786  040_6  -0.874184327926464277  -1.869729286109716559   
787  040_7  -0.026478073822319651  -1.869688575766734084   
788  040_8   0.133073013205790763   0.291614148128908235   
789  040_9  -1.689632103444474343  -1.030321431471046933   

                    deg_str  
780  252.060472972624125987  
781  252.151405771974452819  
782  252.217176846480043650  
783  252.060472935714216192  
784  246.370622269343670041  
785  242.547378445396873303  
786  252.223494727359053513  
787  252.111808431677388853  
788  252.217182198741710408  
789  252.0604731

In [3]:
# Rebuild every tree of group 040 from its text columns and test for overlap
polygons_040 = [
    create_tree_polygon_decimal(
        Decimal(row['x_str']),
        Decimal(row['y_str']),
        Decimal(row['deg_str']),
    )
    for _, row in group_040.iterrows()
]

overlap_040 = has_overlap(polygons_040)
print(f"Group 040 has overlap: {overlap_040}")

Group 040 has overlap: False


In [4]:
# Scan every group N = 1..200 of best_ensemble and collect those that overlap
print("Checking all groups for overlaps...")
overlapping_groups = []

for n in range(1, 201):
    # Group ids are the zero-padded N followed by an underscore, e.g. "040_".
    members = df[df['id'].str.startswith(f'{n:03d}_')]
    polygons = [
        create_tree_polygon_decimal(
            Decimal(row['x_str']),
            Decimal(row['y_str']),
            Decimal(row['deg_str']),
        )
        for _, row in members.iterrows()
    ]

    if not has_overlap(polygons):
        continue
    overlapping_groups.append(n)
    print(f"N={n:3d}: OVERLAP!")

print(f"\nTotal overlapping groups: {len(overlapping_groups)}")
print(f"Groups: {overlapping_groups}")

Checking all groups for overlaps...



Total overlapping groups: 0
Groups: []


In [10]:
# Run the same overlap scan on the bucket-of-chump submission
print("\n=== Checking bucket-of-chump/submission.csv ===")
df_boc = pd.read_csv('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv')
for source_col in ('x', 'y', 'deg'):
    df_boc[source_col + '_str'] = df_boc[source_col].apply(parse_s)

overlapping_boc = []
for n in range(1, 201):
    members = df_boc[df_boc['id'].str.startswith(f'{n:03d}_')]
    polygons = [
        create_tree_polygon_decimal(
            Decimal(row['x_str']),
            Decimal(row['y_str']),
            Decimal(row['deg_str']),
        )
        for _, row in members.iterrows()
    ]

    if has_overlap(polygons):
        overlapping_boc.append(n)

print(f"Overlapping groups in bucket-of-chump: {overlapping_boc}")
if len(overlapping_boc) == 0:
    print("bucket-of-chump is overlap-free!")


=== Checking bucket-of-chump/submission.csv ===


Overlapping groups in bucket-of-chump: [12, 54, 63, 116, 134, 170]


In [11]:
# Calculate score for bucket-of-chump
from shapely.ops import unary_union

def calculate_score_decimal(df):
    """Sum side**2 / N over groups N = 1..200 of a submission frame.

    For each N the rows whose ``id`` starts with the zero-padded ``NNN_``
    prefix are turned into polygons, their union's bounding box is taken, and
    the longer box edge is used as the side length.

    NOTE(review): a group with no rows is not special-cased here; the result
    then depends on what ``unary_union([]).bounds`` yields - confirm before
    scoring partial files.

    Args:
        df: DataFrame with ``id``, ``x_str``, ``y_str`` and ``deg_str`` columns.

    Returns:
        ``(total, per_group)`` where ``total`` is the float sum of all group
        scores and ``per_group`` maps N to ``{'side': float, 'score': float}``.
    """
    per_group = {}
    running_total = Decimal('0')

    for n in range(1, 201):
        rows = df[df['id'].str.startswith(f'{n:03d}_')]
        trees = [
            create_tree_polygon_decimal(
                Decimal(r['x_str']),
                Decimal(r['y_str']),
                Decimal(r['deg_str']),
            )
            for _, r in rows.iterrows()
        ]

        # bounds is (minx, miny, maxx, maxy); the side is the longer extent.
        extent = unary_union(trees).bounds
        width = extent[2] - extent[0]
        height = extent[3] - extent[1]
        side = Decimal(str(max(width, height)))

        group_score = side ** 2 / Decimal(str(n))
        per_group[n] = {'side': float(side), 'score': float(group_score)}
        running_total += group_score

    return float(running_total), per_group

# Score the notebook compares against. It is named once so the printed target
# and the gap computed from it cannot drift apart.
TARGET_SCORE = 68.919154

print("Calculating score for bucket-of-chump...")
boc_score, boc_scores_by_n = calculate_score_decimal(df_boc)
print(f"bucket-of-chump score: {boc_score:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap: {boc_score - TARGET_SCORE:.6f}")

Calculating score for bucket-of-chump...


bucket-of-chump score: 70.676501
Target: 68.919154
Gap: 1.757347


In [12]:
# Overlap-scan (and score, when clean) every CSV in the santa25-public folder
print("\n=== Checking santa25-public submissions ===")
import os

santa_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public'
for fname in sorted(os.listdir(santa_dir)):
    if not fname.endswith('.csv'):
        continue

    fpath = os.path.join(santa_dir, fname)
    df_s = pd.read_csv(fpath)
    for source_col in ('x', 'y', 'deg'):
        df_s[source_col + '_str'] = df_s[source_col].apply(parse_s)

    overlapping = []
    for n in range(1, 201):
        group = df_s[df_s['id'].str.startswith(f'{n:03d}_')]
        if len(group) == 0:
            # Group absent from this file: nothing to check.
            continue

        polygons = [
            create_tree_polygon_decimal(
                Decimal(row['x_str']),
                Decimal(row['y_str']),
                Decimal(row['deg_str']),
            )
            for _, row in group.iterrows()
        ]

        if has_overlap(polygons):
            overlapping.append(n)

    if overlapping:
        # Only the first five offending groups are listed.
        print(f"{fname}: overlaps in groups {overlapping[:5]}...")
    else:
        score, _ = calculate_score_decimal(df_s)
        print(f"{fname}: overlap-free, score={score:.6f}")


=== Checking santa25-public submissions ===


New_Tree_144_196.csv: overlap-free, score=72.927920


santa2025_ver2_v61.csv: overlap-free, score=72.951925


santa2025_ver2_v63.csv: overlap-free, score=72.947427


santa2025_ver2_v65.csv: overlap-free, score=72.935294


santa2025_ver2_v66.csv: overlap-free, score=72.938599


santa2025_ver2_v67.csv: overlap-free, score=72.938567


santa2025_ver2_v68.csv: overlap-free, score=72.939233


santa2025_ver2_v69.csv: overlap-free, score=72.850110


santa2025_ver2_v76.csv: overlap-free, score=72.826444


submission_70_926149550346.csv: overlap-free, score=70.926150


submission_70_936673758122.csv: overlap-free, score=70.936674


submission_JKoT1.csv: overlap-free, score=72.489483


submission_JKoT2.csv: overlap-free, score=72.489348


submission_JKoT3.csv: overlap-free, score=72.489488


submission_JKoT4.csv: overlap-free, score=72.489504


submission_opt1.csv: overlap-free, score=70.990692


In [5]:
# Check the candidate file that was submitted
print("=== Checking candidate_000.csv ===")
# Read the coordinate columns as text. Without an explicit dtype, pandas
# converts unprefixed numeric cells to float64 while loading, so any digits
# beyond float precision would be gone before this cell can inspect them -
# and this cell exists to find out whether precision was lost on save.
df_cand = pd.read_csv(
    '/home/code/submission_candidates/candidate_000.csv',
    dtype={'x': str, 'y': str, 'deg': str},
)
df_cand['x_str'] = df_cand['x'].apply(parse_s)
df_cand['y_str'] = df_cand['y'].apply(parse_s)
df_cand['deg_str'] = df_cand['deg'].apply(parse_s)

# Check group 040
group_040_cand = df_cand[df_cand['id'].str.startswith('040_')].copy()
print(f"Group 040 in candidate has {len(group_040_cand)} trees")
print(group_040_cand[['id', 'x_str', 'y_str', 'deg_str']].head(10))

# Check for overlaps: rebuild each tree from the text columns shown above
polygons_040_cand = [
    create_tree_polygon_decimal(
        Decimal(row['x_str']),
        Decimal(row['y_str']),
        Decimal(row['deg_str']),
    )
    for _, row in group_040_cand.iterrows()
]

overlap_040_cand = has_overlap(polygons_040_cand)
print(f"\nGroup 040 in candidate has overlap: {overlap_040_cand}")

=== Checking candidate_000.csv ===
Group 040 in candidate has 40 trees
        id                 x_str                 y_str             deg_str
780  040_0   -1.6896321034220907  -0.24789443118256277  252.06047297262413
781  040_1     0.953906339823958   -0.5526684960764324  252.15140577197445
782  040_2   -0.8213139514209225    0.3564335759017292  252.21717684648004
783  040_3    -1.689632103415816    0.4898261274657638  252.06047293571422
784  040_4    0.8425191623179998   -1.8823633520681906  246.37062226934367
785  040_5   -1.6909549084800608   -1.8342064373305842  242.54737844539687
786  040_6   -0.8741843279264643   -1.8697292861097166  252.22349472735905
787  040_7  -0.02647807382231965    -1.869688575766734   252.1118084316774
788  040_8   0.13307301320579076   0.29161414812890823   252.2171821987417
789  040_9   -1.6896321034444743    -1.030321431471047  252.06047310416656

Group 040 in candidate has overlap: False


In [6]:
# NOTE(review): the line below is a single comment - its "\n" sequences were
# never expanded into real newlines - so this cell executes nothing. The same
# statements appear as runnable code in the following cell.
# More detailed overlap check - find which pairs might be overlapping\nprint("Detailed overlap check for group 040 in candidate...\")\n\nfor i in range(len(polygons_040_cand)):\n    for j in range(i+1, len(polygons_040_cand)):\n        p1, p2 = polygons_040_cand[i], polygons_040_cand[j]\n        if p1.intersects(p2):\n            intersection = p1.intersection(p2)\n            if intersection.area > 0:\n                print(f\"Trees {i} and {j}: intersection area = {intersection.area:.15f}\")\n            elif not p1.touches(p2):\n                print(f\"Trees {i} and {j}: intersects but doesn't touch (edge case)\")

In [7]:
# Pairwise pass over group 040 of the candidate: report each intersecting pair
print("Detailed overlap check for group 040 in candidate...")

tree_count = len(polygons_040_cand)
for i in range(tree_count):
    for j in range(i + 1, tree_count):
        p1 = polygons_040_cand[i]
        p2 = polygons_040_cand[j]
        if not p1.intersects(p2):
            continue

        intersection = p1.intersection(p2)
        if intersection.area > 0:
            # Genuine overlap: the shared region has positive area.
            print(f"Trees {i} and {j}: intersection area = {intersection.area:.15f}")
        elif not p1.touches(p2):
            # Zero-area intersection that shapely does not classify as a touch.
            print(f"Trees {i} and {j}: intersects but doesn't touch (edge case)")

Detailed overlap check for group 040 in candidate...


In [8]:
# NOTE(review): the line below is a single comment - its "\n" sequences were
# never expanded into real newlines - so this cell executes nothing. The same
# statements appear as runnable code in the following cell.
# The issue might be that we need to preserve the EXACT string format from the original file\n# Let's just copy the original file directly and see if it has overlaps\n\nimport shutil\n\n# Copy the original file directly\nshutil.copy('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv', '/home/submission/submission.csv')\n\n# Verify it's identical\nwith open('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv', 'r') as f1:\n    orig = f1.read()\nwith open('/home/submission/submission.csv', 'r') as f2:\n    copy = f2.read()\n\nprint(f\"Files identical: {orig == copy}\")\nprint(f\"Original size: {len(orig)} bytes\")\nprint(f\"Copy size: {len(copy)} bytes\")

In [9]:
# The issue might be that we need to preserve the EXACT string format from the original file
# Let's just copy the original file directly and see if it has overlaps

import shutil

src_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv'
dst_path = '/home/submission/submission.csv'

# Copy the original file directly
shutil.copy(src_path, dst_path)

# Verify it's identical.
# Both files are opened in binary mode so the comparison is on the raw bytes
# (text mode would translate newlines first) and so len() really is the byte
# count that the messages below report.
with open(src_path, 'rb') as f1:
    orig = f1.read()
with open(dst_path, 'rb') as f2:
    copy = f2.read()

print(f"Files identical: {orig == copy}")
print(f"Original size: {len(orig)} bytes")
print(f"Copy size: {len(copy)} bytes")

Files identical: True
Original size: 1595712 bytes
Copy size: 1595712 bytes


In [13]:
# Overlap-scan (and score, when clean) every CSV in the telegram folder
print("=== Checking telegram submissions ===")
telegram_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram'
for fname in sorted(os.listdir(telegram_dir)):
    if not fname.endswith('.csv'):
        continue

    fpath = os.path.join(telegram_dir, fname)
    df_t = pd.read_csv(fpath)
    for source_col in ('x', 'y', 'deg'):
        df_t[source_col + '_str'] = df_t[source_col].apply(parse_s)

    overlapping = []
    for n in range(1, 201):
        group = df_t[df_t['id'].str.startswith(f'{n:03d}_')]
        if len(group) == 0:
            # Group absent from this file: nothing to check.
            continue

        polygons = [
            create_tree_polygon_decimal(
                Decimal(row['x_str']),
                Decimal(row['y_str']),
                Decimal(row['deg_str']),
            )
            for _, row in group.iterrows()
        ]

        if has_overlap(polygons):
            overlapping.append(n)

    if overlapping:
        # Only the first five offending groups are listed.
        print(f"{fname}: overlaps in groups {overlapping[:5]}...")
    else:
        score, _ = calculate_score_decimal(df_t)
        print(f"{fname}: overlap-free, score={score:.6f}")

=== Checking telegram submissions ===


71.97.csv: overlaps in groups [114, 178, 199]...


72.49.csv: overlap-free, score=72.495739


In [15]:
# Overlap-scan the remaining named CSVs that sit directly in the preoptimized folder
print("=== Checking other preoptimized CSVs ===")
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'
for fname in ['ensemble.csv', 'santa-2025.csv', 'submission.csv']:
    fpath = os.path.join(preopt_dir, fname)
    if not os.path.exists(fpath):
        # Missing files are skipped silently.
        continue

    df_p = pd.read_csv(fpath)
    for source_col in ('x', 'y', 'deg'):
        df_p[source_col + '_str'] = df_p[source_col].apply(parse_s)

    overlapping = []
    for n in range(1, 201):
        group = df_p[df_p['id'].str.startswith(f'{n:03d}_')]
        if len(group) == 0:
            continue

        polygons = [
            create_tree_polygon_decimal(
                Decimal(row['x_str']),
                Decimal(row['y_str']),
                Decimal(row['deg_str']),
            )
            for _, row in group.iterrows()
        ]

        if has_overlap(polygons):
            overlapping.append(n)

    if overlapping:
        # Only the first ten offending groups are listed.
        print(f"{fname}: overlaps in groups {overlapping[:10]}...")
    else:
        score, _ = calculate_score_decimal(df_p)
        print(f"{fname}: overlap-free, score={score:.6f}")

=== Checking other preoptimized CSVs ===


ensemble.csv: overlap-free, score=70.676102


santa-2025.csv: overlap-free, score=70.676102


submission.csv: overlaps in groups [12, 54, 63, 116, 134, 170]...
