# Loop 4 Analysis: Understanding Group 008 Overlap Issue

All 3 submissions failed with 'Overlapping trees in group 008'. The LB uses:
- `intersects() and not touches()` for overlap detection
- `scale_factor = 1e15` for coordinates (Decimal precision)

Let me analyze exactly what's happening.

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import warnings
warnings.filterwarnings('ignore')

# Match LB precision exactly
# Side effect: this changes the precision of the thread's current Decimal
# context, so every Decimal operation executed afterwards in this kernel
# is rounded to 25 significant digits.
getcontext().prec = 25
# Multiplier applied to every coordinate (in Decimal arithmetic) before it is
# converted to float; see create_tree_polygon_lb.
scale_factor = Decimal('1e15')

print('Libraries loaded')

Libraries loaded


In [2]:
# Tree outline dimensions, kept as Decimal so they mirror the LB metric.
# create_tree_polygon_lb halves/quarters the widths to place the vertices.

# Full widths of the three foliage tiers and of the trunk
base_w = Decimal('0.7')
mid_w = Decimal('0.4')
top_w = Decimal('0.25')
trunk_w = Decimal('0.15')

# y-levels of the outline, bottom of the foliage up to the tip
base_y = Decimal('0.0')
tier_2_y = Decimal('0.25')
tier_1_y = Decimal('0.5')
tip_y = Decimal('0.8')

# The trunk extends below the foliage base by its own height
trunk_h = Decimal('0.2')
trunk_bottom_y = -trunk_h

def create_tree_polygon_lb(center_x_str, center_y_str, angle_str):
    """Build one tree polygon the way the LB metric does.

    Every vertex is multiplied by ``scale_factor`` in Decimal arithmetic and
    only then converted to float. The outline is rotated about the origin and
    finally shifted to the (scaled) tree centre.

    Args:
        center_x_str: x position of the tree, anything ``Decimal()`` accepts
            (the notebook passes the CSV string with the 's' marker removed).
        center_y_str: y position of the tree, same format as ``center_x_str``.
        angle_str: rotation passed to ``affinity.rotate`` with its default
            settings (degrees, per the shapely documentation).

    Returns:
        A shapely ``Polygon`` whose coordinates are in scaled units
        (original units times ``scale_factor``).
    """
    center_x = Decimal(center_x_str)
    center_y = Decimal(center_y_str)
    angle = Decimal(angle_str)

    two, four = Decimal('2'), Decimal('4')
    half_trunk = trunk_w / two

    # Right-hand half of the outline, walking from just below the tip down to
    # the bottom-right corner of the trunk.
    right_side = [
        (top_w / two, tier_1_y),
        (top_w / four, tier_1_y),
        (mid_w / two, tier_2_y),
        (mid_w / four, tier_2_y),
        (base_w / two, base_y),
        (half_trunk, base_y),
        (half_trunk, trunk_bottom_y),
    ]
    # The tree is symmetric about x = 0: the left half is the right half with
    # x negated, visited in reverse so the ring keeps a single direction.
    left_side = [(-vx, vy) for vx, vy in reversed(right_side)]
    outline = [(Decimal('0.0'), tip_y)] + right_side + left_side

    initial_polygon = Polygon(
        [(float(vx * scale_factor), float(vy * scale_factor)) for vx, vy in outline]
    )

    rotated = affinity.rotate(initial_polygon, float(angle), origin=(0, 0))
    return affinity.translate(
        rotated,
        xoff=float(center_x * scale_factor),
        yoff=float(center_y * scale_factor),
    )

print('Tree polygon function defined (LB-compatible)')

Tree polygon function defined (LB-compatible)


In [3]:
def check_lb_overlaps(polygons):
    """Return the index pairs the LB rule treats as overlapping.

    A pair ``(i, j)`` with ``i < j`` is reported when the two geometries
    intersect but do not merely touch. ``touches`` is only evaluated for
    pairs that intersect.

    Args:
        polygons: indexable sequence of objects exposing ``intersects(other)``
            and ``touches(other)`` (shapely geometries in this notebook).

    Returns:
        List of ``(i, j)`` tuples ordered by ``i`` and then ``j``; empty when
        nothing overlaps or fewer than two polygons are given.
    """
    return [
        (i, j)
        for i, first in enumerate(polygons)
        for j in range(i + 1, len(polygons))
        if first.intersects(polygons[j]) and not first.touches(polygons[j])
    ]

print('LB overlap check defined')

LB overlap check defined


In [4]:
# Read the rejected submission and isolate the eight trees of group 008
print('Loading failed submission...')
failed_df = pd.read_csv('/home/submission/submission.csv')

# Group membership is encoded in the id prefix ('008_<tree index>')
group_008 = failed_df.loc[lambda frame: frame['id'].str.startswith('008_')].copy()
print(f'Group 008 has {group_008.shape[0]} trees')
print(group_008)

Loading failed submission...
Group 008 has 8 trees
       id                    x                    y                   deg
28  008_0  s-0.249562219766313  s-0.411091569437793   s51.766737817650146
29  008_1   s0.664667542875372  s-0.857305354342135  s113.629377785792869
30  008_2  s-0.664667542875372   s0.257305355324570  s293.629377730648400
31  008_3   s0.557305355180490   s0.326836893355834  s203.629377730656444
32  008_4   s0.249562219825709  s-0.188908429966201  s231.766737802774458
33  008_5  s-0.206588875358127  s-0.855262101537700  s293.629377785793395
34  008_6   s0.206588875126027   s0.255262101441014  s113.629377730638538
35  008_7  s-0.557305355191509  s-0.926836893355835   s23.629377730655502


In [5]:
# Build one LB-style polygon per tree of group 008
print('\nCreating polygons using LB method...')
polygons_008 = []
for _, tree in group_008.iterrows():
    # CSV values carry an 's' marker (e.g. 's-0.2495...'); drop it before parsing
    x, y, deg = (tree[column].replace('s', '') for column in ('x', 'y', 'deg'))
    polygons_008.append(create_tree_polygon_lb(x, y, deg))
    print(f"{tree['id']}: x={x[:20]}..., y={y[:20]}..., deg={deg[:20]}...")

print(f'\nCreated {len(polygons_008)} polygons')


Creating polygons using LB method...
008_0: x=-0.249562219766313..., y=-0.411091569437793..., deg=51.766737817650146...
008_1: x=0.664667542875372..., y=-0.857305354342135..., deg=113.629377785792869...
008_2: x=-0.664667542875372..., y=0.257305355324570..., deg=293.629377730648400...
008_3: x=0.557305355180490..., y=0.326836893355834..., deg=203.629377730656444...
008_4: x=0.249562219825709..., y=-0.188908429966201..., deg=231.766737802774458...
008_5: x=-0.206588875358127..., y=-0.855262101537700..., deg=293.629377785793395...
008_6: x=0.206588875126027..., y=0.255262101441014..., deg=113.629377730638538...
008_7: x=-0.557305355191509..., y=-0.926836893355835..., deg=23.629377730655502...

Created 8 polygons


In [6]:
# Apply the LB overlap rule to group 008 and print diagnostics per flagged pair
print('\nChecking for LB overlaps...')
overlapping = check_lb_overlaps(polygons_008)
if not overlapping:
    print('No overlaps found!')
else:
    print(f'FOUND {len(overlapping)} overlapping pairs!')
    for first_idx, second_idx in overlapping:
        print(f'  Trees {first_idx} and {second_idx} overlap')
        # Per-pair details: the two predicates, shared area and gap
        tree_a = polygons_008[first_idx]
        tree_b = polygons_008[second_idx]
        print(f'    intersects: {tree_a.intersects(tree_b)}')
        print(f'    touches: {tree_a.touches(tree_b)}')
        print(f'    intersection area: {tree_a.intersection(tree_b).area}')
        print(f'    distance: {tree_a.distance(tree_b)}')


Checking for LB overlaps...
No overlaps found!


In [7]:
# Same inspection for the reference file chistyakov_best.csv
print(f"\n{'=' * 60}")
print('Checking chistyakov_best.csv group 008...')
print('=' * 60)

chistyakov_df = pd.read_csv('/home/code/preoptimized/chistyakov_best.csv')
# Keep only the rows whose id starts with the group prefix
chistyakov_008 = chistyakov_df.loc[
    lambda frame: frame['id'].str.startswith('008_')
].copy()
print(f'Group 008 has {chistyakov_008.shape[0]} trees')
print(chistyakov_008)


Checking chistyakov_best.csv group 008...
Group 008 has 8 trees
       id                   x                   y                   deg
28  008_0  s0.341761386723401  s0.258872906204803   s23.646305309707607
29  008_1  s0.693215346819167  s0.330887937402410  s294.280355706130138
30  008_2  s1.456049782445265  s1.513814965497219  s203.804097456462870
31  008_3  s0.649816078749063  s0.776605161343583   s51.566980831660260
32  008_4  s1.102561731960507  s1.444703347389523  s113.629348963921103
33  008_5  s1.566337805581972  s0.340772842920035  s114.280356753286952
34  008_6  s1.148907092792580  s1.004217251693608  s231.904563200216842
35  008_7  s0.234182673239713  s1.449753245285132  s293.629357977598829


In [8]:
# Build the chistyakov group 008 polygons and run the LB overlap rule on them
print('\nCreating polygons for chistyakov group 008...')
chistyakov_polys = [
    create_tree_polygon_lb(
        tree['x'].replace('s', ''),
        tree['y'].replace('s', ''),
        tree['deg'].replace('s', ''),
    )
    for _, tree in chistyakov_008.iterrows()
]

overlapping = check_lb_overlaps(chistyakov_polys)
if not overlapping:
    print('No overlaps found in chistyakov group 008!')
else:
    print(f'FOUND {len(overlapping)} overlapping pairs!')
    for first_idx, second_idx in overlapping:
        print(f'  Trees {first_idx} and {second_idx} overlap')


Creating polygons for chistyakov group 008...
No overlaps found in chistyakov group 008!


In [9]:
# Run the LB overlap rule on every group (N = 1..200) of chistyakov_best.csv
print(f"\n{'=' * 60}")
print('Checking ALL groups in chistyakov_best.csv...')
print('=' * 60)

overlap_groups = []
for n in range(1, 201):
    # Rows of group n have ids '<n zero-padded to 3 digits>_<tree index>'
    group_rows = chistyakov_df[chistyakov_df['id'].str.startswith(f'{n:03d}_')]

    polygons = [
        create_tree_polygon_lb(
            tree['x'].replace('s', ''),
            tree['y'].replace('s', ''),
            tree['deg'].replace('s', ''),
        )
        for _, tree in group_rows.iterrows()
    ]

    overlapping = check_lb_overlaps(polygons)
    if not overlapping:
        continue
    overlap_groups.append(n)
    # Per-group detail is only printed for the small groups (n <= 20)
    if n <= 20:
        print(f'N={n}: {len(overlapping)} overlapping pairs')

print(f'\nTotal groups with overlaps: {len(overlap_groups)}')
if overlap_groups:
    print(f'Overlap groups: {overlap_groups[:20]}...')


Checking ALL groups in chistyakov_best.csv...



Total groups with overlaps: 0


In [10]:
# Let's check if the issue is with how we're reading the CSV
# The LB might be using a different parsing method

# Dump the untouched cell values (still carrying the 's' marker) so the exact
# text the LB would parse is visible.
print('Checking raw string values in submission...')
# Fixed: the header used '\\n', which printed a literal backslash-n instead of
# starting the header on a new line.
print('\nFailed submission group 008:')
for _, row in group_008.iterrows():
    print(f"  {row['id']}: x='{row['x']}', y='{row['y']}', deg='{row['deg']}'")

Checking raw string values in submission...
\nFailed submission group 008:
  008_0: x='s-0.249562219766313', y='s-0.411091569437793', deg='s51.766737817650146'
  008_1: x='s0.664667542875372', y='s-0.857305354342135', deg='s113.629377785792869'
  008_2: x='s-0.664667542875372', y='s0.257305355324570', deg='s293.629377730648400'
  008_3: x='s0.557305355180490', y='s0.326836893355834', deg='s203.629377730656444'
  008_4: x='s0.249562219825709', y='s-0.188908429966201', deg='s231.766737802774458'
  008_5: x='s-0.206588875358127', y='s-0.855262101537700', deg='s293.629377785793395'
  008_6: x='s0.206588875126027', y='s0.255262101441014', deg='s113.629377730638538'
  008_7: x='s-0.557305355191509', y='s-0.926836893355835', deg='s23.629377730655502'


In [11]:
# Baseline: sample_submission.csv is known to be valid, so it should be clean
print('Checking sample_submission.csv...')
sample_df = pd.read_csv('/home/code/preoptimized/sample_submission.csv')

overlap_groups_sample = []
for n in range(1, 201):
    # Select the rows of group n via its zero-padded id prefix
    in_group = sample_df['id'].str.startswith(f'{n:03d}_')

    polygons = []
    for _, tree in sample_df[in_group].iterrows():
        x, y, deg = (tree[column].replace('s', '') for column in ('x', 'y', 'deg'))
        polygons.append(create_tree_polygon_lb(x, y, deg))

    overlapping = check_lb_overlaps(polygons)
    if overlapping:
        overlap_groups_sample.append(n)

print(f'Total groups with overlaps in sample_submission: {len(overlap_groups_sample)}')

Checking sample_submission.csv...


Total groups with overlaps in sample_submission: 0


In [12]:
# Let's check ALL preoptimized files for overlaps
import os

print('Checking ALL preoptimized files...')
print('='*60)

preopt_files = [f for f in os.listdir('/home/code/preoptimized') if f.endswith('.csv')]

for filename in sorted(preopt_files):
    filepath = f'/home/code/preoptimized/{filename}'
    df = pd.read_csv(filepath)

    overlap_groups = []
    # Groups whose row count is not n cannot be validated. They are recorded
    # rather than dropped silently, so a file that is missing groups is not
    # mistaken for a clean file in the summary line.
    skipped_groups = []
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        config_df = df[df['id'].str.startswith(prefix)]

        if len(config_df) != n:
            skipped_groups.append(n)
            continue

        polygons = []
        for _, row in config_df.iterrows():
            # Strip the 's' marker the CSV puts in front of every number
            x = row['x'].replace('s', '')
            y = row['y'].replace('s', '')
            deg = row['deg'].replace('s', '')
            poly = create_tree_polygon_lb(x, y, deg)
            polygons.append(poly)

        overlapping = check_lb_overlaps(polygons)
        if overlapping:
            overlap_groups.append(n)

    summary = f'{filename}: {len(overlap_groups)} groups with overlaps'
    if skipped_groups:
        # Only shown when something was skipped, so the output for complete
        # files is unchanged.
        summary += f' ({len(skipped_groups)} groups skipped: unexpected tree count)'
    print(summary)

Checking ALL preoptimized files...


best_snapshot.csv: 2 groups with overlaps


better_ensemble.csv: 13 groups with overlaps


bucket_of_chump.csv: 11 groups with overlaps


chistyakov_best.csv: 0 groups with overlaps


ensemble_70_627.csv: 120 groups with overlaps


sample_submission.csv: 0 groups with overlaps


saspav_best.csv: 9 groups with overlaps


submission_70_926.csv: 0 groups with overlaps


In [13]:
# List the offending pairs for every overlapping group of best_snapshot.csv
print('Checking best_snapshot.csv for overlapping groups...')
best_snapshot_df = pd.read_csv('/home/code/preoptimized/best_snapshot.csv')

for n in range(1, 201):
    group_rows = best_snapshot_df[best_snapshot_df['id'].str.startswith(f'{n:03d}_')]

    # Only groups with exactly n trees are examined
    if len(group_rows) == n:
        polygons = [
            create_tree_polygon_lb(
                tree['x'].replace('s', ''),
                tree['y'].replace('s', ''),
                tree['deg'].replace('s', ''),
            )
            for _, tree in group_rows.iterrows()
        ]

        overlapping = check_lb_overlaps(polygons)
        if overlapping:
            print(f'N={n}: {len(overlapping)} overlapping pairs - {overlapping}')

Checking best_snapshot.csv for overlapping groups...


N=158: 1 overlapping pairs - [(6, 145)]


N=184: 1 overlapping pairs - [(45, 147)]


In [14]:
# Re-read the file that was actually submitted and repeat the group 008 check
print('Checking /home/submission/submission.csv...')
submission_df = pd.read_csv('/home/submission/submission.csv')

# Groups 1..200 hold 1..200 trees each, i.e. 200 * 201 / 2 rows in total
print(f'Total rows: {submission_df.shape[0]}')
print(f'Expected rows: {200 * 201 // 2}')

# Group 008 rows, selected by id prefix
group_008_sub = submission_df[submission_df['id'].str.startswith('008_')]
print(f'\nGroup 008 has {group_008_sub.shape[0]} trees')

# LB-style polygons for the group, then the LB overlap rule
polygons = []
for _, tree in group_008_sub.iterrows():
    x, y, deg = (tree[column].replace('s', '') for column in ('x', 'y', 'deg'))
    polygons.append(create_tree_polygon_lb(x, y, deg))

overlapping = check_lb_overlaps(polygons)
print(f'Overlapping pairs: {overlapping}')

Checking /home/submission/submission.csv...
Total rows: 20100
Expected rows: 20100

Group 008 has 8 trees
Overlapping pairs: []


In [15]:
# Run the LB overlap rule on every group of the submitted file
print('Checking ALL groups in submission file...')

overlap_groups_sub = []
for n in range(1, 201):
    group_rows = submission_df[submission_df['id'].str.startswith(f'{n:03d}_')]

    # A group with the wrong number of trees is reported and not checked
    if len(group_rows) != n:
        print(f'WARNING: Group {n} has {len(group_rows)} trees instead of {n}')
        continue

    polygons = [
        create_tree_polygon_lb(
            tree['x'].replace('s', ''),
            tree['y'].replace('s', ''),
            tree['deg'].replace('s', ''),
        )
        for _, tree in group_rows.iterrows()
    ]

    overlapping = check_lb_overlaps(polygons)
    if not overlapping:
        continue
    overlap_groups_sub.append(n)
    # Show at most the first three offending pairs per group
    print(f'N={n}: {len(overlapping)} overlapping pairs - {overlapping[:3]}...')

print(f'\nTotal groups with overlaps in submission: {len(overlap_groups_sub)}')

Checking ALL groups in submission file...
N=13: 1 overlapping pairs - [(5, 8)]...
N=18: 1 overlapping pairs - [(11, 12)]...
N=19: 1 overlapping pairs - [(2, 11)]...
N=25: 1 overlapping pairs - [(8, 15)]...
N=27: 1 overlapping pairs - [(13, 17)]...
N=28: 1 overlapping pairs - [(3, 21)]...
N=30: 1 overlapping pairs - [(1, 22)]...
N=31: 1 overlapping pairs - [(1, 16)]...


N=46: 1 overlapping pairs - [(12, 16)]...
N=48: 1 overlapping pairs - [(35, 37)]...
N=53: 1 overlapping pairs - [(5, 28)]...


N=69: 1 overlapping pairs - [(13, 62)]...
N=71: 1 overlapping pairs - [(28, 67)]...


N=102: 1 overlapping pairs - [(17, 71)]...
N=105: 1 overlapping pairs - [(14, 22)]...


N=138: 2 overlapping pairs - [(29, 112), (68, 126)]...


N=164: 2 overlapping pairs - [(57, 107), (73, 106)]...


N=179: 1 overlapping pairs - [(24, 90)]...


N=184: 3 overlapping pairs - [(4, 151), (39, 80), (45, 147)]...



Total groups with overlaps in submission: 19


In [16]:
# Hypothesis: the LB validates groups in order and group 008 is simply the
# first one that trips some OTHER check (CSV parsing, coordinate bounds, ...).
# Here: print each centre of group 008 and flag any coordinate beyond +/-100.

print('Checking coordinate bounds for group 008...')
for _, tree in group_008_sub.iterrows():
    x, y = (float(tree[axis].replace('s', '')) for axis in ('x', 'y'))
    print(f"{tree['id']}: x={x:.15f}, y={y:.15f}")
    if any(abs(value) > 100 for value in (x, y)):
        print('  WARNING: Out of bounds!')

Checking coordinate bounds for group 008...
008_0: x=-0.249562219766313, y=-0.411091569437793
008_1: x=0.664667542875372, y=-0.857305354342135
008_2: x=-0.664667542875372, y=0.257305355324570
008_3: x=0.557305355180490, y=0.326836893355834
008_4: x=0.249562219825709, y=-0.188908429966201
008_5: x=-0.206588875358127, y=-0.855262101537700
008_6: x=0.206588875126027, y=0.255262101441014
008_7: x=-0.557305355191509, y=-0.926836893355835


In [17]:
# Wait - let me check if the LB might be using a DIFFERENT overlap check
# Maybe it's using the Separating Axis Theorem (SAT) instead of Shapely?

# Let's also check if there's a precision issue with very small distances
#
# Fixed (hidden state): this cell used the generic `polygons` variable, which
# the all-groups loop over the submission file reassigns on every iteration.
# By the time this cell runs it holds the last group processed, not group 008.
# `polygons_008` was built from group 008 of the same submission file and is
# never reassigned, so it is used instead.
#
# Fixed (units): polygon coordinates are multiplied by scale_factor, so
# distances come back in scaled units. The "very close" threshold of 1e-6
# (in original tree units) has to be scaled the same way; compared against
# the raw 1e-6 it could only ever match a distance of exactly zero.
close_threshold = 1e-6 * float(scale_factor)

print('Checking minimum distances between trees in group 008...')
for i in range(len(polygons_008)):
    for j in range(i+1, len(polygons_008)):
        dist = polygons_008[i].distance(polygons_008[j])
        if dist < close_threshold:  # Very close
            # The printed distance is in scaled units (original * scale_factor)
            print(f'Trees {i} and {j}: distance = {dist}')
            print(f'  intersects: {polygons_008[i].intersects(polygons_008[j])}')
            print(f'  touches: {polygons_008[i].touches(polygons_008[j])}')

Checking minimum distances between trees in group 008...


In [None]:
# Let me compare the submission's group 008 with the source files
# (Reconstructed: the cell source had been stored as a single line with literal
# "\n" and "\"" escape sequences, which made the whole cell one comment, so
# none of the statements below were ever executed.)
print('Comparing group 008 across files...')
print('='*80)

# Submission
print('\nSubmission group 008:')
for _, row in group_008_sub.iterrows():
    print(f"  {row['id']}: {row['x']}, {row['y']}, {row['deg']}")

# best_snapshot.csv
print('\nbest_snapshot.csv group 008:')
best_008 = best_snapshot_df[best_snapshot_df['id'].str.startswith('008_')]
for _, row in best_008.iterrows():
    print(f"  {row['id']}: {row['x']}, {row['y']}, {row['deg']}")

# chistyakov_best.csv
print('\nchistyakov_best.csv group 008:')
for _, row in chistyakov_008.iterrows():
    print(f"  {row['id']}: {row['x']}, {row['y']}, {row['deg']}")

In [None]:
# submission_70_926.csv is expected to match chistyakov; verify it is overlap-free
print(f"\n{'=' * 60}")
print('Checking submission_70_926.csv...')
print('=' * 60)

sub_926_df = pd.read_csv('/home/code/preoptimized/submission_70_926.csv')

overlap_groups_926 = []
for n in range(1, 201):
    # Rows of group n, selected by the zero-padded id prefix
    in_group = sub_926_df['id'].str.startswith(f'{n:03d}_')

    polygons = []
    for _, tree in sub_926_df[in_group].iterrows():
        x, y, deg = (tree[column].replace('s', '') for column in ('x', 'y', 'deg'))
        polygons.append(create_tree_polygon_lb(x, y, deg))

    overlapping = check_lb_overlaps(polygons)
    if overlapping:
        overlap_groups_926.append(n)

print(f'Total groups with overlaps: {len(overlap_groups_926)}')
if overlap_groups_926:
    print(f'Overlap groups: {overlap_groups_926[:20]}...')