# Loop 3 Analysis: Fix Overlap Detection

The LB uses `intersects() and not touches()` NOT `intersection.area > 0`.
These are DIFFERENT checks. Let's verify and fix.

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.ops import unary_union
import warnings
warnings.filterwarnings('ignore')

# Tree geometry
TRUNK_W = 0.15
TRUNK_H = 0.2
BASE_W = 0.7
MID_W = 0.4
TOP_W = 0.25
TIP_Y = 0.8
TIER_1_Y = 0.5
TIER_2_Y = 0.25
BASE_Y = 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

BASE_TREE_VERTICES = np.array([
    [0.0, TIP_Y],
    [TOP_W/2, TIER_1_Y],
    [TOP_W/4, TIER_1_Y],
    [MID_W/2, TIER_2_Y],
    [MID_W/4, TIER_2_Y],
    [BASE_W/2, BASE_Y],
    [TRUNK_W/2, BASE_Y],
    [TRUNK_W/2, TRUNK_BOTTOM_Y],
    [-TRUNK_W/2, TRUNK_BOTTOM_Y],
    [-TRUNK_W/2, BASE_Y],
    [-BASE_W/2, BASE_Y],
    [-MID_W/4, TIER_2_Y],
    [-MID_W/2, TIER_2_Y],
    [-TOP_W/4, TIER_1_Y],
    [-TOP_W/2, TIER_1_Y],
])

def create_tree_polygon(x, y, deg):
    angle_rad = np.radians(deg)
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    rotation_matrix = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
    rotated = BASE_TREE_VERTICES @ rotation_matrix.T
    translated = rotated + np.array([x, y])
    return Polygon(translated)

def parse_submission(df):
    result = df.copy()
    for col in ['x', 'y', 'deg']:
        result[col] = result[col].str.replace('s', '').astype(float)
    return result

print('Functions defined')

Functions defined


In [2]:
# Define BOTH overlap checks to compare
def check_overlaps_area_based(polygons):
    """OLD check: intersection.area > 0"""
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            intersection = polygons[i].intersection(polygons[j])
            if intersection.area > 0:
                return True, (i, j), intersection.area
    return False, None, 0

def check_overlaps_lb_style(polygons):
    """LB check: intersects() and not touches()"""
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]) and not polygons[i].touches(polygons[j]):
                return True, (i, j)
    return False, None

print('Both overlap checks defined')

Both overlap checks defined


In [3]:
# Load current submission and check group 008 specifically
print('Loading current submission...')
df = pd.read_csv('/home/submission/submission.csv')
parsed = parse_submission(df)

# Check group 008
config_df = parsed[parsed['id'].str.startswith('008_')]
print(f'Group 008 has {len(config_df)} trees')
print(config_df)

polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]

# Check with BOTH methods
area_overlap, area_pair, area_val = check_overlaps_area_based(polygons)
lb_overlap, lb_pair = check_overlaps_lb_style(polygons)

print(f'\nArea-based check: overlap={area_overlap}, pair={area_pair}, area={area_val}')
print(f'LB-style check: overlap={lb_overlap}, pair={lb_pair}')

Loading current submission...
Group 008 has 8 trees
       id         x         y         deg
28  008_0 -0.249562 -0.411092   51.766738
29  008_1  0.664668 -0.857305  113.629378
30  008_2 -0.664668  0.257305  293.629378
31  008_3  0.557305  0.326837  203.629378
32  008_4  0.249562 -0.188908  231.766738
33  008_5 -0.206589 -0.855262  293.629378
34  008_6  0.206589  0.255262  113.629378
35  008_7 -0.557305 -0.926837   23.629378

Area-based check: overlap=True, pair=(1, 5), area=6.162975822039155e-32
LB-style check: overlap=True, pair=(1, 5)


In [4]:
# Check ALL groups with BOTH methods
print('Checking all groups with BOTH methods...')

area_overlaps = []
lb_overlaps = []

for n in range(1, 201):
    prefix = f'{n:03d}_'
    config_df = parsed[parsed['id'].str.startswith(prefix)]
    polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
    
    area_has, _, _ = check_overlaps_area_based(polygons)
    lb_has, _ = check_overlaps_lb_style(polygons)
    
    if area_has:
        area_overlaps.append(n)
    if lb_has:
        lb_overlaps.append(n)

print(f'Area-based overlaps: {len(area_overlaps)} groups')
print(f'LB-style overlaps: {len(lb_overlaps)} groups')

# Find groups that differ
only_lb = set(lb_overlaps) - set(area_overlaps)
only_area = set(area_overlaps) - set(lb_overlaps)
print(f'\nGroups with LB overlap but NOT area overlap: {sorted(only_lb)}')
print(f'Groups with area overlap but NOT LB overlap: {sorted(only_area)}')

Checking all groups with BOTH methods...


Area-based overlaps: 31 groups
LB-style overlaps: 33 groups

Groups with LB overlap but NOT area overlap: [40, 71]
Groups with area overlap but NOT LB overlap: []


In [5]:
# Check ALL preoptimized files with LB-style overlap check
print('Checking preoptimized files with LB-style overlap check...')
print('='*60)

submissions = [
    'ensemble_70_627.csv',
    'better_ensemble.csv',
    'best_snapshot.csv',
    'chistyakov_best.csv',
    'submission_70_926.csv',
    'saspav_best.csv',
    'bucket_of_chump.csv',
]

def get_bounding_box_side(polygons):
    if not polygons:
        return 0.0
    union = unary_union(polygons)
    bounds = union.bounds
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    return max(width, height)

def calculate_score(df):
    total_score = 0.0
    sides = {}
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        config_df = df[df['id'].str.startswith(prefix)]
        if len(config_df) != n:
            continue
        polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
        side = get_bounding_box_side(polygons)
        sides[n] = side
        total_score += (side ** 2) / n
    return total_score, sides

all_submissions = {}
for filename in submissions:
    try:
        df = pd.read_csv(f'/home/code/preoptimized/{filename}')
        parsed_sub = parse_submission(df)
        score, sides = calculate_score(parsed_sub)
        
        # Check LB-style overlaps
        n_lb_overlaps = 0
        lb_overlap_groups = []
        for n in range(1, 201):
            prefix = f'{n:03d}_'
            config_df = parsed_sub[parsed_sub['id'].str.startswith(prefix)]
            polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
            has_overlap, _ = check_overlaps_lb_style(polygons)
            if has_overlap:
                n_lb_overlaps += 1
                lb_overlap_groups.append(n)
        
        all_submissions[filename] = {
            'df': parsed_sub, 
            'score': score, 
            'sides': sides, 
            'lb_overlaps': n_lb_overlaps,
            'lb_overlap_groups': lb_overlap_groups
        }
        status = 'VALID' if n_lb_overlaps == 0 else f'{n_lb_overlaps} LB overlaps'
        print(f'{filename:25s}: {score:.6f} ({status})')
    except Exception as e:
        print(f'{filename:25s}: ERROR - {e}')

print('='*60)

Checking preoptimized files with LB-style overlap check...


ensemble_70_627.csv      : 70.627582 (121 LB overlaps)


better_ensemble.csv      : 70.647306 (9 LB overlaps)


best_snapshot.csv        : 70.627582 (8 LB overlaps)


chistyakov_best.csv      : 70.926150 (VALID)


submission_70_926.csv    : 70.926150 (VALID)


saspav_best.csv          : 70.630478 (7 LB overlaps)


bucket_of_chump.csv      : 70.676501 (16 LB overlaps)


In [6]:
# Create PROPER ensemble using LB-style overlap check
print('Creating proper ensemble with LB-style overlap check...')

best_configs = {}
best_sources = {}
best_sides = {}

for n in range(1, 201):
    prefix = f'{n:03d}_'
    best_side = float('inf')
    best_config = None
    best_source = None
    
    for filename, data in all_submissions.items():
        df = data['df']
        config_df = df[df['id'].str.startswith(prefix)].copy()
        
        if len(config_df) != n:
            continue
        
        # Create polygons and check LB-style overlaps
        polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
        has_overlap, _ = check_overlaps_lb_style(polygons)
        
        if not has_overlap:
            side = get_bounding_box_side(polygons)
            if side < best_side:
                best_side = side
                best_config = config_df
                best_source = filename
    
    if best_config is not None:
        best_configs[n] = best_config
        best_sources[n] = best_source
        best_sides[n] = best_side
    else:
        print(f'WARNING: No valid config found for N={n}')

print(f'Found valid configs for {len(best_configs)} out of 200 N values')

Creating proper ensemble with LB-style overlap check...


Found valid configs for 200 out of 200 N values


In [7]:
# Build and verify the ensemble
print('Building ensemble...')

ensemble_data = []
for n in range(1, 201):
    if n in best_configs:
        for _, row in best_configs[n].iterrows():
            ensemble_data.append({
                'id': row['id'],
                'x': row['x'],
                'y': row['y'],
                'deg': row['deg']
            })

ensemble_df = pd.DataFrame(ensemble_data)
print(f'Ensemble shape: {ensemble_df.shape}')

# Calculate score
ensemble_score = sum((best_sides[n] ** 2) / n for n in best_sides)
print(f'Ensemble score: {ensemble_score:.6f}')

# Source breakdown
from collections import Counter
source_counts = Counter(best_sources.values())
print('\\nSource breakdown:')
for source, count in source_counts.most_common():
    print(f'  {source}: {count} configs')

Building ensemble...


Ensemble shape: (20100, 4)
Ensemble score: 70.627589
\nSource breakdown:
  best_snapshot.csv: 86 configs
  better_ensemble.csv: 71 configs
  ensemble_70_627.csv: 35 configs
  bucket_of_chump.csv: 8 configs


In [8]:
# CRITICAL: Verify the new ensemble with LB-style check
print('Verifying new ensemble with LB-style check...')

n_lb_overlaps = 0
lb_overlap_groups = []

for n in range(1, 201):
    prefix = f'{n:03d}_'
    config_df = ensemble_df[ensemble_df['id'].str.startswith(prefix)]
    polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
    has_overlap, pair = check_overlaps_lb_style(polygons)
    if has_overlap:
        n_lb_overlaps += 1
        lb_overlap_groups.append(n)
        print(f'N={n}: LB overlap at pair {pair}')

print(f'\\nTotal LB overlaps: {n_lb_overlaps}')

Verifying new ensemble with LB-style check...


\nTotal LB overlaps: 0


In [9]:
# Compare new ensemble with current submission
print('Comparing new ensemble with current submission...')

current_df = pd.read_csv('/home/submission/submission.csv')
current_parsed = parse_submission(current_df)

# Check if they're the same
differences = 0
for n in range(1, 201):
    prefix = f'{n:03d}_'
    new_config = ensemble_df[ensemble_df['id'].str.startswith(prefix)].sort_values('id')
    old_config = current_parsed[current_parsed['id'].str.startswith(prefix)].sort_values('id')
    
    # Compare values
    for col in ['x', 'y', 'deg']:
        new_vals = new_config[col].values
        old_vals = old_config[col].values
        if not np.allclose(new_vals, old_vals, rtol=1e-10):
            differences += 1
            print(f'N={n}: difference in {col}')
            break

print(f'\\nTotal configs with differences: {differences}')

Comparing new ensemble with current submission...
N=12: difference in x
N=17: difference in x
N=18: difference in x


N=50: difference in x
N=66: difference in x


\nTotal configs with differences: 5


In [10]:
# Save the new ensemble
print('Saving new ensemble...')

import os
os.makedirs('/home/submission', exist_ok=True)
os.makedirs('/home/code/experiments/004_lb_valid_ensemble', exist_ok=True)

# Format with 's' prefix
submission = ensemble_df.copy()
for col in ['x', 'y', 'deg']:
    submission[col] = 's' + submission[col].apply(lambda x: f'{x:.15f}')

submission.to_csv('/home/submission/submission.csv', index=False)
submission.to_csv('/home/code/experiments/004_lb_valid_ensemble/submission.csv', index=False)

print('Submission saved!')
print(f'Score: {ensemble_score:.6f}')
print(f'Target: 68.896973')
print(f'Gap: {ensemble_score - 68.896973:.6f}')

Saving new ensemble...
Submission saved!
Score: 70.627589
Target: 68.896973
Gap: 1.730616


In [11]:
# Final verification - reload and check
print('Final verification...')

final_df = pd.read_csv('/home/submission/submission.csv')
final_parsed = parse_submission(final_df)

n_lb_overlaps = 0
for n in range(1, 201):
    prefix = f'{n:03d}_'
    config_df = final_parsed[final_parsed['id'].str.startswith(prefix)]
    polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
    has_overlap, _ = check_overlaps_lb_style(polygons)
    if has_overlap:
        n_lb_overlaps += 1
        print(f'N={n}: LB overlap found!')

print(f'\\nFinal LB overlaps: {n_lb_overlaps}')
if n_lb_overlaps == 0:
    print('SUBMISSION IS VALID FOR LB!')

Final verification...
N=8: LB overlap found!
N=17: LB overlap found!
N=19: LB overlap found!
N=26: LB overlap found!
N=27: LB overlap found!
N=29: LB overlap found!
N=30: LB overlap found!
N=33: LB overlap found!


N=40: LB overlap found!
N=41: LB overlap found!
N=42: LB overlap found!
N=46: LB overlap found!
N=48: LB overlap found!
N=50: LB overlap found!
N=53: LB overlap found!


N=61: LB overlap found!
N=66: LB overlap found!
N=69: LB overlap found!
N=71: LB overlap found!


N=75: LB overlap found!


N=98: LB overlap found!
N=102: LB overlap found!
N=103: LB overlap found!


N=125: LB overlap found!


N=138: LB overlap found!


N=154: LB overlap found!


N=160: LB overlap found!
N=161: LB overlap found!
N=164: LB overlap found!


N=165: LB overlap found!
N=166: LB overlap found!


N=179: LB overlap found!


N=184: LB overlap found!


\nFinal LB overlaps: 33


In [12]:
# Check precision issue
print('Checking precision issue...')

# Compare original ensemble_df with reloaded
for n in [8, 17, 19]:  # First few overlap groups
    prefix = f'{n:03d}_'
    orig_config = ensemble_df[ensemble_df['id'].str.startswith(prefix)]
    reload_config = final_parsed[final_parsed['id'].str.startswith(prefix)]
    
    print(f'\nN={n}:')
    for col in ['x', 'y', 'deg']:
        orig_vals = orig_config[col].values
        reload_vals = reload_config[col].values
        diff = np.abs(orig_vals - reload_vals)
        print(f'  {col}: max diff = {diff.max()}')

Checking precision issue...

N=8:
  x: max diff = 4.440892098500626e-16
  y: max diff = 3.885780586188048e-16
  deg: max diff = 0.0

N=17:
  x: max diff = 4.440892098500626e-16
  y: max diff = 4.440892098500626e-16
  deg: max diff = 0.0

N=19:
  x: max diff = 4.996003610813204e-16
  y: max diff = 4.718447854656915e-16
  deg: max diff = 0.0


In [13]:
# Check which source was used for N=8 and verify it has no overlap
print('Checking source for N=8...')
print(f'Source: {best_sources[8]}')

# Load the source file and check N=8
source_df = all_submissions[best_sources[8]]['df']
config_df = source_df[source_df['id'].str.startswith('008_')]
polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
has_overlap, pair = check_overlaps_lb_style(polygons)
print(f'Source N=8 has LB overlap: {has_overlap}, pair: {pair}')

# Now check our ensemble_df N=8
ensemble_config = ensemble_df[ensemble_df['id'].str.startswith('008_')]
ensemble_polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in ensemble_config.iterrows()]
has_overlap2, pair2 = check_overlaps_lb_style(ensemble_polygons)
print(f'Ensemble N=8 has LB overlap: {has_overlap2}, pair: {pair2}')

Checking source for N=8...
Source: better_ensemble.csv
Source N=8 has LB overlap: False, pair: None
Ensemble N=8 has LB overlap: False, pair: None


In [14]:
# Check the actual values in the saved CSV vs original
print('Checking saved CSV values...')

# Read raw CSV
with open('/home/submission/submission.csv', 'r') as f:
    lines = f.readlines()

# Find N=8 lines
n8_lines = [l for l in lines if l.startswith('008_')]
print('N=8 in saved CSV:')
for l in n8_lines:
    print(l.strip())

print('\nN=8 in ensemble_df:')
for _, row in ensemble_df[ensemble_df['id'].str.startswith('008_')].iterrows():
    print(f"{row['id']},{row['x']:.15f},{row['y']:.15f},{row['deg']:.15f}")

Checking saved CSV values...
N=8 in saved CSV:
008_0,s-0.249562219751231,s-0.411091569391777,s51.766737817650601
008_1,s0.664667542890432,s-0.857305354296168,s113.629377785790368
008_2,s-0.664667542860286,s0.257305355370520,s293.629377730637884
008_3,s0.557305355195559,s0.326836893401800,s203.629377730656273
008_4,s0.249562219840789,s-0.188908429920252,s231.766737802778408
008_5,s-0.206588875343059,s-0.855262101491689,s293.629377785790325
008_6,s0.206588875141135,s0.255262101487013,s113.629377730637827
008_7,s-0.557305355176436,s-0.926836893309856,s23.629377730657012

N=8 in ensemble_df:
008_0,-0.249562219751231,-0.411091569391777,51.766737817650601
008_1,0.664667542890432,-0.857305354296168,113.629377785790368
008_2,-0.664667542860286,0.257305355370520,293.629377730637884
008_3,0.557305355195559,0.326836893401800,203.629377730656273
008_4,0.249562219840789,-0.188908429920252,231.766737802778408
008_5,-0.206588875343059,-0.855262101491689,293.629377785790325
008_6,0.206588875141135,0.2

In [15]:
# Check better_ensemble.csv N=8 directly from file
print('Checking better_ensemble.csv N=8 directly...')

be_df = pd.read_csv('/home/code/preoptimized/better_ensemble.csv')
be_parsed = parse_submission(be_df)
be_n8 = be_parsed[be_parsed['id'].str.startswith('008_')]

print('better_ensemble.csv N=8:')
for _, row in be_n8.iterrows():
    print(f"{row['id']},{row['x']:.15f},{row['y']:.15f},{row['deg']:.15f}")

# Check overlap
be_polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in be_n8.iterrows()]
has_overlap, pair = check_overlaps_lb_style(be_polygons)
print(f'\nbetter_ensemble.csv N=8 has LB overlap: {has_overlap}, pair: {pair}')

Checking better_ensemble.csv N=8 directly...
better_ensemble.csv N=8:
008_0,-0.249562219751231,-0.411091569391777,51.766737817650601
008_1,0.664667542890432,-0.857305354296168,113.629377785790368
008_2,-0.664667542860286,0.257305355370520,293.629377730637884
008_3,0.557305355195559,0.326836893401800,203.629377730656273
008_4,0.249562219840789,-0.188908429920252,231.766737802778408
008_5,-0.206588875343059,-0.855262101491689,293.629377785790325
008_6,0.206588875141135,0.255262101487013,113.629377730637827
008_7,-0.557305355176436,-0.926836893309856,23.629377730657012

better_ensemble.csv N=8 has LB overlap: False, pair: None


In [16]:
# Debug: Check exact float values
print('Comparing exact float values...')

# From better_ensemble.csv
be_df = pd.read_csv('/home/code/preoptimized/better_ensemble.csv')
be_parsed = parse_submission(be_df)
be_n8 = be_parsed[be_parsed['id'].str.startswith('008_')]

# From our saved submission
sub_df = pd.read_csv('/home/submission/submission.csv')
sub_parsed = parse_submission(sub_df)
sub_n8 = sub_parsed[sub_parsed['id'].str.startswith('008_')]

print('Comparing x values:')
for i in range(8):
    be_x = be_n8.iloc[i]['x']
    sub_x = sub_n8.iloc[i]['x']
    print(f'  Tree {i}: be={be_x:.20f}, sub={sub_x:.20f}, diff={abs(be_x-sub_x):.20e}')

Comparing exact float values...
Comparing x values:
  Tree 0: be=-0.24956221975123124568, sub=-0.24956221975123099588, diff=2.49800180540660221595e-16
  Tree 1: be=0.66466754289043228354, sub=0.66466754289043195048, diff=3.33066907387546962127e-16
  Tree 2: be=-0.66466754286028606469, sub=-0.66466754286028595367, diff=1.11022302462515654042e-16
  Tree 3: be=0.55730535519555901569, sub=0.55730535519555901569, diff=0.00000000000000000000e+00
  Tree 4: be=0.24956221984078910681, sub=0.24956221984078899578, diff=1.11022302462515654042e-16
  Tree 5: be=-0.20658887534305930833, sub=-0.20658887534305900302, diff=3.05311331771918048616e-16
  Tree 6: be=0.20658887514113535855, sub=0.20658887514113499773, diff=3.60822483003175875638e-16
  Tree 7: be=-0.55730535517643642329, sub=-0.55730535517643597920, diff=4.44089209850062616169e-16


In [17]:
# Check chistyakov_best.csv - the only truly valid file
print('Checking chistyakov_best.csv...')

ch_df = pd.read_csv('/home/code/preoptimized/chistyakov_best.csv')
ch_parsed = parse_submission(ch_df)

# Check all groups
n_overlaps = 0
for n in range(1, 201):
    prefix = f'{n:03d}_'
    config_df = ch_parsed[ch_parsed['id'].str.startswith(prefix)]
    polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
    has_overlap, _ = check_overlaps_lb_style(polygons)
    if has_overlap:
        n_overlaps += 1

print(f'chistyakov_best.csv LB overlaps: {n_overlaps}')

# Calculate score
ch_score, ch_sides = calculate_score(ch_parsed)
print(f'chistyakov_best.csv score: {ch_score:.6f}')

Checking chistyakov_best.csv...


chistyakov_best.csv LB overlaps: 0


chistyakov_best.csv score: 70.926150


In [18]:
# Create a SAFE ensemble: use chistyakov as fallback for any config with overlap
print('Creating SAFE ensemble...')

# First, identify which configs have overlaps in all sources except chistyakov
ch_df = pd.read_csv('/home/code/preoptimized/chistyakov_best.csv')
ch_parsed = parse_submission(ch_df)
ch_score, ch_sides = calculate_score(ch_parsed)

safe_configs = {}
safe_sources = {}
safe_sides = {}

for n in range(1, 201):
    prefix = f'{n:03d}_'
    best_side = float('inf')
    best_config = None
    best_source = None
    
    # Try all sources except chistyakov first
    for filename, data in all_submissions.items():
        if filename == 'chistyakov_best.csv':
            continue
        
        df = data['df']
        config_df = df[df['id'].str.startswith(prefix)].copy()
        
        if len(config_df) != n:
            continue
        
        # Create polygons and check LB-style overlaps
        polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
        has_overlap, _ = check_overlaps_lb_style(polygons)
        
        if not has_overlap:
            side = get_bounding_box_side(polygons)
            if side < best_side:
                best_side = side
                best_config = config_df
                best_source = filename
    
    # If no valid config found, use chistyakov
    if best_config is None:
        config_df = ch_parsed[ch_parsed['id'].str.startswith(prefix)].copy()
        polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
        best_side = get_bounding_box_side(polygons)
        best_config = config_df
        best_source = 'chistyakov_best.csv'
    
    safe_configs[n] = best_config
    safe_sources[n] = best_source
    safe_sides[n] = best_side

# Calculate score
safe_score = sum((safe_sides[n] ** 2) / n for n in safe_sides)
print(f'Safe ensemble score: {safe_score:.6f}')

# Source breakdown
from collections import Counter
source_counts = Counter(safe_sources.values())
print('\\nSource breakdown:')
for source, count in source_counts.most_common():
    print(f'  {source}: {count} configs')

Creating SAFE ensemble...


Safe ensemble score: 70.627589
\nSource breakdown:
  best_snapshot.csv: 86 configs
  better_ensemble.csv: 71 configs
  ensemble_70_627.csv: 35 configs
  bucket_of_chump.csv: 8 configs


In [None]:
# Detailed analysis of group 008
print('Detailed analysis of group 008...')
config_df = parsed[parsed['id'].str.startswith('008_')]
polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]

for i in range(len(polygons)):
    for j in range(i+1, len(polygons)):
        intersects = polygons[i].intersects(polygons[j])
        touches = polygons[i].touches(polygons[j])
        intersection = polygons[i].intersection(polygons[j])
        
        if intersects:
            print(f'Trees {i} and {j}:')
            print(f'  intersects={intersects}, touches={touches}')
            print(f'  intersection type: {intersection.geom_type}')
            print(f'  intersection area: {intersection.area}')
            print(f'  LB would flag: {intersects and not touches}')