# Loop 3 Analysis: Fix Overlap Detection

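The leaderboard (LB) validator flags a pair of trees as overlapping when `intersects() and not touches()` is true, NOT when `intersection.area > 0`.
These are DIFFERENT checks: a pair can have a computed intersection area that is not greater than 0 and still be flagged by the LB predicate. Let's verify the difference on the current submission and fix the ensemble accordingly.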

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.ops import unary_union
import warnings
warnings.filterwarnings('ignore')

# Tree geometry: widths, heights and tier levels of the tree outline
TRUNK_W, TRUNK_H = 0.15, 0.2
BASE_W, MID_W, TOP_W = 0.7, 0.4, 0.25
TIP_Y, TIER_1_Y, TIER_2_Y, BASE_Y = 0.8, 0.5, 0.25, 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

# Right-hand half of the outline, walked from just below the tip down to the
# bottom-right corner of the trunk. The outline is symmetric about x = 0, so
# the left-hand half is this list reversed with every x negated.
_RIGHT_HALF_VERTICES = [
    (TOP_W / 2, TIER_1_Y),
    (TOP_W / 4, TIER_1_Y),
    (MID_W / 2, TIER_2_Y),
    (MID_W / 4, TIER_2_Y),
    (BASE_W / 2, BASE_Y),
    (TRUNK_W / 2, BASE_Y),
    (TRUNK_W / 2, TRUNK_BOTTOM_Y),
]

# Full 15-vertex outline: tip, right side going down, then left side going up.
BASE_TREE_VERTICES = np.array(
    [(0.0, TIP_Y)]
    + _RIGHT_HALF_VERTICES
    + [(-vx, vy) for vx, vy in reversed(_RIGHT_HALF_VERTICES)]
)

def create_tree_polygon(x, y, deg):
    """Build the polygon of one tree placed at ``(x, y)`` and rotated by ``deg``.

    The base outline in ``BASE_TREE_VERTICES`` is rotated about its own origin
    by ``deg`` degrees (counter-clockwise for positive values) and only then
    shifted by ``(x, y)``.
    """
    theta = np.radians(deg)
    c, s = np.cos(theta), np.sin(theta)
    rotation = np.array([[c, -s], [s, c]])
    # Vertices are stored as rows, so multiply by the transposed matrix.
    placed = BASE_TREE_VERTICES @ rotation.T + np.array([x, y])
    return Polygon(placed)

def parse_submission(df):
    """Return a copy of ``df`` with the ``x``, ``y`` and ``deg`` columns as floats.

    String values have every ``'s'`` character removed before conversion
    (submission files store the numbers as text with an ``s`` marker).
    Columns that are already numeric are converted directly, so a file written
    without the marker no longer fails on the ``.str`` accessor.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns ``x``, ``y`` and ``deg``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.
    """
    result = df.copy()
    for col in ['x', 'y', 'deg']:
        column = result[col]
        if pd.api.types.is_numeric_dtype(column):
            # Nothing to strip, and .str would raise AttributeError here.
            result[col] = column.astype(float)
        else:
            # regex=False: 's' is a literal marker, not a pattern.
            result[col] = column.str.replace('s', '', regex=False).astype(float)
    return result

print('Functions defined')

Functions defined


In [2]:
# Define BOTH overlap checks to compare
def check_overlaps_area_based(polygons):
    """OLD check: a pair overlaps when ``intersection.area > 0``.

    Pairs are scanned in index order; the scan stops at the first hit.

    Returns ``(True, (i, j), area)`` for the first overlapping pair, or
    ``(False, None, 0)`` when no pair has a positive intersection area.
    """
    count = len(polygons)
    for i, first in enumerate(polygons):
        for j in range(i + 1, count):
            shared = first.intersection(polygons[j])
            if shared.area > 0:
                return True, (i, j), shared.area
    return False, None, 0

def check_overlaps_lb_style(polygons):
    """LB check: a pair overlaps when ``intersects()`` holds and ``touches()`` does not.

    Pairs are scanned in index order; the scan stops at the first hit.

    Returns ``(True, (i, j))`` for the first offending pair, or
    ``(False, None)`` when every pair is either disjoint or merely touching.
    """
    count = len(polygons)
    for i in range(count):
        first = polygons[i]
        for j in range(i + 1, count):
            second = polygons[j]
            if not first.intersects(second):
                continue
            if first.touches(second):
                continue
            return True, (i, j)
    return False, None

print('Both overlap checks defined')

Both overlap checks defined


In [3]:
# Load the current submission, then look at group 008 in isolation
print('Loading current submission...')
df = pd.read_csv('/home/submission/submission.csv')
parsed = parse_submission(df)

# Rows whose id starts with '008_' make up group 008
config_df = parsed.loc[parsed['id'].str.startswith('008_')]
print(f'Group 008 has {len(config_df)} trees')
print(config_df)

# One polygon per tree, in row order
polygons = [
    create_tree_polygon(tree_x, tree_y, tree_deg)
    for tree_x, tree_y, tree_deg in zip(config_df['x'], config_df['y'], config_df['deg'])
]

# Run the old (area) and the LB-style predicate on the same polygons
area_overlap, area_pair, area_val = check_overlaps_area_based(polygons)
lb_overlap, lb_pair = check_overlaps_lb_style(polygons)

print(f'\nArea-based check: overlap={area_overlap}, pair={area_pair}, area={area_val}')
print(f'LB-style check: overlap={lb_overlap}, pair={lb_pair}')

Loading current submission...
Group 008 has 8 trees
       id         x         y         deg
28  008_0 -0.249562 -0.411092   51.766738
29  008_1  0.664668 -0.857305  113.629378
30  008_2 -0.664668  0.257305  293.629378
31  008_3  0.557305  0.326837  203.629378
32  008_4  0.249562 -0.188908  231.766738
33  008_5 -0.206589 -0.855262  293.629378
34  008_6  0.206589  0.255262  113.629378
35  008_7 -0.557305 -0.926837   23.629378

Area-based check: overlap=True, pair=(1, 5), area=6.162975822039155e-32
LB-style check: overlap=True, pair=(1, 5)


In [4]:
# Run BOTH checks on every group (N = 1..200) of the current submission
print('Checking all groups with BOTH methods...')

area_overlaps, lb_overlaps = [], []

for n in range(1, 201):
    prefix = f'{n:03d}_'
    config_df = parsed.loc[parsed['id'].str.startswith(prefix)]
    polygons = [
        create_tree_polygon(tree_x, tree_y, tree_deg)
        for tree_x, tree_y, tree_deg in zip(config_df['x'], config_df['y'], config_df['deg'])
    ]

    # Only the boolean flag of each result is needed here
    area_has = check_overlaps_area_based(polygons)[0]
    lb_has = check_overlaps_lb_style(polygons)[0]

    if area_has:
        area_overlaps.append(n)
    if lb_has:
        lb_overlaps.append(n)

print(f'Area-based overlaps: {len(area_overlaps)} groups')
print(f'LB-style overlaps: {len(lb_overlaps)} groups')

# Groups on which the two checks disagree
only_lb = set(lb_overlaps).difference(area_overlaps)
only_area = set(area_overlaps).difference(lb_overlaps)
print(f'\nGroups with LB overlap but NOT area overlap: {sorted(only_lb)}')
print(f'Groups with area overlap but NOT LB overlap: {sorted(only_area)}')

Checking all groups with BOTH methods...


Area-based overlaps: 31 groups
LB-style overlaps: 33 groups

Groups with LB overlap but NOT area overlap: [40, 71]
Groups with area overlap but NOT LB overlap: []


In [5]:
# Re-check every preoptimized file with the LB-style overlap predicate
print('Checking preoptimized files with LB-style overlap check...')
print('=' * 60)

# Candidate files, read from /home/code/preoptimized/ further down
submissions = [
    'ensemble_70_627.csv', 'better_ensemble.csv', 'best_snapshot.csv',
    'chistyakov_best.csv', 'submission_70_926.csv', 'saspav_best.csv',
    'bucket_of_chump.csv',
]

def get_bounding_box_side(polygons):
    """Side length of the smallest axis-aligned square enclosing all polygons.

    Computed as the larger of the width and height of the bounds of the
    polygons' union. An empty input yields ``0.0``.
    """
    if not polygons:
        return 0.0
    box = unary_union(polygons).bounds  # indexed as (min x, min y, max x, max y)
    extents = (box[2] - box[0], box[3] - box[1])
    return max(extents)

def calculate_score(df):
    """Score a parsed submission as the sum of ``side ** 2 / n`` over its groups.

    For each ``n`` in 1..200 the rows whose ``id`` starts with the zero-padded
    prefix ``'NNN_'`` form one group. A group that does not contain exactly
    ``n`` rows is skipped and contributes nothing.

    Returns
    -------
    tuple
        ``(total_score, sides)`` where ``sides`` maps each scored ``n`` to its
        bounding-box side length.
    """
    sides = {}
    total_score = 0.0
    ids = df['id']
    for n in range(1, 201):
        group = df[ids.str.startswith(f'{n:03d}_')]
        if len(group) == n:
            polygons = [
                create_tree_polygon(tree_x, tree_y, tree_deg)
                for tree_x, tree_y, tree_deg in zip(group['x'], group['y'], group['deg'])
            ]
            side = get_bounding_box_side(polygons)
            sides[n] = side
            total_score += (side ** 2) / n
    return total_score, sides

# Score every candidate file and record which groups fail the LB-style check
all_submissions = {}
for filename in submissions:
    try:
        df = pd.read_csv(f'/home/code/preoptimized/{filename}')
        parsed_sub = parse_submission(df)
        score, sides = calculate_score(parsed_sub)

        # Collect the N values whose group fails the LB-style check
        lb_overlap_groups = []
        for n in range(1, 201):
            prefix = f'{n:03d}_'
            config_df = parsed_sub.loc[parsed_sub['id'].str.startswith(prefix)]
            polygons = [
                create_tree_polygon(tree_x, tree_y, tree_deg)
                for tree_x, tree_y, tree_deg in zip(config_df['x'], config_df['y'], config_df['deg'])
            ]
            if check_overlaps_lb_style(polygons)[0]:
                lb_overlap_groups.append(n)
        n_lb_overlaps = len(lb_overlap_groups)

        all_submissions[filename] = dict(
            df=parsed_sub,
            score=score,
            sides=sides,
            lb_overlaps=n_lb_overlaps,
            lb_overlap_groups=lb_overlap_groups,
        )
        if n_lb_overlaps == 0:
            status = 'VALID'
        else:
            status = f'{n_lb_overlaps} LB overlaps'
        print(f'{filename:25s}: {score:.6f} ({status})')
    except Exception as e:
        # A file that cannot be read or parsed is reported and skipped
        print(f'{filename:25s}: ERROR - {e}')

print('=' * 60)

Checking preoptimized files with LB-style overlap check...


ensemble_70_627.csv      : 70.627582 (121 LB overlaps)


better_ensemble.csv      : 70.647306 (9 LB overlaps)


best_snapshot.csv        : 70.627582 (8 LB overlaps)


chistyakov_best.csv      : 70.926150 (VALID)


submission_70_926.csv    : 70.926150 (VALID)


saspav_best.csv          : 70.630478 (7 LB overlaps)


bucket_of_chump.csv      : 70.676501 (16 LB overlaps)


In [6]:
# Per N, keep the tightest configuration that passes the LB-style check
print('Creating proper ensemble with LB-style overlap check...')

best_configs, best_sources, best_sides = {}, {}, {}

for n in range(1, 201):
    prefix = f'{n:03d}_'
    best_side, best_config, best_source = float('inf'), None, None

    for filename, data in all_submissions.items():
        df = data['df']
        config_df = df[df['id'].str.startswith(prefix)].copy()

        # A group must contain exactly n trees to be a candidate
        if len(config_df) != n:
            continue

        polygons = [
            create_tree_polygon(tree_x, tree_y, tree_deg)
            for tree_x, tree_y, tree_deg in zip(config_df['x'], config_df['y'], config_df['deg'])
        ]

        # Configurations that fail the LB-style check are never eligible
        if check_overlaps_lb_style(polygons)[0]:
            continue

        side = get_bounding_box_side(polygons)
        if side < best_side:
            best_side, best_config, best_source = side, config_df, filename

    if best_config is None:
        print(f'WARNING: No valid config found for N={n}')
    else:
        best_configs[n] = best_config
        best_sources[n] = best_source
        best_sides[n] = best_side

print(f'Found valid configs for {len(best_configs)} out of 200 N values')

Creating proper ensemble with LB-style overlap check...


Found valid configs for 200 out of 200 N values


In [7]:
# Build and verify the ensemble
from collections import Counter

print('Building ensemble...')

# Stack the winning configuration of every N, in ascending N order, into one
# frame restricted to the four submission columns.
ensemble_columns = ['id', 'x', 'y', 'deg']
selected_configs = [
    best_configs[n][ensemble_columns]
    for n in range(1, 201)
    if n in best_configs
]
if selected_configs:
    ensemble_df = pd.concat(selected_configs, ignore_index=True)
else:
    # pd.concat rejects an empty list; keep the expected columns instead.
    ensemble_df = pd.DataFrame(columns=ensemble_columns)
print(f'Ensemble shape: {ensemble_df.shape}')

# Calculate score from the side lengths recorded during selection
ensemble_score = sum((best_sides[n] ** 2) / n for n in best_sides)
print(f'Ensemble score: {ensemble_score:.6f}')

# Source breakdown: how many N values each file contributed
source_counts = Counter(best_sources.values())
# '\n' (not '\\n'): emit a blank line instead of a literal backslash-n
print('\nSource breakdown:')
for source, count in source_counts.most_common():
    print(f'  {source}: {count} configs')

Building ensemble...


Ensemble shape: (20100, 4)
Ensemble score: 70.627589
\nSource breakdown:
  best_snapshot.csv: 86 configs
  better_ensemble.csv: 71 configs
  ensemble_70_627.csv: 35 configs
  bucket_of_chump.csv: 8 configs


In [8]:
# CRITICAL: Verify the new ensemble with LB-style check
print('Verifying new ensemble with LB-style check...')

# N values whose ensemble group still fails the LB-style check
lb_overlap_groups = []

for n in range(1, 201):
    prefix = f'{n:03d}_'
    config_df = ensemble_df[ensemble_df['id'].str.startswith(prefix)]
    polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
    has_overlap, pair = check_overlaps_lb_style(polygons)
    if has_overlap:
        lb_overlap_groups.append(n)
        print(f'N={n}: LB overlap at pair {pair}')

n_lb_overlaps = len(lb_overlap_groups)
# '\n' (not '\\n'): emit a blank line instead of a literal backslash-n
print(f'\nTotal LB overlaps: {n_lb_overlaps}')

Verifying new ensemble with LB-style check...


\nTotal LB overlaps: 0


In [None]:
# Pair-by-pair breakdown of group 008 in the current submission
print('Detailed analysis of group 008...')
config_df = parsed.loc[parsed['id'].str.startswith('008_')]
polygons = [
    create_tree_polygon(tree_x, tree_y, tree_deg)
    for tree_x, tree_y, tree_deg in zip(config_df['x'], config_df['y'], config_df['deg'])
]

for i, first in enumerate(polygons):
    for j in range(i + 1, len(polygons)):
        second = polygons[j]
        intersects = first.intersects(second)
        touches = first.touches(second)
        intersection = first.intersection(second)

        # Only pairs that intersect at all are reported
        if not intersects:
            continue

        print(f'Trees {i} and {j}:')
        print(f'  intersects={intersects}, touches={touches}')
        print(f'  intersection type: {intersection.geom_type}')
        print(f'  intersection area: {intersection.area}')
        print(f'  LB would flag: {intersects and not touches}')