# Experiment 003: Strict Overlap Validation

The leaderboard (LB) validator uses STRICT overlap detection: any pair of trees whose intersection area is > 0 counts as an overlap.
We need to re-validate our ensemble using this stricter check.

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import warnings
warnings.filterwarnings('ignore')

print('Libraries loaded')

Libraries loaded


In [2]:
# Tree geometry constants (tree-local coordinates: x = 0 is the axis of
# symmetry, y = BASE_Y is the bottom edge of the lowest tier).
TRUNK_W, TRUNK_H = 0.15, 0.2
BASE_W, MID_W, TOP_W = 0.7, 0.4, 0.25
TIP_Y, TIER_1_Y, TIER_2_Y, BASE_Y = 0.8, 0.5, 0.25, 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

# Right-hand half of the outline, walked from just below the tip down to the
# bottom-right corner of the trunk.
_RIGHT_PROFILE = [
    (TOP_W / 2, TIER_1_Y),
    (TOP_W / 4, TIER_1_Y),
    (MID_W / 2, TIER_2_Y),
    (MID_W / 4, TIER_2_Y),
    (BASE_W / 2, BASE_Y),
    (TRUNK_W / 2, BASE_Y),
    (TRUNK_W / 2, TRUNK_BOTTOM_Y),
]

# The outline is symmetric about the y-axis, so the left half is the right
# half with x negated, traversed in the opposite direction to keep the ring
# continuous: tip -> down the right side -> up the left side.
BASE_TREE_VERTICES = np.array(
    [(0.0, TIP_Y)]
    + _RIGHT_PROFILE
    + [(-vx, vy) for vx, vy in reversed(_RIGHT_PROFILE)]
)

def create_tree_polygon(x: float, y: float, deg: float):
    """Build the polygon for one tree placed at (x, y) and rotated by `deg`.

    The template outline BASE_TREE_VERTICES is rotated about its local origin
    by `deg` degrees (counter-clockwise for positive angles, per the standard
    2-D rotation matrix used below) and then shifted by (x, y).

    Args:
        x: Translation along the x-axis applied after rotation.
        y: Translation along the y-axis applied after rotation.
        deg: Rotation angle in degrees.

    Returns:
        A shapely Polygon whose exterior is the transformed outline.
    """
    angle_rad = np.radians(deg)
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    rotation_matrix = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
    # Vertices are stored as row vectors, hence the multiplication by R^T.
    rotated = BASE_TREE_VERTICES @ rotation_matrix.T
    translated = rotated + np.array([x, y])
    return Polygon(translated)

def get_bounding_box_side(polygons):
    """Return the side of the smallest axis-aligned square enclosing `polygons`.

    The axis-aligned extent of a set of shapes is simply the min/max over the
    individual extents, so this reads each polygon's `bounds` directly instead
    of constructing the geometric union first (a full overlay operation whose
    only purpose was to expose `.bounds`).

    Args:
        polygons: Sequence of geometries exposing a
            `bounds == (minx, miny, maxx, maxy)` attribute. Geometries are
            assumed to be non-empty.

    Returns:
        max(width, height) of the combined bounding box, or 0.0 when
        `polygons` is empty.
    """
    if not polygons:
        return 0.0
    # Transpose the per-polygon (minx, miny, maxx, maxy) tuples into four
    # columns so each extreme is a single min()/max() call.
    min_xs, min_ys, max_xs, max_ys = zip(*(poly.bounds for poly in polygons))
    width = max(max_xs) - min(min_xs)
    height = max(max_ys) - min(min_ys)
    return max(width, height)

def parse_submission(df):
    """Return a copy of `df` with the 'x', 'y' and 'deg' columns as floats.

    Submission files store these columns as strings carrying an 's' marker
    (this notebook writes them as 's<number>'). The marker is removed and the
    remainder converted to float. Columns that were already parsed as numbers
    (for example a CSV written without the marker) are accepted too: they are
    coerced to str first, so the `.str` accessor no longer raises on them.

    Args:
        df: DataFrame with at least the columns 'x', 'y' and 'deg'.

    Returns:
        A new DataFrame; `df` itself is not modified.
    """
    result = df.copy()
    for col in ['x', 'y', 'deg']:
        # regex=False: 's' is a literal character, not a pattern.
        result[col] = (
            result[col].astype(str).str.replace('s', '', regex=False).astype(float)
        )
    return result

print('Helper functions defined')

Helper functions defined


In [3]:
# STRICT overlap check - any intersection area > 0 is an overlap
def check_strict_overlaps(polygons):
    """Report the first pair of polygons that share a positive area.

    Every unordered pair (i, j) with i < j is examined in index order and the
    search stops at the first pair whose intersection has area > 0.

    Returns:
        (True, (i, j), area) for the first overlapping pair found, otherwise
        (False, None, 0).
    """
    for i, first in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            shared_area = first.intersection(polygons[j]).area
            if shared_area > 0:
                return True, (i, j), shared_area
    return False, None, 0

# Also keep the old check for comparison
def check_overlaps_old(polygons):
    """Previous overlap test, kept for comparison with the strict area check.

    A pair counts as overlapping when the two polygons intersect but do not
    merely touch. Candidate partners come from an STRtree query; the values it
    returns are used as indices into `polygons` (presumably shapely >= 2.0
    behaviour - confirm against the installed version).

    Returns:
        (True, (i, j)) for the first offending pair found, else (False, None).
    """
    spatial_index = STRtree(polygons)
    for i, current in enumerate(polygons):
        for j in spatial_index.query(current):
            # Visit each unordered pair once and never pair a polygon with itself.
            if not i < j:
                continue
            neighbour = polygons[j]
            if not current.intersects(neighbour):
                continue
            if current.touches(neighbour):
                continue
            return True, (i, j)
    return False, None

print('Strict overlap check defined')

Strict overlap check defined


In [4]:
# Load current submission and check with STRICT validation
# NOTE(review): the save cell further down writes the strict ensemble to this
# same path, so re-running this cell after a full run loads the newly written
# ensemble rather than the file that was here before.
print('Loading current submission...')
current_df = pd.read_csv('/home/submission/submission.csv')
# Convert the 's'-marked x / y / deg strings to floats (works on a copy).
current_parsed = parse_submission(current_df)
print(f'Shape: {current_parsed.shape}')

Loading current submission...
Shape: (20100, 4)


In [5]:
# Run the strict overlap test on every configuration (N = 1..200) of the
# current submission.
print('Checking with STRICT overlap detection...')
strict_overlap_configs = []
max_overlap_areas = {}

for n in range(1, 201):
    # Rows of configuration n have ids starting with the zero-padded count, e.g. '007_'.
    in_config = current_parsed['id'].str.startswith(f'{n:03d}_')
    polygons = [
        create_tree_polygon(tree['x'], tree['y'], tree['deg'])
        for _, tree in current_parsed[in_config].iterrows()
    ]

    has_overlap, pair, area = check_strict_overlaps(polygons)
    if not has_overlap:
        continue
    strict_overlap_configs.append(n)
    # `area` belongs to the first overlapping pair reported by the check.
    max_overlap_areas[n] = area

print(f'Configs with STRICT overlaps: {len(strict_overlap_configs)}')
if strict_overlap_configs:
    print(f'First 20 overlap configs: {strict_overlap_configs[:20]}')
    print(f'Sample overlap areas: {list(max_overlap_areas.items())[:10]}')

Checking with STRICT overlap detection...


Configs with STRICT overlaps: 0


In [6]:
# Load all pre-optimized submissions and check which are STRICTLY valid
print('\nChecking all pre-optimized submissions with STRICT validation...')
print('='*60)

# Candidate submission files; the loading loop further down in this cell reads
# each of them from /home/code/preoptimized/.
submissions = [
    'ensemble_70_627.csv',
    'better_ensemble.csv',
    'best_snapshot.csv',
    'chistyakov_best.csv',
    'submission_70_926.csv',
    'saspav_best.csv',
    'bucket_of_chump.csv',
]

def calculate_score(df):
    """Score a parsed submission as the sum over N of side(N)**2 / N.

    For each N in 1..200 the rows whose id starts with the zero-padded prefix
    'NNN_' form one configuration. A configuration that does not contain
    exactly N rows is skipped and contributes nothing to the total.

    Args:
        df: Parsed submission with columns 'id', 'x', 'y' and 'deg'.

    Returns:
        (total_score, sides) where `sides` maps each scored N to its
        bounding-square side length.
    """
    side_by_n = {}
    for n in range(1, 201):
        rows = df[df['id'].str.startswith(f'{n:03d}_')]
        if len(rows) != n:
            continue
        trees = [
            create_tree_polygon(tree['x'], tree['y'], tree['deg'])
            for _, tree in rows.iterrows()
        ]
        side_by_n[n] = get_bounding_box_side(trees)

    # Plain left-to-right accumulation in ascending N, starting from 0.0.
    total = 0.0
    for n, side in side_by_n.items():
        total += (side ** 2) / n
    return total, side_by_n

# Score every candidate file and count how many of its configurations fail the
# strict overlap check. A file that cannot be loaded or processed is reported
# on its own line and left out of `all_submissions`.
all_submissions = {}
for filename in submissions:
    try:
        parsed = parse_submission(pd.read_csv(f'/home/code/preoptimized/{filename}'))
        score, sides = calculate_score(parsed)

        # Check STRICT overlaps
        n_strict_overlaps = 0
        for n in range(1, 201):
            in_config = parsed['id'].str.startswith(f'{n:03d}_')
            polygons = [
                create_tree_polygon(tree['x'], tree['y'], tree['deg'])
                for _, tree in parsed[in_config].iterrows()
            ]
            if check_strict_overlaps(polygons)[0]:
                n_strict_overlaps += 1

        all_submissions[filename] = {
            'df': parsed,
            'score': score,
            'sides': sides,
            'strict_overlaps': n_strict_overlaps,
        }
        if n_strict_overlaps == 0:
            status = 'VALID'
        else:
            status = f'{n_strict_overlaps} STRICT overlaps'
        print(f'{filename:25s}: {score:.6f} ({status})')
    except Exception as e:
        print(f'{filename:25s}: ERROR - {e}')

print('='*60)


Checking all pre-optimized submissions with STRICT validation...


ensemble_70_627.csv      : 70.627582 (120 STRICT overlaps)


better_ensemble.csv      : 70.647306 (7 STRICT overlaps)


best_snapshot.csv        : 70.627582 (7 STRICT overlaps)


chistyakov_best.csv      : 70.926150 (VALID)


submission_70_926.csv    : 70.926150 (VALID)


saspav_best.csv          : 70.630478 (6 STRICT overlaps)


bucket_of_chump.csv      : 70.676501 (15 STRICT overlaps)


In [7]:
# Create STRICT ensemble: for every N keep the tightest configuration among
# the loaded files that has exactly N trees and zero intersection area.
print('\nCreating STRICT ensemble...')

best_configs, best_sources, best_sides = {}, {}, {}

for n in range(1, 201):
    prefix = f'{n:03d}_'
    best_side, best_config, best_source = float('inf'), None, None

    for filename, data in all_submissions.items():
        candidate = data['df'][data['df']['id'].str.startswith(prefix)]
        if len(candidate) != n:
            continue

        # Only configurations that pass the strict check may compete.
        polygons = [
            create_tree_polygon(tree['x'], tree['y'], tree['deg'])
            for _, tree in candidate.iterrows()
        ]
        if check_strict_overlaps(polygons)[0]:
            continue

        side = get_bounding_box_side(polygons)
        # Strict '<' keeps the earliest file on ties.
        if side < best_side:
            best_side, best_config, best_source = side, candidate.copy(), filename

    if best_config is None:
        print(f'WARNING: No STRICTLY valid config found for N={n}')
        continue
    best_configs[n] = best_config
    best_sources[n] = best_source
    best_sides[n] = best_side

print(f'Found STRICTLY valid configs for {len(best_configs)} out of 200 N values')


Creating STRICT ensemble...


Found STRICTLY valid configs for 200 out of 200 N values


In [8]:
# Build the strict ensemble
print('Building strict ensemble...')

# Stack the winning per-N frames in ascending N order with a single concat
# instead of copying ~20k rows one at a time through iterrows(). Only the four
# submission columns are kept and the index is renumbered from 0.
ensemble_frames = [
    best_configs[n][['id', 'x', 'y', 'deg']]
    for n in range(1, 201)
    if n in best_configs
]
if ensemble_frames:
    strict_ensemble = pd.concat(ensemble_frames, ignore_index=True)
else:
    # No N had a valid configuration: keep a well-formed (empty) frame so the
    # cells that filter on 'id' still work.
    strict_ensemble = pd.DataFrame(columns=['id', 'x', 'y', 'deg'])
print(f'Strict ensemble shape: {strict_ensemble.shape}')

# Calculate score: sum of side**2 / N over the N values that have a winner.
strict_score = sum((best_sides[n] ** 2) / n for n in best_sides)
print(f'Strict ensemble score: {strict_score:.6f}')

Building strict ensemble...


Strict ensemble shape: (20100, 4)
Strict ensemble score: 70.627589


In [9]:
# Re-run the strict overlap check on the assembled ensemble, one N at a time.
print('\nFinal STRICT verification...')
final_overlaps = 0
for n in range(1, 201):
    in_config = strict_ensemble['id'].str.startswith(f'{n:03d}_')
    polygons = [
        create_tree_polygon(tree['x'], tree['y'], tree['deg'])
        for _, tree in strict_ensemble[in_config].iterrows()
    ]
    has_overlap, pair, area = check_strict_overlaps(polygons)
    if not has_overlap:
        continue
    final_overlaps += 1
    print(f'N={n}: overlap at {pair}, area={area}')

if final_overlaps:
    print(f'Found {final_overlaps} configs with STRICT overlaps')
else:
    print('NO STRICT OVERLAPS! Submission should pass LB validation.')


Final STRICT verification...


NO STRICT OVERLAPS! Submission should pass LB validation.


In [10]:
# How many of the winning configurations each source file contributed,
# most frequent first.
from collections import Counter

print('\nSource breakdown for strict ensemble:')
source_counts = Counter()
source_counts.update(best_sources.values())
for source, count in source_counts.most_common():
    print(f'{source}: {count} configs')


Source breakdown for strict ensemble:
best_snapshot.csv: 86 configs
better_ensemble.csv: 72 configs
ensemble_70_627.csv: 35 configs
bucket_of_chump.csv: 7 configs


In [11]:
# Save the strict ensemble
print('\nSaving strict ensemble...')

import os

# NOTE(review): the first path is also the file the "current submission" cell
# reads, so this overwrites that input.
output_paths = [
    '/home/submission/submission.csv',
    '/home/code/experiments/003_strict_ensemble/submission.csv',
]

# Format with 's' prefix - use high precision
# NOTE(review): values are rounded to 15 decimal places here, whereas the
# overlap checks ran on the unrounded floats; re-validating the written file
# would confirm the rounding cannot introduce an overlap.
submission = strict_ensemble.copy()
for col in ['x', 'y', 'deg']:
    submission[col] = 's' + submission[col].apply(lambda x: f'{x:.15f}')

for path in output_paths:
    # Make sure each destination directory exists, not just the first one.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    submission.to_csv(path, index=False)

print('Submission saved!')
print(f'Score: {strict_score:.6f}')
print(f'Target: 68.897509')
print(f'Gap: {strict_score - 68.897509:.6f}')


Saving strict ensemble...
Submission saved!
Score: 70.627589
Target: 68.897509
Gap: 1.730080


In [12]:
# Summary
# Score to beat, named once so the "Target" and "Gap" lines cannot drift apart.
target_score = 68.897509

print('\n' + '='*60)
print('EXPERIMENT 003 SUMMARY')
print('='*60)
print(f'Strict ensemble score: {strict_score:.6f}')
print(f'Target: {target_score:.6f}')
print(f'Gap: {strict_score - target_score:.6f}')
# Report the count measured by the final verification cell instead of a
# hard-coded 0, so the summary cannot claim a clean result that was not observed.
print(f'STRICT overlaps: {final_overlaps}')
print(f'Sources used: {len(source_counts)}')
print('='*60)


EXPERIMENT 003 SUMMARY
Strict ensemble score: 70.627589
Target: 68.897509
Gap: 1.730080
STRICT overlaps: 0
Sources used: 4


In [13]:
# Try to expand source pool from snapshots: collect every '*.csv' file whose
# name contains 'submission' (case-insensitive) under each sub-directory of
# the snapshot root. Files sitting directly in the root are not considered.
print('Searching for more submissions in snapshots...')
import os
import glob

snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
snapshot_submissions = []

for entry in os.listdir(snapshot_dir):
    entry_path = os.path.join(snapshot_dir, entry)
    if not os.path.isdir(entry_path):
        continue
    for root, _dirs, filenames in os.walk(entry_path):
        snapshot_submissions.extend(
            os.path.join(root, name)
            for name in filenames
            if name.endswith('.csv') and 'submission' in name.lower()
        )

print(f'Found {len(snapshot_submissions)} potential submission files in snapshots')

Searching for more submissions in snapshots...
Found 1288 potential submission files in snapshots
