# Experiment 005: Fix Vertex-Inside-Polygon Overlaps

The previous submission failed because our overlap detection missed cases where a vertex of one tree lies inside another tree's polygon: the intersection area is reported as 0 because the contact is just a point, so an area-based check does not flag it.

We found 2 problematic N values: 60 and 105. We'll replace these with baseline configurations.

In [None]:
# Put /home/code first on the module search path; presumably this is where the
# local `utils` module imported below lives (verify).
import sys
sys.path.insert(0, '/home/code')

import pandas as pd
import numpy as np
import shutil
import os
# Project-local helpers used by the cells below to load, score and validate
# submissions.
from utils import (
    load_submission, load_trees_for_n, get_trees_data_for_n,
    score_submission, verify_submission_no_overlaps
)
from shapely.geometry import Point
from itertools import combinations
import json

# Reached only if every import above succeeded.
print("Utilities loaded successfully!")

In [None]:
# Prepare the experiment's working directory and make it the current directory,
# so the relative output paths used later resolve inside it.
work_dir = '/home/code/experiments/005_fixed_submission'
os.makedirs(work_dir, exist_ok=True)
os.chdir(work_dir)

# Read the baseline submission and the experiment-004 submission that failed.
baseline_csv = '/home/code/experiments/000_baseline/submission.csv'
failed_csv = '/home/code/experiments/004_optimize_ensemble/submission.csv'
baseline_df, failed_df = map(load_submission, (baseline_csv, failed_csv))

print(f'Baseline loaded: {baseline_df.shape}')
print(f'Failed submission loaded: {failed_df.shape}')

In [None]:
def has_vertex_inside_overlap(trees):
    """Check if any vertex of one tree is inside another tree's polygon.

    Every unordered pair of trees is tested in both directions: the vertices
    of the first tree against the second tree's polygon, then the reverse.
    The test uses the polygon's ``contains`` predicate; in shapely this is a
    strict containment test, so a vertex lying exactly on the other polygon's
    boundary is not reported.

    Args:
        trees: Sequence of objects exposing a ``polygon`` attribute. Each
            polygon must provide ``exterior.coords`` and ``contains`` (the
            shapely ``Polygon`` interface).

    Returns:
        ``(True, (i, j))`` for the first offending pair found, where
        ``i < j`` are indices into ``trees``; ``(False, None)`` when no
        vertex lies inside another tree's polygon (including when ``trees``
        has fewer than two entries).
    """
    polygons = [tree.polygon for tree in trees]

    # Build each tree's vertex points once instead of once per pair. The last
    # coordinate of the exterior ring is skipped because a closed ring repeats
    # its first vertex there.
    vertices = [
        [Point(coord) for coord in polygon.exterior.coords[:-1]]
        for polygon in polygons
    ]

    for i, j in combinations(range(len(trees)), 2):
        # Vertices of tree i against polygon j first, then the reverse.
        for host, guests in (
            (polygons[j], vertices[i]),
            (polygons[i], vertices[j]),
        ):
            if any(host.contains(pt) for pt in guests):
                return True, (i, j)

    return False, None

# Scan N = 1..200 of the failed submission and collect every N for which a
# tree vertex lies inside another tree's polygon.
print('Scanning all N values for vertex-inside-polygon overlaps...')
problematic_ns = []

for n in range(1, 201):
    trees = load_trees_for_n(failed_df, n)
    # Only configurations that hold exactly n trees are examined; any other
    # count is skipped without a message.
    if len(trees) == n:
        has_problem, pair = has_vertex_inside_overlap(trees)
        if has_problem:
            problematic_ns.append(n)
            print(f'  N={n}: overlap between trees {pair}')

print(f'\nTotal problematic N values: {len(problematic_ns)}')

In [None]:
# Build the repaired submission: start from a copy of the failed one and, for
# every flagged N, swap its rows for the baseline rows of the same N.
print('Creating fixed submission...')

fixed_df = failed_df.copy()

for n in problematic_ns:
    print(f'  Replacing N={n} with baseline configuration')

    # Rows of this N are selected by their id prefix: zero-padded N, then '_'.
    prefix = f"{n:03d}_"
    is_failed_row = fixed_df['id'].str.startswith(prefix)
    baseline_n_data = get_trees_data_for_n(baseline_df, n)

    # Keep every other row and append the baseline rows for this N.
    fixed_df = pd.concat([fixed_df[~is_failed_row], baseline_n_data], ignore_index=True)

# Restore numeric order (N first, then tree index) using temporary sort keys
# parsed from the id; the keys are dropped again before the frame is kept.
fixed_df = (
    fixed_df
    .assign(
        n=fixed_df['id'].apply(lambda x: int(x.split('_')[0])),
        tree_idx=fixed_df['id'].apply(lambda x: int(x.split('_')[1])),
    )
    .sort_values(['n', 'tree_idx'])
    .drop(columns=['n', 'tree_idx'])
    .reset_index(drop=True)
)

print(f'Fixed submission shape: {fixed_df.shape}')

In [None]:
# Validate the repaired submission with both checks.
print('Verifying fixed submission...')

# First the project's standard overlap check.
is_valid, overlapping_ns = verify_submission_no_overlaps(fixed_df)
print(f'Standard overlap check: valid={is_valid}, overlapping_ns={overlapping_ns}')

# Then the vertex-inside-polygon check, over the same N range as the scan.
print('\nChecking vertex-inside-polygon overlaps...')
vertex_problems = []
for n in range(1, 201):
    trees = load_trees_for_n(fixed_df, n)
    # Configurations whose tree count differs from n are skipped.
    if len(trees) == n and has_vertex_inside_overlap(trees)[0]:
        vertex_problems.append(n)
        print(f'  N={n}: still has overlap!')

if not vertex_problems:
    print('  No vertex-inside-polygon overlaps found!')

# The submission counts as valid only when neither check reported any N.
print(f'\nFinal validation: {len(overlapping_ns) == 0 and len(vertex_problems) == 0}')

In [None]:
# Score the baseline, failed and fixed submissions with overlap checking
# switched off; the third element of each result is not used.
print('Calculating scores...')

(
    (baseline_score, baseline_by_n, _),
    (failed_score, failed_by_n, _),
    (fixed_score, fixed_by_n, _),
) = [
    score_submission(submission, check_overlaps=False)
    for submission in (baseline_df, failed_df, fixed_df)
]

print(f'Baseline score: {baseline_score:.6f}')
print(f'Failed score: {failed_score:.6f}')
print(f'Fixed score: {fixed_score:.6f}')
# Both differences are positive when the first operand is the larger score.
print(f'\nImprovement over baseline: {baseline_score - fixed_score:.6f}')
print(f'Loss from fixing: {fixed_score - failed_score:.6f}')

In [None]:
# Save the fixed submission into the current directory (the experiment's
# working directory) and copy it to the final submission location.
print('\nSaving fixed submission...')

fixed_df.to_csv('submission.csv', index=False)
print(f'Saved to {work_dir}/submission.csv')

# Make sure the destination directory exists so the copy cannot fail with
# FileNotFoundError on a fresh environment.
os.makedirs('/home/submission', exist_ok=True)
shutil.copy('submission.csv', '/home/submission/submission.csv')
print('Copied to /home/submission/submission.csv')

# Save metrics: the three scores, their differences, the N values that were
# replaced, and whether both overlap checks came back clean.
metrics = {
    'cv_score': fixed_score,
    'baseline_score': baseline_score,
    'failed_score': failed_score,
    'improvement_over_baseline': baseline_score - fixed_score,
    'loss_from_fixing': fixed_score - failed_score,
    'problematic_ns_fixed': problematic_ns,
    'is_valid': len(overlapping_ns) == 0 and len(vertex_problems) == 0
}
# Explicit encoding keeps the file independent of the platform default.
with open('metrics.json', 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)
print(f'\nMetrics: {metrics}')

In [None]:
# Final report: scores, the N values that were replaced, validity of the
# result, and the distance to the target score.
banner = '=' * 60
report_lines = [
    banner,
    'EXPERIMENT 005: FIXED SUBMISSION SUMMARY',
    banner,
    f'Baseline score: {baseline_score:.6f}',
    f'Failed score (had overlaps): {failed_score:.6f}',
    f'Fixed score: {fixed_score:.6f}',
    f'Improvement over baseline: {baseline_score - fixed_score:.6f}',
    f'N values fixed: {problematic_ns}',
    f'Is valid: {len(overlapping_ns) == 0 and len(vertex_problems) == 0}',
    f'\nTarget: 68.888293',
    f'Gap to target: {fixed_score - 68.888293:.6f} ({(fixed_score - 68.888293) / 68.888293 * 100:.2f}%)',
    banner,
]
# One print of the joined lines emits the same text as one print per line.
print('\n'.join(report_lines))