# Loop 5 Analysis: Fixing Overlap Issues

## Problem
Submission failed with 'Overlapping trees in group 031'
Our local overlap detection missed this overlap.

## Strategy
1. Use STRICTER overlap detection (Kaggle-compatible)
2. Only use N values from better solution that pass STRICT validation
3. Fall back to validated solution for any questionable N values

In [1]:
import pandas as pd
import numpy as np
import math
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import os

# Decimal arithmetic below is carried out with 30 significant digits.
getcontext().prec = 30
# Coordinates are multiplied by this factor before being handed to shapely
# (higher precision, like the saspav kernel).
scale_factor = Decimal('1e18')

# Tree geometry: the 15 outline vertices as (x, y) pairs, in polygon order.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
# The rest of the notebook consumes the outline as two parallel coordinate lists.
TX = [vertex_x for vertex_x, _ in _TREE_OUTLINE]
TY = [vertex_y for _, vertex_y in _TREE_OUTLINE]

print('Setup complete')

Setup complete


In [2]:
def get_tree_polygon_strict(x, y, deg):
    """Build the scaled shapely polygon of one tree (Kaggle-compatible precision).

    Every coordinate (the tree position and each outline vertex from TX / TY) goes
    through ``Decimal(str(value)) * scale_factor`` and is only then converted to float.

    Args:
        x, y: tree position; anything whose ``str()`` is valid Decimal text.
        deg: rotation angle, converted with ``float()`` and applied about the origin
            (shapely's ``affinity.rotate`` interprets it as degrees by default).

    Returns:
        The outline polygon, rotated first and then translated to the scaled (x, y).
    """
    def _scaled(value):
        # Decimal product first, single float conversion last.
        return float(Decimal(str(value)) * scale_factor)

    shift_x = _scaled(x)
    shift_y = _scaled(y)

    # Outline at the origin, in scaled units.
    outline = Polygon([(_scaled(vx), _scaled(vy)) for vx, vy in zip(TX, TY)])

    # Rotate about the origin before moving the tree into place.
    turned = affinity.rotate(outline, float(deg), origin=(0, 0))
    return affinity.translate(turned, xoff=shift_x, yoff=shift_y)

def has_overlap_strict(trees_data):
    """Report the first pair of trees whose polygons overlap with positive area.

    Candidate pairs come from an STRtree query (like the saspav kernel); a pair counts
    as overlapping when the polygons intersect, do not merely touch, and their
    intersection has area greater than zero.

    Args:
        trees_data: sequence of (x, y, deg) triples, one per tree.

    Returns:
        ``(False, None)`` when nothing overlaps (always the case for fewer than two
        trees), otherwise ``(True, message)`` where the message names the two tree
        indices and the intersection area converted back to unscaled units.
    """
    # Fewer than two trees: there is no pair to test.
    if len(trees_data) < 2:
        return False, None

    shapes = []
    for pos_x, pos_y, angle in trees_data:
        shapes.append(get_tree_polygon_strict(pos_x, pos_y, angle))
    spatial_index = STRtree(shapes)

    for a, shape_a in enumerate(shapes):
        for b in spatial_index.query(shape_a):
            # The query also returns the polygon itself.
            if b == a:
                continue
            shape_b = shapes[b]
            if not shape_a.intersects(shape_b):
                continue
            # Shared boundary points alone are not an overlap.
            if shape_a.touches(shape_b):
                continue
            common = shape_a.intersection(shape_b)
            if common.area > 0:
                # Areas are in scaled units squared; divide to report real units.
                return True, f"Overlap {a}-{b}, area={common.area / (float(scale_factor)**2):.2e}"
    return False, None

def check_n_strict(df, n):
    """Check if the N-tree configuration has overlaps using strict detection.

    Rows are selected by the zero-padded id prefix ``f'{n:03d}_'``. The ``x``, ``y``
    and ``deg`` cells are expected to be strings carrying a one-character prefix,
    which is dropped.

    The remaining decimal text is forwarded unchanged instead of being parsed to
    float64 first. get_tree_polygon_strict wraps each coordinate in
    ``Decimal(str(...))``, so passing the original text keeps every submitted digit
    and the scaled offset is rounded to float only once; parsing to float here first
    would discard digits before the Decimal step and round a second time. The angle
    is converted with ``float()`` downstream, which yields the same value as before.

    Args:
        df: submission frame with string columns ``id``, ``x``, ``y``, ``deg``.
        n: group size / group number to check.

    Returns:
        ``(True, "Wrong count: ...")`` when the group does not contain exactly ``n``
        rows, otherwise the ``(has_overlap, info)`` pair from has_overlap_strict.
    """
    group = df[df['id'].str.startswith(f'{n:03d}_')]

    if len(group) != n:
        return True, f"Wrong count: {len(group)} vs {n}"

    # Keep the coordinates as text so no precision is lost before Decimal conversion.
    xs = group['x'].str[1:].tolist()
    ys = group['y'].str[1:].tolist()
    degs = group['deg'].str[1:].tolist()

    trees_data = list(zip(xs, ys, degs))
    return has_overlap_strict(trees_data)

# Progress marker so the cell output shows the definitions in this cell were executed.
print('Strict overlap detection defined')

Strict overlap detection defined


In [3]:
# Load solutions: the higher-scoring candidate and the fallback that is checked
# for overlaps further down.
better_path = '/home/nonroot/snapshots/santa-2025/21328309254/code/experiments/003_valid_ensemble/submission.csv'
validated_path = '/home/code/experiments/004_sa_fast_v2_optimization/submission.csv'

# Read both CSVs in the same order as before (better first, then validated).
df_better, df_validated = map(pd.read_csv, (better_path, validated_path))

print(f'Better solution loaded: {len(df_better)} rows')
print(f'Validated solution loaded: {len(df_validated)} rows')

Better solution loaded: 20100 rows
Validated solution loaded: 20100 rows


In [4]:
# Group 031 is the one named in the failed submission's error message,
# so run the strict check on it in both solutions.
GROUP_UNDER_TEST = 31

print('Checking N=31 in better solution:')
has_overlap, info = check_n_strict(df_better, GROUP_UNDER_TEST)
print(f'  Better: overlap={has_overlap}, info={info}')

print('\nChecking N=31 in validated solution:')
has_overlap, info = check_n_strict(df_validated, GROUP_UNDER_TEST)
print(f'  Validated: overlap={has_overlap}, info={info}')

Checking N=31 in better solution:
  Better: overlap=True, info=Overlap 1-7, area=1.64e-32

Checking N=31 in validated solution:
  Validated: overlap=False, info=None


In [5]:
# Run the strict check on every group N = 1..200 of the better solution and
# collect the groups it flags as (n, info) pairs.
print('Re-checking all N values with STRICT detection...')

better_overlaps = []
for n in range(1, 201):
    has_overlap, info = check_n_strict(df_better, n)
    if not has_overlap:
        continue
    better_overlaps.append((n, info))
    # Echo only the first 15 flagged groups to keep the output short.
    if len(better_overlaps) <= 15:
        print(f'  N={n:3d}: {info}')

print(f'\nTotal N values with overlaps in better solution: {len(better_overlaps)}')
print(f'Overlap N values: {[x[0] for x in better_overlaps]}')

Re-checking all N values with STRICT detection...
  N=  2: Overlap 0-1, area=1.49e-01
  N=  3: Overlap 1-2, area=8.46e-31
  N=  4: Overlap 0-3, area=1.33e-04
  N=  5: Overlap 0-2, area=1.61e-02
  N= 16: Overlap 0-3, area=1.67e-02
  N= 19: Overlap 2-10, area=2.19e-32


  N= 29: Overlap 0-2, area=2.99e-27
  N= 31: Overlap 1-7, area=1.64e-32


  N= 40: Overlap 0-20, area=1.88e-02
  N= 46: Overlap 0-8, area=8.35e-03
  N= 47: Overlap 0-8, area=1.02e-02
  N= 48: Overlap 0-19, area=2.17e-03
  N= 53: Overlap 0-42, area=1.26e-05
  N= 54: Overlap 0-43, area=1.26e-05
  N= 55: Overlap 0-26, area=1.92e-05



Total N values with overlaps in better solution: 68
Overlap N values: [2, 3, 4, 5, 16, 19, 29, 31, 40, 46, 47, 48, 53, 54, 55, 56, 59, 62, 66, 69, 70, 71, 77, 78, 79, 80, 96, 97, 99, 102, 103, 107, 108, 109, 110, 118, 119, 120, 124, 125, 126, 127, 129, 130, 131, 138, 139, 140, 150, 152, 153, 154, 155, 156, 161, 164, 166, 167, 168, 175, 176, 177, 178, 179, 185, 190, 191, 192]


In [6]:
# The validated solution is the fallback, so every one of its 200 groups
# must pass the strict check.
print('\nVerifying validated solution...')
validated_overlaps = []
for n in range(1, 201):
    has_overlap, info = check_n_strict(df_validated, n)
    if not has_overlap:
        continue
    validated_overlaps.append((n, info))
    print(f'  N={n:3d}: {info}')

if validated_overlaps:
    print(f'  \u2717 Validated solution has {len(validated_overlaps)} overlaps!')
else:
    print('  \u2713 Validated solution has NO overlaps!')


Verifying validated solution...


  ✓ Validated solution has NO overlaps!


In [9]:
# Scoring setup: numba JIT decorator plus float64 array copies of the tree outline
# for the compiled scoring function defined in this cell.
from numba import njit

# Derive the arrays from the TX / TY lists of the setup cell instead of retyping the
# 15 coordinates, so the overlap check and the scorer always share one outline.
# The values and dtype (float64) are the same as the literal arrays they replace.
TX_np = np.array(TX, dtype=np.float64)
TY_np = np.array(TY, dtype=np.float64)

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group: squared side of its axis-aligned bounding square over n.

    Each tree's outline (tx, ty) is rotated by its angle in degrees, shifted to
    (xs[i], ys[i]), and the bounding box of all resulting vertices is tracked.

    Args:
        xs, ys: arrays of tree positions.
        degs: array of rotation angles in degrees (converted to radians here).
        tx, ty: arrays with the outline vertex coordinates of a single tree.

    Returns:
        ``side * side / n`` where ``side`` is the larger of the bounding box's
        width and height and ``n`` is the number of trees.
    """
    tree_count = xs.size
    vertex_count = tx.size
    # Sentinels far outside any real coordinate so the first vertex replaces them.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300
    for k in range(tree_count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        for v in range(vertex_count):
            # Standard 2D rotation followed by the tree's translation.
            px = cos_t * tx[v] - sin_t * ty[v] + xs[k]
            py = sin_t * tx[v] + cos_t * ty[v] + ys[k]
            lo_x = min(lo_x, px)
            hi_x = max(hi_x, px)
            lo_y = min(lo_y, py)
            hi_y = max(hi_y, py)
    extent = max(hi_x - lo_x, hi_y - lo_y)
    return extent * extent / tree_count

def calculate_score_for_n(df, n):
    """Return the score_group value for group ``n`` of a submission frame.

    Rows are selected by the id prefix ``f'{n:03d}_'``; the first character of each
    ``x`` / ``y`` / ``deg`` string is dropped and the rest parsed as float.

    Returns:
        The group's score, or the sentinel ``999.0`` when the group does not contain
        exactly ``n`` rows.
    """
    rows = df[df['id'].str.startswith(f'{n:03d}_')]
    if len(rows) != n:
        return 999.0
    # Same parsing for all three columns, in the order score_group expects them.
    xs, ys, degs = (
        rows[column].str[1:].astype(float).values for column in ('x', 'y', 'deg')
    )
    return score_group(xs, ys, degs, TX_np, TY_np)

# Progress marker so the cell output shows the scoring helpers in this cell were defined.
print('Scoring functions defined')

Scoring functions defined


In [10]:
# Create hybrid: start from the validated solution and take a group from the better
# solution only when it passed STRICT overlap detection and scores lower.
print('Creating STRICT hybrid ensemble...')

# Split N = 1..200 by whether the strict re-check flagged the better solution's group.
overlap_ns_strict = {entry[0] for entry in better_overlaps}
no_overlap_ns_strict = [n for n in range(1, 201) if n not in overlap_ns_strict]

print(f'N values without overlaps (strict): {len(no_overlap_ns_strict)}')
print(f'N values with overlaps (strict): {len(overlap_ns_strict)}')

# Baseline: an independent copy of the validated solution.
df_hybrid = df_validated.copy()

# One record per candidate group, noting which source was kept and the score gain.
improvement_details = []
for n in no_overlap_ns_strict:
    better_score = calculate_score_for_n(df_better, n)
    validated_score = calculate_score_for_n(df_validated, n)

    if not better_score < validated_score:
        # No gain: leave the validated rows in place.
        improvement_details.append({'n': n, 'improvement': 0, 'source': 'validated'})
        continue

    group_prefix = f'{n:03d}_'
    mask_better = df_better['id'].str.startswith(group_prefix)
    mask_hybrid = df_hybrid['id'].str.startswith(group_prefix)
    # Positional copy via .values: the rows of the group are taken in file order
    # from the better solution and written over the hybrid's rows for that group.
    df_hybrid.loc[mask_hybrid, ['x', 'y', 'deg']] = df_better.loc[mask_better, ['x', 'y', 'deg']].values
    improvement = validated_score - better_score
    improvement_details.append({'n': n, 'improvement': improvement, 'source': 'better'})

print(f'\nN values improved: {len([d for d in improvement_details if d["improvement"] > 0])}')

Creating STRICT hybrid ensemble...
N values without overlaps (strict): 132
N values with overlaps (strict): 68



N values improved: 117


In [11]:
# Total score = sum of the per-group scores for N = 1..200
def calculate_total_score(df):
    """Add up calculate_score_for_n(df, n) for every group n from 1 to 200."""
    per_group = [calculate_score_for_n(df, n) for n in range(1, 201)]
    # Plain left-to-right accumulation from 0.0; the built-in sum() is avoided
    # because its float summation algorithm is not the same on every Python version.
    running = 0.0
    for group_score in per_group:
        running += group_score
    return running

# Score all three frames (hybrid, validated, better) with the same function.
hybrid_score, validated_score_total, better_score_total = (
    calculate_total_score(frame) for frame in (df_hybrid, df_validated, df_better)
)

# Lower is better, so a positive "improvement" means the hybrid beats the validated file.
print(f'Better solution score: {better_score_total:.6f}')
print(f'Validated solution score: {validated_score_total:.6f}')
print(f'Hybrid solution score: {hybrid_score:.6f}')
print(f'Improvement over validated: {validated_score_total - hybrid_score:.6f}')

Better solution score: 70.523320
Validated solution score: 70.622435
Hybrid solution score: 70.616068
Improvement over validated: 0.006366


In [12]:
# FINAL VERIFICATION: the hybrid is written out only if every group passes
# the strict overlap detection.
print('\nFINAL VERIFICATION with strict overlap detection...')
hybrid_overlaps = []
for n in range(1, 201):
    has_overlap, info = check_n_strict(df_hybrid, n)
    if not has_overlap:
        continue
    hybrid_overlaps.append((n, info))
    print(f'  N={n:3d}: {info}')

if hybrid_overlaps:
    print(f'  Hybrid has {len(hybrid_overlaps)} overlaps - CANNOT SUBMIT')
else:
    print('  Hybrid solution passes STRICT overlap detection!')

    # Save the hybrid solution into its experiment folder...
    experiment_dir = '/home/code/experiments/006_strict_hybrid'
    experiment_csv = '/home/code/experiments/006_strict_hybrid/submission.csv'
    os.makedirs(experiment_dir, exist_ok=True)
    df_hybrid.to_csv(experiment_csv, index=False)

    # ...and copy that file to the submission location.
    import shutil
    submission_csv = '/home/submission/submission.csv'
    shutil.copy(experiment_csv, submission_csv)
    print(f'\nSaved to /home/submission/submission.csv')
    print(f'Final score: {hybrid_score:.6f}')


FINAL VERIFICATION with strict overlap detection...


  Hybrid solution passes STRICT overlap detection!

Saved to /home/submission/submission.csv
Final score: 70.616068


In [None]:
# Show the first rows of group 031 in the new hybrid and in the validated solution,
# then confirm the hybrid took this group unchanged from the validated file.
print('Checking N=31 in new hybrid:')
mask_31 = df_hybrid['id'].str.startswith('031_')
hybrid_31 = df_hybrid[mask_31]
print(hybrid_31.head(3))

print('\nChecking N=31 in validated:')
mask_31_v = df_validated['id'].str.startswith('031_')
validated_31 = df_validated[mask_31_v]
print(validated_31.head(3))

# Compare the coordinate columns row by row as plain lists.
coordinate_columns = ['x', 'y', 'deg']
hybrid_31_values = hybrid_31[coordinate_columns].values.tolist()
validated_31_values = validated_31[coordinate_columns].values.tolist()
print('\nN=31 matches validated:', hybrid_31_values == validated_31_values)