# Experiment 005: Overlap Repair with SA

The ensemble scores 67.77 (lower is better, so this beats the 68.93 target!) but 30 of the N values contain overlapping trees that must be repaired.
This experiment attempts to repair overlaps using SA:
1. Start from the overlapping configuration
2. Use SA to find a NEARBY valid configuration
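3. Accept moves with a Metropolis rule on a combined objective (overlap count × penalty + bounding-square side), so moves that REDUCE overlaps or improve the score are favoured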
4. The overlapping config shows WHERE good solutions exist

In [1]:
import numpy as np
import pandas as pd
import os
from numba import njit
import math
import time
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import shutil

# Run from this experiment's folder so relative output paths resolve there.
experiment_dir = '/home/code/experiments/005_overlap_repair_sa'
os.chdir(experiment_dir)
print(f'Working directory: {os.getcwd()}')

Working directory: /home/code/experiments/005_overlap_repair_sa


In [2]:
# Tree polygon template
@njit
def make_polygon_template():
    """Return the (x, y) vertex arrays of the 15-vertex tree outline.

    The outline is mirror-symmetric about the y axis and is expressed in the
    template's own frame (unrotated, untranslated).

    Returns:
        Tuple ``(x, y)`` of float64 arrays, one entry per vertex.
    """
    # Horizontal extents (presumably trunk width/height and the three tier widths).
    trunk_w = 0.15
    trunk_h = 0.2
    base_w = 0.7
    mid_w = 0.4
    top_w = 0.25
    # Vertical levels, from the tip down to the bottom of the trunk.
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h
    x = np.array([
        0.0, top_w / 2, top_w / 4, mid_w / 2, mid_w / 4, base_w / 2,
        trunk_w / 2, trunk_w / 2, -trunk_w / 2, -trunk_w / 2,
        -base_w / 2, -mid_w / 4, -mid_w / 2, -top_w / 4, -top_w / 2,
    ], np.float64)
    y = np.array([
        tip_y, tier1_y, tier1_y, tier2_y, tier2_y, base_y,
        base_y, trunk_bottom_y, trunk_bottom_y, base_y,
        base_y, tier2_y, tier2_y, tier1_y, tier1_y,
    ], np.float64)
    return x, y

@njit
def get_bbox(xs, ys, degs, tx, ty):
    """Return the longer side of the axis-aligned box enclosing every placed polygon.

    Args:
        xs, ys: per-polygon translation applied after rotation.
        degs: per-polygon rotation in degrees.
        tx, ty: template vertex coordinates shared by all polygons.

    Returns:
        ``max(width, height)`` of the bounding box over all transformed vertices
        (a single side length, not the box itself).
    """
    count = xs.size
    n_vertices = tx.size
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300
    for k in range(count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        off_x = xs[k]
        off_y = ys[k]
        for v in range(n_vertices):
            # Rotate the template vertex, then shift it to the polygon's position.
            px = cos_t * tx[v] - sin_t * ty[v] + off_x
            py = sin_t * tx[v] + cos_t * ty[v] + off_y
            min_x = px if px < min_x else min_x
            max_x = px if px > max_x else max_x
            min_y = py if py < min_y else min_y
            max_y = py if py > max_y else max_y
    width = max_x - min_x
    height = max_y - min_y
    return max(width, height)

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one configuration: squared bounding side divided by the polygon count.

    Args:
        xs, ys, degs: per-polygon position and rotation (degrees).
        tx, ty: template vertex coordinates.

    Returns:
        ``side**2 / n`` where ``side`` comes from ``get_bbox`` and ``n = xs.size``.
    """
    side = get_bbox(xs, ys, degs, tx, ty)
    area = side * side
    return area / xs.size

# Build the outline once; the scoring and overlap calls below all receive these arrays.
template_xy = make_polygon_template()
tx, ty = template_xy
print('Template created')

Template created


In [3]:
# Overlap detection
def get_shapely_polygon(cx, cy, deg, tx, ty):
    """Build the Shapely polygon for one template instance.

    The template vertices (tx, ty) are rotated by ``deg`` degrees and then
    translated by (cx, cy).
    """
    theta = deg * np.pi / 180.0
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    world_x = cos_t * tx - sin_t * ty + cx
    world_y = sin_t * tx + cos_t * ty + cy
    return Polygon(list(zip(world_x, world_y)))

def count_overlaps(xs, ys, degs, tx, ty):
    """Return how many unordered pairs of placed polygons overlap.

    A pair counts as overlapping when the two polygons intersect but do not
    merely touch. Configurations with fewer than two polygons yield 0.
    """
    total = len(xs)
    if total < 2:
        return 0

    shapes = [get_shapely_polygon(xs[k], ys[k], degs[k], tx, ty) for k in range(total)]

    # Visit each unordered pair (a < b) exactly once.
    return sum(
        1
        for a in range(total)
        for b in range(a + 1, total)
        if shapes[a].intersects(shapes[b]) and not shapes[a].touches(shapes[b])
    )

def has_overlap(xs, ys, degs, tx, ty):
    """Return True when at least one pair of placed polygons overlaps."""
    overlapping_pairs = count_overlaps(xs, ys, degs, tx, ty)
    return overlapping_pairs > 0

# Marker output so the cell visibly confirms the overlap helpers were defined.
print('Overlap detection defined')

Overlap detection defined


In [4]:
# Load submissions
def strip(a):
    """Convert submission values such as ``'s1.25'`` into a float64 array.

    Each element is stringified, every ``'s'`` character is removed, and the
    remainder is parsed as a float.
    """
    cleaned = []
    for raw in a:
        text = str(raw)
        cleaned.append(float(text.replace('s', '')))
    return np.array(cleaned, np.float64)

def load_submission(filepath):
    """Read a submission CSV and group its rows by N.

    The integer before the first underscore of each ``id`` is used as the
    group key N. Returns ``{N: {'x': ..., 'y': ..., 'deg': ...}}`` with each
    value a float64 array produced by ``strip``.
    """
    table = pd.read_csv(filepath)
    table['N'] = table['id'].astype(str).str.split('_').str[0].astype(int)
    return {
        n: {
            'x': strip(group['x'].to_numpy()),
            'y': strip(group['y'].to_numpy()),
            'deg': strip(group['deg'].to_numpy()),
        }
        for n, group in table.groupby('N')
    }

# Input submissions: the baseline and the ensemble (with overlaps)
baseline_csv = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
ensemble_csv = '/home/nonroot/snapshots/santa-2025/21108486172/code/experiments/001_baseline/ensemble_submission.csv'

baseline_configs = load_submission(baseline_csv)
ensemble_configs = load_submission(ensemble_csv)


def total_score(configs):
    """Sum ``score_group`` over N = 1..200 for one loaded submission."""
    return sum(
        score_group(configs[n]['x'], configs[n]['y'], configs[n]['deg'], tx, ty)
        for n in range(1, 201)
    )


# Calculate scores
baseline_score = total_score(baseline_configs)
ensemble_score = total_score(ensemble_configs)

print(f'Baseline score: {baseline_score:.6f}')
print(f'Ensemble score: {ensemble_score:.6f}')
print(f'Potential improvement: {baseline_score - ensemble_score:.6f}')

Baseline score: 70.734327
Ensemble score: 67.772662
Potential improvement: 2.961665


In [5]:
# Collect (N, overlapping-pair count) for every ensemble group with at least one overlap
overlap_n_values = []
for n in range(1, 201):
    group = ensemble_configs[n]
    pair_count = count_overlaps(group['x'], group['y'], group['deg'], tx, ty)
    if pair_count > 0:
        overlap_n_values.append((n, pair_count))

print(f'N values with overlaps: {len(overlap_n_values)}')
print('Top 10 by overlap count:')
# The sort is stable, so ties keep ascending-N order just like negating the key would.
worst_first = sorted(overlap_n_values, key=lambda item: item[1], reverse=True)
for n, overlaps in worst_first[:10]:
    base_cfg = baseline_configs[n]
    ens_cfg = ensemble_configs[n]
    baseline_s = score_group(base_cfg['x'], base_cfg['y'], base_cfg['deg'], tx, ty)
    ensemble_s = score_group(ens_cfg['x'], ens_cfg['y'], ens_cfg['deg'], tx, ty)
    print(f'  N={n}: {overlaps} overlaps, baseline={baseline_s:.4f}, ensemble={ensemble_s:.4f}, improvement={baseline_s - ensemble_s:.4f}')

N values with overlaps: 30
Top 10 by overlap count:
  N=60: 109 overlaps, baseline=0.3573, ensemble=0.3544, improvement=0.0029
  N=48: 94 overlaps, baseline=0.3555, ensemble=0.3483, improvement=0.0072
  N=43: 80 overlaps, baseline=0.3700, ensemble=0.3430, improvement=0.0270
  N=39: 72 overlaps, baseline=0.3615, ensemble=0.3535, improvement=0.0080
  N=32: 66 overlaps, baseline=0.3657, ensemble=0.3102, improvement=0.0555
  N=33: 65 overlaps, baseline=0.3694, ensemble=0.3567, improvement=0.0127
  N=15: 61 overlaps, baseline=0.3792, ensemble=0.2142, improvement=0.1650
  N=27: 61 overlaps, baseline=0.3627, ensemble=0.3209, improvement=0.0418
  N=22: 58 overlaps, baseline=0.3753, ensemble=0.2943, improvement=0.0809
  N=17: 55 overlaps, baseline=0.3700, ensemble=0.2088, improvement=0.1613


In [None]:
# SA-based overlap repair
def _pairs_overlapping_with(idx, poly, polygons):
    """Count the polygons other than ``polygons[idx]`` that overlap ``poly``.

    Operands are handed to Shapely in (lower index, higher index) order, the
    same order ``count_overlaps`` uses, so this per-tree tally agrees with the
    corresponding part of a full recount.
    """
    hits = 0
    for k, other in enumerate(polygons):
        if k == idx:
            continue
        first, second = (poly, other) if idx < k else (other, poly)
        if first.intersects(second) and not first.touches(second):
            hits += 1
    return hits


def repair_with_sa(xs, ys, degs, tx, ty, max_iter=20000, seed=42):
    """Use simulated annealing to remove overlaps while keeping the bounding side small.

    Each iteration perturbs one randomly chosen tree (translate, rotate, or
    both) and accepts the move with a Metropolis rule on the objective
    ``overlaps * penalty + side``. The inputs are copied, never modified.

    Args:
        xs, ys, degs: starting positions and rotations (degrees); must support
            ``.copy()`` and item assignment.
        tx, ty: template vertex coordinates.
        max_iter: number of annealing iterations. Values <= 0 return the
            starting configuration unchanged.
        seed: passed to ``np.random.seed``; note this reseeds NumPy's global RNG.

    Returns:
        ``(best_xs, best_ys, best_degs, best_overlaps, best_side)``: the state
        with the fewest overlapping pairs seen, and among overlap-free states
        the one with the smallest bounding side.
    """
    np.random.seed(seed)
    n = len(xs)

    best_xs = xs.copy()
    best_ys = ys.copy()
    best_degs = degs.copy()
    best_overlaps = count_overlaps(best_xs, best_ys, best_degs, tx, ty)
    best_side = get_bbox(best_xs, best_ys, best_degs, tx, ty)

    # Nothing to anneal: avoids 1/max_iter dividing by zero and randint(0) on an empty config.
    if n == 0 or max_iter <= 0:
        return best_xs, best_ys, best_degs, best_overlaps, best_side

    curr_xs = xs.copy()
    curr_ys = ys.copy()
    curr_degs = degs.copy()
    curr_overlaps = best_overlaps
    curr_side = best_side

    # Polygons of the current state. Only the moved tree's polygon changes per
    # iteration, so overlaps are updated incrementally instead of recounting
    # every pair on every step.
    polygons = [
        get_shapely_polygon(curr_xs[k], curr_ys[k], curr_degs[k], tx, ty)
        for k in range(n)
    ]

    # Geometric cooling from T0 down to Tf over max_iter iterations
    T0 = 0.5
    Tf = 0.0001
    cooling = (Tf / T0) ** (1.0 / max_iter)
    T = T0

    # Step sizes shrink as the run progresses: coarse moves first, fine moves last
    step_sizes = [0.05, 0.02, 0.01, 0.005, 0.002, 0.001]
    angle_steps = [5.0, 2.0, 1.0, 0.5, 0.2, 0.1]

    penalty = 10.0  # Heavy penalty per overlapping pair

    for it in range(max_iter):
        step_idx = min(len(step_sizes) - 1, it * len(step_sizes) // max_iter)
        step = step_sizes[step_idx]
        angle_step = angle_steps[step_idx]

        # Pick random tree
        i = np.random.randint(n)

        # Save old position and the overlaps this tree currently takes part in
        old_x, old_y, old_deg = curr_xs[i], curr_ys[i], curr_degs[i]
        old_local = _pairs_overlapping_with(i, polygons[i], polygons)

        # Random move: 0 = translate, 1 = rotate, 2 = both
        move_type = np.random.randint(3)
        if move_type == 0 or move_type == 2:
            curr_xs[i] += np.random.uniform(-step, step)
            curr_ys[i] += np.random.uniform(-step, step)
        if move_type == 1 or move_type == 2:
            curr_degs[i] = (curr_degs[i] + np.random.uniform(-angle_step, angle_step)) % 360

        # New state: pairs not involving tree i are unchanged, so only its own pairs are recounted
        new_poly = get_shapely_polygon(curr_xs[i], curr_ys[i], curr_degs[i], tx, ty)
        new_local = _pairs_overlapping_with(i, new_poly, polygons)
        new_overlaps = curr_overlaps - old_local + new_local
        new_side = get_bbox(curr_xs, curr_ys, curr_degs, tx, ty)

        # Metropolis acceptance on the combined objective: overlaps * penalty + side
        old_obj = curr_overlaps * penalty + curr_side
        new_obj = new_overlaps * penalty + new_side

        delta = new_obj - old_obj
        if delta < 0 or np.random.random() < np.exp(-delta / T):
            # Accept
            polygons[i] = new_poly
            curr_overlaps = new_overlaps
            curr_side = new_side

            # Update best on fewer overlaps, or on a smaller side once overlap-free
            if new_overlaps < best_overlaps or (new_overlaps == 0 and new_side < best_side):
                best_xs = curr_xs.copy()
                best_ys = curr_ys.copy()
                best_degs = curr_degs.copy()
                best_overlaps = new_overlaps
                best_side = new_side
        else:
            # Reject: restore the tree; polygons[i] still describes the old pose
            curr_xs[i], curr_ys[i], curr_degs[i] = old_x, old_y, old_deg

        T *= cooling

    return best_xs, best_ys, best_degs, best_overlaps, best_side

# Marker output so the cell visibly confirms repair_with_sa was defined.
print('SA repair function defined')

In [None]:
# Try to repair overlapping configurations
print('Attempting to repair overlapping configurations...')
start_time = time.time()

failed_repairs = []

# Start with baseline for all N; a group is replaced only by an overlap-free
# configuration that scores strictly better than its baseline.
repaired_configs = {n: baseline_configs[n].copy() for n in range(1, 201)}

# Try to repair each overlapping N value
for n, overlaps in sorted(overlap_n_values, key=lambda x: x[0]):
    c = ensemble_configs[n]
    baseline_s = score_group(baseline_configs[n]['x'], baseline_configs[n]['y'], baseline_configs[n]['deg'], tx, ty)

    # Try SA repair with multiple restarts
    best_xs, best_ys, best_degs = None, None, None
    best_score = baseline_s
    # Fewest overlapping pairs any restart reached; used only for the failure report
    best_overlaps = overlaps

    for restart in range(5):
        new_xs, new_ys, new_degs, new_overlaps, new_side = repair_with_sa(
            c['x'], c['y'], c['deg'], tx, ty,
            max_iter=10000, seed=42 + restart * 1000 + n
        )
        new_score = new_side * new_side / n
        best_overlaps = min(best_overlaps, new_overlaps)

        if new_overlaps == 0 and new_score < best_score:
            best_xs, best_ys, best_degs = new_xs, new_ys, new_degs
            best_score = new_score

    # best_xs is set only by an overlap-free restart that beat the baseline score
    if best_xs is not None:
        repaired_configs[n] = {'x': best_xs, 'y': best_ys, 'deg': best_degs}
        print(f'N={n}: REPAIRED! {baseline_s:.6f} -> {best_score:.6f} (improvement: {baseline_s - best_score:.6f})')
    else:
        failed_repairs.append(n)
        if n <= 20:  # Only print for small N
            print(f'N={n}: Failed to repair (overlaps={best_overlaps}, baseline={baseline_s:.6f})')

print(f'\nTime: {time.time() - start_time:.1f}s')
print(f'Successfully repaired: {len(overlap_n_values) - len(failed_repairs)} / {len(overlap_n_values)}')

In [None]:
# Total score of the repaired submission, accumulated group by group for N = 1..200
final_score = 0.0
for n in range(1, 201):
    cfg = repaired_configs[n]
    final_score = final_score + score_group(cfg['x'], cfg['y'], cfg['deg'], tx, ty)

print(f'Baseline score: {baseline_score:.6f}')
print(f'Final score: {final_score:.6f}')
print(f'Improvement: {baseline_score - final_score:.6f}')

In [None]:
# Re-check every group of the repaired submission and list the N values that still overlap
print('Validating for overlaps...')
overlap_n = [
    n
    for n in range(1, 201)
    if has_overlap(repaired_configs[n]['x'], repaired_configs[n]['y'], repaired_configs[n]['deg'], tx, ty)
]

print(f'Overlaps found in N: {overlap_n}' if overlap_n else 'No overlaps detected')

In [None]:
# Save submission
def save_submission(configs, filepath):
    """Write the configurations for N = 1..200 to ``filepath`` as a CSV.

    Rows have the columns ``id`` (``'<N as 3 digits>_<index>'``), ``x``, ``y``
    and ``deg``, with every numeric value prefixed by ``'s'``. N values absent
    from ``configs`` are skipped, as are keys outside 1..200.
    """
    rows = [
        {
            'id': f'{n:03d}_{i}',
            'x': f's{c["x"][i]}',
            'y': f's{c["y"][i]}',
            'deg': f's{c["deg"][i]}',
        }
        for n, c in ((k, configs[k]) for k in range(1, 201) if k in configs)
        for i in range(len(c['x']))
    ]
    pd.DataFrame(rows).to_csv(filepath, index=False)
    print(f'Saved to {filepath}')

# Write the CSV in the working directory, then copy it to the submission location
local_csv = 'submission.csv'
final_csv = '/home/submission/submission.csv'
save_submission(repaired_configs, local_csv)
shutil.copy(local_csv, final_csv)
print('Submission saved to /home/submission/submission.csv')

In [None]:
# Final summary: gather the report lines, then emit them in order
summary_lines = [
    f'\n=== EXPERIMENT 005 SUMMARY ===',
    f'Baseline score: {baseline_score:.6f}',
    f'Ensemble score (with overlaps): {ensemble_score:.6f}',
    f'Final score: {final_score:.6f}',
    f'Improvement over baseline: {baseline_score - final_score:.6f}',
    f'Overlaps in final: {len(overlap_n)}',
    f'Failed repairs: {len(failed_repairs)} / {len(overlap_n_values)}',
]
for line in summary_lines:
    print(line)