# Experiment 009: Multi-Seed bbox3 with Overlap Repair

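Run the bbox3 optimizer with multiple seeds, then build an ensemble by picking the best configuration for each N. Any selected configuration that contains overlapping trees is repaired by substituting the corresponding configuration from the saspav baseline (the donor).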

In [1]:
import pandas as pd
import numpy as np
import subprocess
import shutil
import os
from shapely.geometry import Polygon
from shapely.ops import unary_union
from decimal import Decimal, getcontext
from tqdm import tqdm
import time

# Set the precision of the decimal module's current context to 30 digits.
# NOTE(review): no Decimal arithmetic is visible in this notebook - confirm
# this setting is still needed.
getcontext().prec = 30

# Tree geometry: the outline of a single tree as 15 vertices, given as
# parallel lists of x (TX) and y (TY) coordinates in the tree's local frame.
# create_tree_polygon() rotates and translates these vertices.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    """Convert a cell value to float, dropping a leading 's' from strings.

    Strings such as 's1.25' are parsed as 1.25; any other value is passed
    to ``float`` unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    raw = s[1:] if has_marker else s
    return float(raw)

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to (x, y).

    Each (TX, TY) template vertex is rotated about the local origin and then
    translated by (x, y); the resulting vertices form a shapely Polygon.
    """
    theta = np.radians(deg)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)

    outline = []
    for tx, ty in zip(TX, TY):
        # 2-D rotation followed by translation.
        px = tx * cos_t - ty * sin_t + x
        py = tx * sin_t + ty * cos_t + y
        outline.append((px, py))
    return Polygon(outline)

def check_overlaps_for_n(df, n):
    """Check whether configuration N contains overlapping trees.

    Args:
        df: DataFrame with 'id', 'x', 'y' and 'deg' columns. The rows of
            configuration N are those whose 'id' starts with the zero-padded
            prefix ``f"{n:03d}_"``.
        n: Number of trees in the configuration to check.

    Returns:
        A ``(has_overlap, area)`` tuple:
        ``(True, inf)`` if the configuration does not have exactly ``n`` rows,
        ``(True, area)`` for the first pair of trees whose intersection area
        exceeds 1e-15, and ``(False, 0)`` otherwise.
    """
    prefix = f"{n:03d}_"
    trees = df[df['id'].str.startswith(prefix)]
    if len(trees) != n:
        # A wrong row count is reported like an overlap so callers reject it.
        return True, float('inf')

    polygons = [
        create_tree_polygon(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(trees['x'], trees['y'], trees['deg'])
    ]

    # The buffered shape depends only on the polygon itself, so build it once
    # per tree instead of twice for every pair inside the nested loop.
    buffered = [poly.buffer(1e-12) for poly in polygons]

    # Stricter overlap detection with buffer
    for i, (poly_a, buf_a) in enumerate(zip(polygons, buffered)):
        for poly_b, buf_b in zip(polygons[i + 1:], buffered[i + 1:]):
            # Buffered shapes act as a cheap pre-filter for near-overlaps;
            # the actual decision uses the un-buffered intersection area.
            if buf_a.intersects(buf_b):
                intersection = poly_a.intersection(poly_b)
                if intersection.area > 1e-15:  # Stricter threshold
                    return True, intersection.area
    return False, 0

def compute_score_for_n(df, n):
    """Return the score of configuration N: bounding-square side squared over n.

    The side is the larger of the x and y extents over all tree vertices.
    Returns ``inf`` when the configuration does not have exactly ``n`` rows.
    """
    rows = df[df['id'].str.startswith(f"{n:03d}_")]
    if len(rows) != n:
        return float('inf')

    # Gather the exterior vertices of every tree into one flat list.
    vertices = []
    for _, row in rows.iterrows():
        tree = create_tree_polygon(
            parse_value(row['x']),
            parse_value(row['y']),
            parse_value(row['deg']),
        )
        vertices.extend(list(tree.exterior.coords))

    coords = np.array(vertices)
    extent = coords.max(axis=0) - coords.min(axis=0)
    side = max(extent)
    return side**2 / n

def compute_total_score(df):
    """Add up the per-configuration scores for N = 1 through 200."""
    total = 0
    for n in range(1, 201):
        total += compute_score_for_n(df, n)
    return total

# Marker output confirming that the definitions above were executed.
print("Functions defined")

Functions defined


In [2]:
# Locations of the baseline submission, the optimizer binary and its work dir
BASELINE_PATH = '/home/code/external_data/saspav/santa-2025.csv'
BBOX3_BIN = '/home/code/experiments/009_bbox3_optimizer/bbox3'
WORK_DIR = '/home/code/experiments/009_multiseed_bbox3'

# Score the untouched baseline; later cells report improvements against it
df_baseline = pd.read_csv(BASELINE_PATH)
baseline_score = compute_total_score(df_baseline)
print(f"Baseline score: {baseline_score:.6f}")

# Count the baseline configurations flagged by the overlap check
overlap_count = sum(
    1 for n in range(1, 201) if check_overlaps_for_n(df_baseline, n)[0]
)
print(f"Baseline overlaps: {overlap_count}/200")

Baseline score: 70.659959


Baseline overlaps: 0/200


In [3]:
# Run bbox3 with different parameters and collect outputs
def run_bbox3(n_iters, r_value, timeout_sec=120):
    """Run the bbox3 optimizer on a fresh copy of the baseline.

    The baseline CSV is copied to ``WORK_DIR/submission.csv`` and the binary is
    executed with ``WORK_DIR`` as its working directory.
    NOTE(review): presumably bbox3 reads and rewrites ``submission.csv`` in
    place - confirm against the binary.

    Args:
        n_iters: Value passed to the binary's ``-n`` option.
        r_value: Value passed to the binary's ``-r`` option.
        timeout_sec: Seconds to wait before the run is aborted.

    Returns:
        ``(work_csv, score)`` where ``score`` is the number parsed from the
        first parseable 'Final Total Score' line of stdout, or ``None`` when
        the run timed out or no such line was found. The reason for a missing
        score is printed so the two cases can be told apart.
    """
    work_csv = os.path.join(WORK_DIR, 'submission.csv')
    shutil.copy(BASELINE_PATH, work_csv)

    # Only the subprocess call can time out, so keep the try body minimal.
    try:
        result = subprocess.run(
            [BBOX3_BIN, '-n', str(n_iters), '-r', str(r_value)],
            cwd=WORK_DIR,
            capture_output=True,
            text=True,
            timeout=timeout_sec
        )
    except subprocess.TimeoutExpired:
        print(f"bbox3 timed out after {timeout_sec}s (n={n_iters}, r={r_value})")
        return work_csv, None

    # Parse final score from output
    for line in result.stdout.split('\n'):
        if 'Final Total Score' in line:
            try:
                return work_csv, float(line.split(':')[-1].strip())
            except ValueError:
                # Matching text without a parseable number; keep looking.
                continue

    # Explain the missing score instead of failing silently.
    print(f"bbox3 printed no parseable 'Final Total Score' line (exit code {result.returncode})")
    stderr_tail = (result.stderr or '').strip()[-500:]
    if stderr_tail:
        print(f"bbox3 stderr: {stderr_tail}")
    return work_csv, None

# Test run: a short optimizer invocation to confirm the binary works
print("Testing bbox3...")
test_csv, test_score = run_bbox3(500, 60, timeout_sec=30)
# run_bbox3 signals "no score" with None; compare explicitly so that a parsed
# score of 0.0 is not mistaken for a missing one.
if test_score is not None:
    print(f"Test run score: {test_score:.6f}")
else:
    print("Test run completed (no score parsed)")

Testing bbox3...


Test run completed (no score parsed)


In [None]:
# Run multiple bbox3 configurations and collect results
configs = [
    (1000, 30, 60),   # (n_iters, r_value, timeout)
    (1000, 60, 60),
    (1000, 90, 60),
    (1500, 30, 90),
    (1500, 60, 90),
    (1500, 90, 90),
    (2000, 60, 120),
    (2000, 90, 120),
]

# One dict per successful run: parameters, parsed score, saved CSV path, runtime
results = []
for n_iters, r_value, timeout in configs:
    print(f"\nRunning bbox3 with n={n_iters}, r={r_value}, timeout={timeout}s...")
    start = time.time()

    # Every run starts from a fresh copy of the baseline in the work directory
    work_csv = os.path.join(WORK_DIR, 'submission.csv')
    shutil.copy(BASELINE_PATH, work_csv)

    # Only the subprocess call can time out, so keep the try body minimal.
    try:
        result = subprocess.run(
            [BBOX3_BIN, '-n', str(n_iters), '-r', str(r_value)],
            cwd=WORK_DIR,
            capture_output=True,
            text=True,
            timeout=timeout
        )
    except subprocess.TimeoutExpired:
        print(f"  Timeout after {timeout}s")
        continue
    elapsed = time.time() - start

    # Parse final score from the first matching stdout line
    final_score = None
    for line in result.stdout.split('\n'):
        if 'Final Total Score' in line:
            final_score = float(line.split(':')[-1].strip())
            break

    # Explicit None check: a parsed score of 0.0 must not count as "missing".
    if final_score is None:
        print(f"  No score parsed (elapsed: {elapsed:.1f}s)")
        continue

    # Keep this run's output under a name that encodes its parameters
    output_path = os.path.join(WORK_DIR, f'bbox3_n{n_iters}_r{r_value}.csv')
    shutil.copy(work_csv, output_path)
    results.append({
        'n_iters': n_iters,
        'r_value': r_value,
        'score': final_score,
        'path': output_path,
        'elapsed': elapsed
    })
    print(f"  Score: {final_score:.6f} (elapsed: {elapsed:.1f}s)")

print(f"\nCompleted {len(results)} runs")

In [None]:
# Analyze results: tabulate all successful runs and report the best one
if not results:
    print("No results collected")
else:
    df_results = pd.DataFrame(results)
    summary_columns = ['n_iters', 'r_value', 'score', 'elapsed']
    print("\nResults summary:")
    print(df_results[summary_columns].to_string())

    # Lowest total score wins
    best_result = min(results, key=lambda run: run['score'])
    print(f"\nBest result: n={best_result['n_iters']}, r={best_result['r_value']}, score={best_result['score']:.6f}")
    print(f"Improvement over baseline: {baseline_score - best_result['score']:.9f}")

In [None]:
# Ensemble: for each N, pick the best overlap-free configuration from all runs
print("\nBuilding ensemble from all runs...")

# Load all result CSVs, keyed by their path
all_dfs = {r['path']: pd.read_csv(r['path']) for r in results}

# Add baseline
all_dfs['baseline'] = df_baseline

# For each N, find the best *valid* configuration. Ranking by score alone can
# select a candidate that contains overlaps; such a candidate would later be
# replaced by the baseline even when another overlap-free run is better.
best_per_n = {}
rejected_for_overlap = 0
for n in tqdm(range(1, 201)):
    # Keep only finite scores (this also drops NaN). sorted() is stable, so
    # ties keep the insertion order of all_dfs, as a first-strictly-lower
    # scan over the sources would.
    scored = [(compute_score_for_n(df, n), source) for source, df in all_dfs.items()]
    ranked = sorted(
        (candidate for candidate in scored if candidate[0] < float('inf')),
        key=lambda candidate: candidate[0],
    )

    # Walk candidates from best to worst; the overlap check is the expensive
    # part, so it runs only until the first valid candidate is found.
    for score, source in ranked:
        has_overlap, _ = check_overlaps_for_n(all_dfs[source], n)
        if not has_overlap:
            best_per_n[n] = (score, source)
            break
        rejected_for_overlap += 1
    else:
        # No valid candidate at all: record the baseline so later cells still
        # have a source to read; the final validation will flag it if invalid.
        best_per_n[n] = (compute_score_for_n(df_baseline, n), 'baseline')

print(f"\nCandidates rejected for overlaps: {rejected_for_overlap}")

# Count wins per source
source_wins = {}
for n, (score, source) in best_per_n.items():
    source_name = os.path.basename(source) if source != 'baseline' else 'baseline'
    source_wins[source_name] = source_wins.get(source_name, 0) + 1

print("\nSource wins:")
for source, wins in sorted(source_wins.items(), key=lambda x: -x[1]):
    print(f"  {source}: {wins} N values")

ensemble_score = sum(best_per_n[n][0] for n in range(1, 201))
print(f"\nEnsemble score: {ensemble_score:.6f}")
print(f"Improvement over baseline: {baseline_score - ensemble_score:.9f}")

In [None]:
# Build ensemble submission with overlap repair
print("\nBuilding ensemble submission with overlap repair...")

ensemble_rows = []
overlap_repairs = 0

for n in tqdm(range(1, 201)):
    _, source = best_per_n[n]
    df_source = all_dfs[source]
    prefix = f"{n:03d}_"

    # Decide which frame donates the rows for this N: the selected source,
    # or the baseline when the selected source has overlaps (overlap repair)
    has_overlap, _ = check_overlaps_for_n(df_source, n)
    if has_overlap:
        donor = df_baseline
        overlap_repairs += 1
    else:
        donor = df_source

    ensemble_rows.append(donor[donor['id'].str.startswith(prefix)].copy())

df_ensemble = pd.concat(ensemble_rows, ignore_index=True)
print(f"\nOverlap repairs: {overlap_repairs}/200")
print(f"Ensemble rows: {len(df_ensemble)}")

In [None]:
# Validate ensemble: re-run the overlap check on the assembled submission
print("\nValidating ensemble...")
overlap_count = 0
for n in range(1, 201):
    if not check_overlaps_for_n(df_ensemble, n)[0]:
        continue
    overlap_count += 1
    # Only the first five offenders are listed to keep the output short
    if overlap_count <= 5:
        print(f"  N={n}: OVERLAP DETECTED")

print(f"\nTotal overlaps: {overlap_count}/200")

if overlap_count != 0:
    print("\nOverlaps detected - not saving")
else:
    final_score = compute_total_score(df_ensemble)
    print(f"\nFinal ensemble score: {final_score:.6f}")
    print(f"Improvement over baseline: {baseline_score - final_score:.9f}")

    # Save submission only when the ensemble passed validation
    df_ensemble.to_csv('/home/submission/submission.csv', index=False)
    print("\nSaved to /home/submission/submission.csv")

In [None]:
# Summary of the experiment, framed by separator lines
separator = "=" * 60
print(separator)
print("EXPERIMENT 009 SUMMARY: Multi-Seed bbox3 with Overlap Repair")
print(separator)
print(f"Baseline score: {baseline_score:.6f}")
# final_score is only assigned when validation found no overlaps
if overlap_count != 0:
    print(f"Ensemble INVALID - {overlap_count} overlaps")
else:
    print(f"Ensemble score: {final_score:.6f}")
    print(f"Improvement: {baseline_score - final_score:.9f}")
print(f"Overlap repairs: {overlap_repairs}/200")
print(f"Runs completed: {len(results)}")
print(separator)