# Experiment 011: bbox3 with Overlap Repair

Implement the full bbox3-runner approach:
1. Run bbox3 optimizer for extended time
2. Apply overlap repair using saspav baseline as donor
3. Validate with strict overlap detection
4. Only accept improvements that pass validation

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
from shapely.strtree import STRtree
from decimal import Decimal, getcontext
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar
import subprocess
import shutil
import os
import time
from tqdm import tqdm

# Use 30 significant digits for Decimal arithmetic in the current context.
getcontext().prec = 30
# Tree coordinates are multiplied by this factor before the Shapely polygons
# are built (see ChristmasTree) and divided back out in get_score.
scale_factor = Decimal("1e18")

# Paths
# Baseline submission CSV; it is validated below and also serves as the donor
# when overlapping configurations are repaired.
BASELINE_PATH = '/home/code/external_data/saspav/santa-2025.csv'
# bbox3 optimizer executable that is launched through subprocess.
BBOX3_BIN = '/home/code/experiments/009_bbox3_optimizer/bbox3'
# Directory bbox3 is run in; the working submission.csv is kept here.
WORK_DIR = '/home/code/experiments/011_bbox3_with_repair'

print("Setup complete")

Setup complete


In [2]:
# ChristmasTree class from bbox3-runner (high precision)
class ChristmasTree:
    """A tree-shaped Shapely polygon with a position and a rotation.

    Every coordinate is multiplied by the module-level ``scale_factor``
    before the polygon is built, so ``self.polygon`` lives in scaled units;
    ``get_score`` divides the factor back out.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        """Build the outline, rotate it about the origin, then translate it.

        Args:
            center_x: Horizontal offset; converted with ``Decimal(str(...))``.
            center_y: Vertical offset; converted the same way.
            angle: Rotation handed to ``affinity.rotate`` (degrees under
                Shapely's default ``use_radians=False``).
        """
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        k = scale_factor
        two = Decimal("2")
        four = Decimal("4")

        # Unscaled widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal("0.15")
        base_w = Decimal("0.7")
        mid_w = Decimal("0.4")
        top_w = Decimal("0.25")

        # Scaled heights of each horizontal level of the outline.
        tip_y = Decimal("0.8") * k
        tier_1_y = Decimal("0.5") * k
        tier_2_y = Decimal("0.25") * k
        base_y = Decimal("0.0") * k
        trunk_bottom_y = -Decimal("0.2") * k

        # Right-hand half of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (top_w / two * k, tier_1_y),
            (top_w / four * k, tier_1_y),
            (mid_w / two * k, tier_2_y),
            (mid_w / four * k, tier_2_y),
            (base_w / two * k, base_y),
            (trunk_w / two * k, base_y),
            (trunk_w / two * k, trunk_bottom_y),
        ]
        # The outline is symmetric about the y axis, so the left-hand half is
        # the right-hand half mirrored and walked back up towards the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]

        outline = Polygon([(Decimal("0.0") * k, tip_y)] + right_side + left_side)

        turned = affinity.rotate(outline, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x * k),
            yoff=float(self.center_y * k),
        )

print("ChristmasTree class defined")

ChristmasTree class defined


In [3]:
def load_configuration_from_df(n, df):
    """Build the ChristmasTree objects of the ``n``-tree configuration.

    Rows are selected by an ``id`` that starts with the zero-padded prefix
    ``f"{n:03d}_"``. The ``x``, ``y`` and ``deg`` values are rendered with
    ``str()`` and a leading ``'s'`` is dropped when present; a row is skipped
    when any of the three resulting strings is empty.

    Args:
        n: Number of trees in the wanted configuration (used for the prefix).
        df: Submission DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        List of ChristmasTree objects, one per usable row (possibly empty).
    """
    def _without_marker(value):
        # Submission values may carry a leading 's'; strip exactly one.
        text = str(value)
        return text[1:] if text.startswith('s') else text

    wanted_prefix = f"{n:03d}_"
    selected = df[df["id"].str.startswith(wanted_prefix)]

    loaded = []
    for _, record in selected.iterrows():
        x = _without_marker(record["x"])
        y = _without_marker(record["y"])
        deg = _without_marker(record["deg"])
        if not (x and y and deg):
            continue
        loaded.append(ChristmasTree(x, y, deg))
    return loaded

def get_score(trees, n):
    """Return the bounding-square score ``side**2 / n`` of a configuration.

    All exterior vertices of the tree polygons are gathered, converted back
    from scaled units by dividing by ``scale_factor``, and the side of the
    axis-aligned square enclosing them is the larger of the x and y extents.

    Args:
        trees: Objects exposing a Shapely ``polygon`` attribute.
        n: Divisor of the squared side length.

    Returns:
        ``float('inf')`` for an empty configuration, otherwise the score.
    """
    if not trees:
        return float('inf')

    vertices = np.concatenate(
        [np.asarray(tree.polygon.exterior.xy).T for tree in trees]
    ) / float(scale_factor)

    lower = vertices.min(axis=0)
    upper = vertices.max(axis=0)
    width = upper[0] - lower[0]
    height = upper[1] - lower[1]
    side_length = max(width, height)
    return side_length**2 / n

def has_overlap(trees):
    """Tell whether any two tree polygons intersect beyond merely touching.

    An STRtree over the polygons supplies candidates for each polygon; the
    query results are used as integer positions into the polygon list. A
    candidate counts as an overlap when it intersects the polygon without
    just touching it.

    Args:
        trees: Objects exposing a Shapely ``polygon`` attribute.

    Returns:
        True as soon as one overlapping pair is found, otherwise False
        (always False for fewer than two trees).
    """
    if len(trees) < 2:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    def _collides(first, second):
        # Shared boundary points alone do not count as an overlap.
        return first.intersects(second) and not first.touches(second)

    return any(
        _collides(shape, shapes[candidate])
        for position, shape in enumerate(shapes)
        for candidate in spatial_index.query(shape)
        if candidate != position
    )

print("Helper functions defined")

Helper functions defined


In [4]:
def score_and_validate_submission(file_path, max_n=200):
    """Score a submission CSV and list the configurations that overlap.

    For every N from 1 to ``max_n`` the configuration is loaded; N values
    with no trees are skipped entirely, the others add their score to the
    total and are checked for overlaps.

    Args:
        file_path: Path of the submission CSV to read.
        max_n: Largest configuration size to inspect.

    Returns:
        Dict with ``"ok"`` (True when nothing overlaps), ``"total_score"``
        (sum of per-configuration scores) and ``"failed_overlap_n"`` (the N
        values whose configuration overlaps).
    """
    submission = pd.read_csv(file_path)
    score_sum = 0.0
    overlapping_n = []

    for n in range(1, max_n + 1):
        configuration = load_configuration_from_df(n, submission)
        if not configuration:
            continue
        score_sum += get_score(configuration, n)
        if has_overlap(configuration):
            overlapping_n.append(n)

    return {
        "ok": not overlapping_n,
        "total_score": score_sum,
        "failed_overlap_n": overlapping_n
    }

def repair_overlaps_in_place(submission_path, donor_path, max_n=200):
    """Replace overlapping configurations in a submission with donor rows.

    The submission is validated once; when nothing overlaps, that validation
    result is returned and the file is left untouched. Otherwise every
    overlapping configuration for which the donor has rows is swapped for the
    donor's rows, the file is rewritten in place (sorted by ``id``) and
    validated again.

    Configurations the donor does not cover are left as they are instead of
    being deleted, so they remain visible in ``failed_overlap_n`` rather than
    silently vanishing from the file and from the total score.

    Args:
        submission_path: CSV to repair; overwritten when a repair happens.
        donor_path: CSV providing the replacement configurations.
        max_n: Largest configuration size to inspect and validate.

    Returns:
        The dict produced by ``score_and_validate_submission`` for the final
        state of ``submission_path``.
    """
    # One validation pass yields both the score and the failing N values, so
    # no separate detection loop over the same file is needed.
    initial = score_and_validate_submission(submission_path, max_n=max_n)
    failed_n = initial["failed_overlap_n"]
    if not failed_n:
        return initial

    print(f"  Repairing {len(failed_n)} overlapping configurations: {failed_n[:10]}...")

    df_sub = pd.read_csv(submission_path)
    df_donor = pd.read_csv(donor_path)

    # Only swap groups the donor can actually supply.
    donor_ids = df_donor["id"]
    repairable = []
    uncovered = []
    for n in failed_n:
        prefix = f"{n:03d}_"
        if donor_ids.str.startswith(prefix).any():
            repairable.append(prefix)
        else:
            uncovered.append(n)

    if uncovered:
        print(f"  WARNING: donor has no rows for N={uncovered}; left unrepaired")
    if not repairable:
        # Nothing was changed, so the initial validation is still accurate.
        return initial

    prefixes = tuple(repairable)
    df_to_keep = df_sub[~df_sub["id"].str.startswith(prefixes)]
    df_replacement = df_donor[donor_ids.str.startswith(prefixes)]
    df_repaired = (
        pd.concat([df_to_keep, df_replacement])
        .sort_values(by="id")
        .reset_index(drop=True)
    )
    df_repaired.to_csv(submission_path, index=False)

    # Validate repaired submission
    return score_and_validate_submission(submission_path, max_n=max_n)

print("Repair functions defined")

Repair functions defined


In [5]:
# Validate the baseline submission; baseline_result is reused by later cells.
print("Validating baseline...")
baseline_result = score_and_validate_submission(BASELINE_PATH)
print(
    f"Baseline score: {baseline_result['total_score']:.6f}",
    f"Baseline overlaps: {len(baseline_result['failed_overlap_n'])}",
    f"Baseline valid: {baseline_result['ok']}",
    sep="\n",
)

Validating baseline...


Baseline score: 70.659959
Baseline overlaps: 0
Baseline valid: True


In [None]:
# Run bbox3 for extended time with overlap repair
def run_bbox3_with_repair(timeout_sec, n_iters=1500, r_value=60):
    """Run the bbox3 optimizer on a copy of the baseline, then repair overlaps.

    The baseline CSV is copied to ``WORK_DIR/submission.csv``, bbox3 is run
    with ``WORK_DIR`` as its working directory, and the working CSV is then
    passed through ``repair_overlaps_in_place`` with the baseline as donor.
    NOTE(review): this relies on bbox3 reading and updating ``submission.csv``
    in its working directory - confirm against the bbox3 binary.

    A timeout or a non-zero exit status does not abort the run: the repair
    and validation step is still applied to whatever the working CSV holds.

    Args:
        timeout_sec: Seconds to let bbox3 run before it is stopped.
        n_iters: Value passed to bbox3 as ``-n``.
        r_value: Value passed to bbox3 as ``-r``.

    Returns:
        Tuple ``(work_csv, repair_result)``: the working CSV path and the
        dict returned by ``repair_overlaps_in_place``.
    """
    # The copy below fails if the working directory is missing.
    os.makedirs(WORK_DIR, exist_ok=True)
    work_csv = os.path.join(WORK_DIR, 'submission.csv')
    shutil.copy(BASELINE_PATH, work_csv)

    print(f"Running bbox3 with n={n_iters}, r={r_value}, timeout={timeout_sec}s...")
    start = time.time()

    try:
        result = subprocess.run(
            [BBOX3_BIN, '-n', str(n_iters), '-r', str(r_value)],
            cwd=WORK_DIR,
            capture_output=True,
            text=True,
            timeout=timeout_sec
        )
    except subprocess.TimeoutExpired:
        elapsed = time.time() - start
        print(f"  Timeout after {elapsed:.1f}s")
    else:
        elapsed = time.time() - start
        print(f"  Completed in {elapsed:.1f}s")

        # Surface optimizer failures instead of silently continuing.
        if result.returncode != 0:
            print(f"  WARNING: bbox3 exited with status {result.returncode}")
            stderr_tail = result.stderr.strip().splitlines()[-5:]
            for line in stderr_tail:
                print(f"    {line}")

        # Parse final score from output
        for line in result.stdout.split('\n'):
            if 'Final' in line and 'Score' in line:
                print(f"  {line}")

    # Apply overlap repair
    print("  Applying overlap repair...")
    repair_result = repair_overlaps_in_place(work_csv, BASELINE_PATH)

    return work_csv, repair_result

# Launch the 10-minute optimization; work_csv and result feed the later cells.
print(
    "\n" + "="*60,
    "Running bbox3 for 10 minutes with overlap repair...",
    "="*60,
    sep="\n",
)
work_csv, result = run_bbox3_with_repair(timeout_sec=600, n_iters=2000, r_value=60)
print(
    f"\nResult: score={result['total_score']:.6f}, "
    f"valid={result['ok']}, "
    f"overlaps={len(result['failed_overlap_n'])}"
)

In [None]:
# Side-by-side report of the baseline and the optimized (post-repair) result.
print(
    "\n" + "="*60,
    "COMPARISON",
    "="*60,
    f"Baseline score: {baseline_result['total_score']:.6f}",
    f"Optimized score: {result['total_score']:.6f}",
    f"Improvement: {baseline_result['total_score'] - result['total_score']:.9f}",
    f"Valid: {result['ok']}",
    sep="\n",
)

In [None]:
# Decide what ends up in /home/submission/submission.csv:
#   - invalid result          -> nothing is written, the overlaps are reported
#   - valid and better by more than 1e-9 -> the optimized file is written
#   - valid but not better    -> the baseline is written
if not result['ok']:
    print(f"\nInvalid submission - overlaps: {result['failed_overlap_n']}")
elif result['total_score'] < baseline_result['total_score'] - 1e-9:
    shutil.copy(work_csv, '/home/submission/submission.csv')
    print(f"\nSaved improved submission: {result['total_score']:.6f}")
else:
    # Valid but no improvement - save baseline
    shutil.copy(BASELINE_PATH, '/home/submission/submission.csv')
    print("\nNo improvement - saved baseline")

In [None]:
# Summary
# `result` is the validation performed AFTER the repair step, so
# result['failed_overlap_n'] lists configurations that still overlap, not the
# ones that were repaired; the label below states that explicitly.
print("="*60)
print("EXPERIMENT 011 SUMMARY: bbox3 with Overlap Repair")
print("="*60)
print(f"Baseline score: {baseline_result['total_score']:.6f}")
print(f"Optimized score: {result['total_score']:.6f}")
print(f"Improvement: {baseline_result['total_score'] - result['total_score']:.9f}")
print(f"Valid: {result['ok']}")
print(f"Overlaps remaining after repair: {len(result['failed_overlap_n'])}")
print("="*60)