# Experiment 012: Long bbox3 Optimization (Multi-Phase)

Implement the full bbox3-runner workflow:
- Phase A: 2-min runs with n=[1000,1200,1500,1800,2000], r=[30,60,90] (15 combinations)
- Phase B: 10-min runs on top 6 candidates
- Phase C: 20-min runs on top 3 candidates
- Apply fix_direction() after each run (planned; no fix_direction() step is defined or called in the cells shown below)
- Use overlap repair with donor solution

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
from decimal import Decimal, getcontext
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar
import subprocess
import shutil
import os
import time
from datetime import datetime

# Use 30 significant digits for every Decimal operation in this notebook.
getcontext().prec = 30
# Multiplier applied to tree coordinates before they are handed to shapely
# (see ChristmasTree); get_score divides by it again to recover real units.
scale_factor = Decimal("1e18")

# Paths
# Starting submission: copied over the working file before each run and used
# as the donor when repairing overlapping configurations.
BASELINE_PATH = '/home/code/external_data/saspav/santa-2025.csv'
# Optimizer executable invoked by run_bbox3.
BBOX3_BIN = '/home/code/experiments/009_bbox3_optimizer/bbox3'
# Working directory for bbox3; submission.csv is read from here after each run.
WORK_DIR = '/home/code/experiments/012_long_bbox3'
# Improved candidates from each phase are copied into this directory.
OUT_DIR = os.path.join(WORK_DIR, 'outputs')
os.makedirs(OUT_DIR, exist_ok=True)

print(f"Setup complete at {datetime.now()}")

Setup complete at 2026-01-19 22:44:40.765692


In [2]:
# ChristmasTree class from bbox3-runner (high precision)
class ChristmasTree:
    """Tree-shaped polygon positioned at a centre point with a rotation.

    The constructor arguments are converted with ``Decimal(str(...))`` and
    stored as ``center_x``, ``center_y`` and ``angle``. The resulting shapely
    geometry is stored on ``polygon``; its coordinates are multiplied by the
    module-level ``scale_factor``.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        two = Decimal("2")
        four = Decimal("4")

        # Widths of the individual parts of the outline.
        trunk_width = Decimal("0.15")
        trunk_height = Decimal("0.2")
        base_width = Decimal("0.7")
        mid_width = Decimal("0.4")
        top_width = Decimal("0.25")

        # Heights at which the outline changes direction.
        y_tip = Decimal("0.8")
        y_tier_1 = Decimal("0.5")
        y_tier_2 = Decimal("0.25")
        y_base = Decimal("0.0")
        y_trunk_bottom = -trunk_height

        # Right half of the outline, walked from just below the tip down to
        # the bottom-right corner of the trunk.
        right_side = [
            (top_width / two, y_tier_1),
            (top_width / four, y_tier_1),
            (mid_width / two, y_tier_2),
            (mid_width / four, y_tier_2),
            (base_width / two, y_base),
            (trunk_width / two, y_base),
            (trunk_width / two, y_trunk_bottom),
        ]
        # The left half is the mirror image, walked back up towards the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal("0.0"), y_tip)] + right_side + left_side

        template = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        # Rotate about the origin first, then shift to the requested centre.
        turned = affinity.rotate(template, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

def load_configuration_from_df(n, df):
    """Build the trees of configuration ``n`` from a submission DataFrame.

    Rows are selected by an ``id`` starting with the zero-padded prefix
    ``f"{n:03d}_"``. A single leading ``'s'`` is removed from the ``x``,
    ``y`` and ``deg`` values; rows where any of the three is empty after
    that are skipped.

    Returns:
        A list of ChristmasTree objects (empty when no row matches).
    """

    def without_marker(value):
        # Drop one leading 's' if present; otherwise keep the text as is.
        text = str(value)
        return text[1:] if text.startswith('s') else text

    prefix = f"{n:03d}_"
    selected = df[df["id"].str.startswith(prefix)]

    trees = []
    for _, record in selected.iterrows():
        x = without_marker(record["x"])
        y = without_marker(record["y"])
        deg = without_marker(record["deg"])
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees

def get_score(trees, n):
    """Return the squared side of the axis-aligned bounding square over ``n``.

    The exterior coordinates of every tree polygon are divided by
    ``scale_factor`` and pooled; the side length is the larger of the pooled
    x-extent and y-extent. An empty ``trees`` yields ``float('inf')``.
    """
    if not trees:
        return float('inf')

    unit = float(scale_factor)
    points = np.concatenate(
        [np.asarray(tree.polygon.exterior.xy).T / unit for tree in trees]
    )
    lows = points.min(axis=0)
    highs = points.max(axis=0)
    width, height = highs - lows
    side_length = max(width, height)
    return side_length**2 / n

def has_overlap(trees):
    """Report whether any two trees intersect without merely touching.

    Candidate pairs come from an STRtree query per polygon; the results of
    ``query`` are used as integer positions into the polygon list. Fewer than
    two trees can never overlap.
    """
    if len(trees) <= 1:
        return False

    shapes = [tree.polygon for tree in trees]
    index = STRtree(shapes)
    return any(
        shape.intersects(shapes[other]) and not shape.touches(shapes[other])
        for position, shape in enumerate(shapes)
        for other in index.query(shape)
        if other != position
    )

print("Classes and functions defined")

Classes and functions defined


In [3]:
def score_and_validate_submission(file_path, max_n=200):
    """Score a submission CSV and list the configurations that overlap.

    For every ``n`` from 1 to ``max_n`` the configuration is loaded from the
    CSV; configurations with no trees are skipped. Each remaining one adds
    its ``get_score`` value to the total and is checked with ``has_overlap``.

    Returns:
        dict with ``ok`` (True when nothing overlaps), ``total_score`` and
        ``failed_overlap_n`` (the overlapping ``n`` values, ascending).
    """
    submission = pd.read_csv(file_path)
    running_total = 0.0
    overlapping = []

    for n in range(1, max_n + 1):
        config = load_configuration_from_df(n, submission)
        if not config:
            continue
        running_total += get_score(config, n)
        if has_overlap(config):
            overlapping.append(n)

    return {
        "ok": not overlapping,
        "total_score": running_total,
        "failed_overlap_n": overlapping,
    }

def repair_overlaps_in_place(submission_path, donor_path, max_n=200):
    """Replace overlapping configurations in a submission with donor rows.

    Every configuration ``n`` in ``1..max_n`` of the submission is checked
    with ``has_overlap``. Rows of overlapping configurations are swapped for
    the donor's rows with the same ``id`` prefix and the submission file is
    rewritten in place, sorted by ``id``.

    A failed configuration is only replaced when the donor actually has rows
    for it; otherwise it is left untouched (and will still be reported as
    overlapping) instead of being dropped from the file, which would lower
    the total score without a valid configuration behind it.

    Args:
        submission_path: CSV to check and, if needed, rewrite.
        donor_path: CSV supplying replacement rows; only read when a repair
            is required.
        max_n: Largest configuration size to check (default 200).

    Returns:
        The result dict of ``score_and_validate_submission`` for the
        (possibly rewritten) submission.
    """
    df_sub = pd.read_csv(submission_path)

    failed_n = []
    for n in range(1, max_n + 1):
        trees = load_configuration_from_df(n, df_sub)
        if trees and has_overlap(trees):
            failed_n.append(n)

    if failed_n:
        print(f"    Repairing {len(failed_n)} overlapping configs")

        df_donor = pd.read_csv(donor_path)
        donor_ids = df_donor["id"]
        # Only swap out groups the donor can really supply.
        repairable = tuple(
            prefix
            for prefix in (f"{n:03d}_" for n in failed_n)
            if donor_ids.str.startswith(prefix).any()
        )
        if len(repairable) < len(failed_n):
            missing = len(failed_n) - len(repairable)
            print(f"    WARNING: donor has no rows for {missing} overlapping configs; left unchanged")

        if repairable:
            df_to_keep = df_sub[~df_sub["id"].str.startswith(repairable)]
            df_replacement = df_donor[donor_ids.str.startswith(repairable)]
            df_repaired = (
                pd.concat([df_to_keep, df_replacement])
                .sort_values(by="id")
                .reset_index(drop=True)
            )
            df_repaired.to_csv(submission_path, index=False)

    return score_and_validate_submission(submission_path, max_n=max_n)

print("Validation functions defined")

Validation functions defined


In [4]:
def run_bbox3(timeout_sec, n_iters, r_value):
    """Run the bbox3 optimizer in WORK_DIR and return the resulting score.

    The binary is started as ``bbox3 -n <n_iters> -r <r_value>`` with
    ``WORK_DIR`` as its working directory and is stopped after
    ``timeout_sec`` seconds.

    Args:
        timeout_sec: Wall-clock limit for the optimizer process, in seconds.
        n_iters: Value passed to the ``-n`` option.
        r_value: Value passed to the ``-r`` option.

    Returns:
        The number after the last ``:`` on the first stdout line containing
        both ``Final`` and ``Score`` that parses as a float. If the run times
        out or no such line parses, the total score of
        ``WORK_DIR/submission.csv`` computed by
        ``score_and_validate_submission``.
    """
    work_csv = os.path.join(WORK_DIR, 'submission.csv')

    try:
        result = subprocess.run(
            [BBOX3_BIN, '-n', str(n_iters), '-r', str(r_value)],
            cwd=WORK_DIR,
            capture_output=True,
            text=True,
            timeout=timeout_sec
        )
    except subprocess.TimeoutExpired:
        # Hitting the time budget is expected; score whatever is on disk.
        stdout = ""
    else:
        stdout = result.stdout
        if result.returncode != 0:
            # Make optimizer failures visible instead of silently rescoring.
            print(f"    WARNING: bbox3 exited with code {result.returncode}")

    # Parse final score
    for line in stdout.splitlines():
        if 'Final' in line and 'Score' in line:
            try:
                return float(line.split(':')[-1].strip())
            except ValueError:
                # Not a number after the colon; keep looking at later lines.
                continue

    # If we can't parse, compute score ourselves
    return score_and_validate_submission(work_csv)['total_score']

# Score the untouched baseline; its total is the value every phase must beat.
print("Validating baseline...")
baseline_result = score_and_validate_submission(BASELINE_PATH)
best_score = baseline_result['total_score']
print(f"Baseline score: {best_score:.6f}")
print(f"Baseline valid: {baseline_result['ok']}")

Validating baseline...


Baseline score: 70.659959
Baseline valid: True


In [None]:
# Phase A: Short exploration runs (2 min each)
# Every (n, r) combination starts from a fresh copy of the baseline, is run
# for at most 120 s, repaired against the baseline, and recorded in
# `candidates`. Only runs that beat the best score so far are saved to OUT_DIR.
print("\n" + "="*60)
print("PHASE A: Exploration (2-min runs)")
print("="*60)

n_values = [1000, 1200, 1500, 1800, 2000]
r_values = [30, 60, 90]

candidates = []
work_csv = os.path.join(WORK_DIR, 'submission.csv')

for r in r_values:
    for n in n_values:
        # Reset to baseline
        shutil.copy(BASELINE_PATH, work_csv)

        print(f"\n[A] n={n}, r={r}...")
        start = time.time()
        # The optimizer's own score is not used: the score is recomputed
        # below once overlapping configurations have been repaired.
        run_bbox3(timeout_sec=120, n_iters=n, r_value=r)
        elapsed = time.time() - start

        # Apply overlap repair
        repair_result = repair_overlaps_in_place(work_csv, BASELINE_PATH)
        final_score = repair_result['total_score']

        print(f"    Score: {final_score:.9f} (elapsed: {elapsed:.1f}s)")

        if final_score < best_score:
            improvement = best_score - final_score
            print(f"    NEW BEST! Improvement: {improvement:.9f}")
            best_score = final_score
            # Save this candidate
            output_path = os.path.join(OUT_DIR, f'A_n{n}_r{r}.csv')
            shutil.copy(work_csv, output_path)

        candidates.append({
            'n': n,
            'r': r,
            'score': final_score,
            'elapsed': elapsed
        })

# Sort candidates by score
candidates.sort(key=lambda x: x['score'])
print(f"\nPhase A complete. Best score: {best_score:.9f}")
# Build the summary first: reusing the enclosing quote character inside an
# f-string replacement field is a SyntaxError before Python 3.12.
top6_summary = [(c['n'], c['r'], f"{c['score']:.9f}") for c in candidates[:6]]
print(f"Top 6 candidates: {top6_summary}")

In [None]:
# Phase B: Medium runs on top 6 candidates (10 min each)
# Re-run the six best Phase A settings from the baseline with a 600 s budget
# and record each result under 'score_B'.
rule = "=" * 60
print("\n" + rule)
print("PHASE B: Refinement (10-min runs on top 6)")
print(rule)

top_candidates = candidates[:6]

for c in top_candidates:
    n_setting = c['n']
    r_setting = c['r']

    # Every run starts from an untouched copy of the baseline.
    shutil.copy(BASELINE_PATH, work_csv)

    print(f"\n[B] n={n_setting}, r={r_setting}...")
    start = time.time()
    score = run_bbox3(timeout_sec=600, n_iters=n_setting, r_value=r_setting)
    elapsed = time.time() - start

    # Swap overlapping configurations for baseline ones, then rescore.
    repair_result = repair_overlaps_in_place(work_csv, BASELINE_PATH)
    final_score = repair_result['total_score']

    print(f"    Score: {final_score:.9f} (elapsed: {elapsed:.1f}s)")

    if final_score < best_score:
        improvement = best_score - final_score
        print(f"    NEW BEST! Improvement: {improvement:.9f}")
        best_score = final_score
        # Keep a copy of the improved submission for the final selection.
        output_path = os.path.join(OUT_DIR, f'B_n{n_setting}_r{r_setting}.csv')
        shutil.copy(work_csv, output_path)

    c['score_B'] = final_score

# Order by the Phase B result, falling back to the Phase A score.
top_candidates.sort(key=lambda cand: cand.get('score_B', cand['score']))
print(f"\nPhase B complete. Best score: {best_score:.9f}")

In [None]:
# Phase C: Long runs on top 3 candidates (20 min each)
# Re-run the three best Phase B settings from the baseline with a 1200 s
# budget; improved submissions are copied to OUT_DIR.
separator = "=" * 60
print("\n" + separator)
print("PHASE C: Final Polish (20-min runs on top 3)")
print(separator)

top3 = top_candidates[:3]

for c in top3:
    n_setting, r_setting = c['n'], c['r']

    # Start again from the baseline submission.
    shutil.copy(BASELINE_PATH, work_csv)

    print(f"\n[C] n={n_setting}, r={r_setting}...")
    start = time.time()
    score = run_bbox3(timeout_sec=1200, n_iters=n_setting, r_value=r_setting)
    elapsed = time.time() - start

    # Repair overlaps with baseline rows and take the rescored total.
    repair_result = repair_overlaps_in_place(work_csv, BASELINE_PATH)
    final_score = repair_result['total_score']

    print(f"    Score: {final_score:.9f} (elapsed: {elapsed:.1f}s)")

    if final_score < best_score:
        improvement = best_score - final_score
        print(f"    NEW BEST! Improvement: {improvement:.9f}")
        best_score = final_score
        # Keep a copy of the improved submission for the final selection.
        output_path = os.path.join(OUT_DIR, f'C_n{n_setting}_r{r_setting}.csv')
        shutil.copy(work_csv, output_path)

print(f"\nPhase C complete. Best score: {best_score:.9f}")

In [None]:
# Final summary and save best submission
# Rescore every CSV in OUT_DIR, pick the lowest-scoring one that has no
# overlaps, and copy it to the submission path if it beats the baseline;
# otherwise the baseline itself is submitted.
print("\n" + "="*60)
print("FINAL SUMMARY")
print("="*60)

print(f"Baseline score: {baseline_result['total_score']:.9f}")
print(f"Best score: {best_score:.9f}")
print(f"Total improvement: {baseline_result['total_score'] - best_score:.9f}")

submission_path = '/home/submission/submission.csv'
# Make sure the destination exists before copying anything into it.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)

# Find and save best submission (sorted so ties resolve deterministically)
best_files = sorted(f for f in os.listdir(OUT_DIR) if f.endswith('.csv'))
best_file = None
best_file_score = float('inf')
for f in best_files:
    result = score_and_validate_submission(os.path.join(OUT_DIR, f))
    if not result['ok']:
        # OUT_DIR may hold files from earlier runs; never submit one that
        # still contains overlapping trees.
        print(f"Skipping {f}: overlaps in {len(result['failed_overlap_n'])} configs")
        continue
    if result['total_score'] < best_file_score:
        best_file_score = result['total_score']
        best_file = f

if best_file and best_file_score < baseline_result['total_score']:
    chosen_source = os.path.join(OUT_DIR, best_file)
    message = f"\nSaved best submission: {best_file} (score: {best_file_score:.9f})"
elif best_files:
    chosen_source = BASELINE_PATH
    message = "\nNo improvement - saved baseline"
else:
    chosen_source = BASELINE_PATH
    message = "\nNo candidates found - saved baseline"

shutil.copy(chosen_source, submission_path)
print(message)

In [None]:
# Final validation
# Rescore the file that was just written to the submission path and report
# its total, validity flag and number of overlapping configurations.
divider = "=" * 60
print("\n" + divider)
print("FINAL VALIDATION")
print(divider)

final_result = score_and_validate_submission('/home/submission/submission.csv')
overlap_count = len(final_result['failed_overlap_n'])
print(f"Final score: {final_result['total_score']:.9f}")
print(f"Valid: {final_result['ok']}")
print(f"Overlaps: {overlap_count}")

print("\n" + divider)
print("EXPERIMENT 012 COMPLETE")
print(divider)