# bbox3 Optimization with REPAIR Strategy

Run the bbox3 optimizer on the baseline submission, then validate the layout for each N with Shapely.
- If valid AND strictly better than the baseline: keep the optimized layout
- If invalid, worse, OR unchanged: replace it with the baseline layout

In [1]:
import numpy as np
import pandas as pd
import subprocess
import os
import shutil
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

# Decimal arithmetic is carried out with 25 significant digits; tree coordinates
# are multiplied by scale_factor before they are handed to Shapely.
getcontext().prec = 25
scale_factor = Decimal('1e15')

# Experiment locations: both CSV files live side by side inside WORK_DIR.
WORK_DIR = '/home/code/experiments/002_bbox3_repair'
BASELINE_CSV, WORK_CSV = (
    os.path.join(WORK_DIR, file_name)
    for file_name in ('baseline.csv', 'submission.csv')
)
BBOX3_PATH = '/home/code/bbox3'

In [2]:
class ChristmasTree:
    """A fixed-size Christmas-tree outline placed at a position with a rotation.

    Attributes:
        center_x, center_y: Placement offset, stored as Decimal.
        angle: Rotation passed to ``affinity.rotate``, stored as Decimal.
        polygon: The 15-vertex outline, scaled by ``scale_factor``, rotated
            about the origin and then translated to the center.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two, four = Decimal('2'), Decimal('4')

        # Widths of the trunk and of the three foliage tiers.
        trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
        base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')

        # Heights at which the outline changes width.
        tip_y, tier_1_y, tier_2_y = Decimal('0.8'), Decimal('0.5'), Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, walking from just below the tip down
        # to the bottom-right corner of the trunk.
        right_half = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up towards the tip.
        left_half = [(-x, y) for x, y in reversed(right_half)]
        outline = [(Decimal('0.0'), tip_y)] + right_half + left_half

        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        # Rotate about the origin first, then move the shape to its center.
        x_shift = float(self.center_x * scale_factor)
        y_shift = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(
            affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0)),
            xoff=x_shift,
            yoff=y_shift,
        )

In [3]:
def has_overlap(trees):
    """Return True if any two tree polygons overlap (touching is allowed).

    Args:
        trees: Sequence of objects exposing a ``polygon`` attribute.

    Returns:
        bool: True as soon as one pair is found whose polygons intersect
        without merely touching; False otherwise (including for zero or one
        tree).
    """
    if len(trees) <= 1:
        return False

    polygons = [t.polygon for t in trees]
    # NOTE(review): the result of query() is used as integer positions into
    # `polygons`, which matches the Shapely 2.x STRtree API - confirm version.
    tree_index = STRtree(polygons)

    for i, poly in enumerate(polygons):
        for idx in tree_index.query(poly):
            # intersects/touches are symmetric, so each unordered pair only
            # needs to be examined once; skipping idx <= i also skips the
            # polygon itself and halves the number of exact predicate calls.
            if idx <= i:
                continue
            other = polygons[idx]
            if poly.intersects(other) and not poly.touches(other):
                return True
    return False


def get_side_length(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    The bounds of the union of all tree polygons are divided by
    ``scale_factor`` and the larger of width and height is returned as a
    Decimal. An empty list gives ``Decimal('0')``.
    """
    if not trees:
        return Decimal('0')

    merged = unary_union([tree.polygon for tree in trees])
    # bounds is (minx, miny, maxx, maxy) in scaled units; undo the scaling.
    minx, miny, maxx, maxy = (
        Decimal(merged.bounds[k]) / scale_factor for k in range(4)
    )
    return max(maxx - minx, maxy - miny)


def calculate_score(side_lengths):
    """Sum ``side ** 2 / n`` over a ``{n: side}`` mapping and return a float."""
    total = sum(
        (side ** 2 / Decimal(str(n)) for n, side in side_lengths.items()),
        Decimal('0'),
    )
    return float(total)

In [4]:
def load_submission(csv_path):
    """Load a submission CSV and build the trees and side length for each N.

    Args:
        csv_path: Path to a CSV with columns ``id``, ``x``, ``y`` and ``deg``.
            Rows are assigned to N by the three-digit prefix of ``id``
            (``'001_'`` ... ``'200_'``); leading ``s`` characters on the
            value columns are stripped.

    Returns:
        tuple: ``(tree_lists, side_lengths)``, two dicts keyed by N in
        1..200. An N with no rows maps to an empty list and the side length
        that ``get_side_length`` returns for it.
    """
    # Read every column as text. Without dtype=str, a file whose values lack
    # the 's' prefix is parsed as float64 and the digits beyond double
    # precision are lost before they ever reach Decimal.
    df = pd.read_csv(csv_path, dtype=str)
    for column in ('x', 'y', 'deg'):
        df[column] = df[column].astype(str).str.lstrip('s')

    tree_lists = {}
    side_lengths = {}

    for n in range(1, 201):
        prefix = f'{n:03d}_'
        group = df[df['id'].str.startswith(prefix)]
        trees = [
            ChristmasTree(x, y, deg)
            for x, y, deg in zip(group['x'], group['y'], group['deg'])
        ]
        tree_lists[n] = trees
        side_lengths[n] = get_side_length(trees)

    return tree_lists, side_lengths

In [5]:
# Baseline: parse the reference submission and record its total score.
# Later cells compare against baseline_trees / baseline_sides / baseline_score.
print("Loading baseline...")
baseline_layout = load_submission(BASELINE_CSV)
baseline_trees, baseline_sides = baseline_layout
baseline_score = calculate_score(baseline_sides)
print(f"Baseline score: {baseline_score:.6f}")

Loading baseline...


Baseline score: 70.627582


In [6]:
# Run bbox3 optimizer
print("\nRunning bbox3 optimizer...")

# Start from a fresh copy of the baseline so this cell always optimizes the
# baseline, even when it is re-run after a later cell (or an earlier run of
# this one) has overwritten submission.csv.
shutil.copy(BASELINE_CSV, WORK_CSV)

# Run with moderate parameters first. cwd= confines the working directory to
# the child process instead of changing it for the whole kernel.
result = subprocess.run(
    [BBOX3_PATH],
    capture_output=True,
    text=True,
    timeout=300,  # 5 minute timeout
    cwd=WORK_DIR,
)
print(result.stdout)
print(result.stderr)
if result.returncode != 0:
    # Do not abort the notebook, but make the failure visible: the following
    # cells would otherwise just re-evaluate whatever is in submission.csv.
    print(f"WARNING: bbox3 exited with code {result.returncode}")


Running bbox3 optimizer...


Loading submission.csv...
Initial Score: 70.627582
Running SA optimization (iterations=1000, r=30)...
Final Score: 70.627582
Improvement: 0.000000
Time: 112.56s
Saved to submission.csv




In [7]:
# Parse the file written by the optimizer and score it as-is (no validation yet).
print("\nLoading optimized result...")
optimized_layout = load_submission(WORK_CSV)
optimized_trees, optimized_sides = optimized_layout
optimized_score = calculate_score(optimized_sides)
print(f"Optimized score (before repair): {optimized_score:.6f}")


Loading optimized result...


Optimized score (before repair): 70.627582


In [8]:
# REPAIR: classify the optimized layout for every N, keep it only when it is
# overlap-free and strictly smaller than the baseline, otherwise use the baseline.
print("\nApplying REPAIR strategy...")

repaired_trees = {}
repaired_sides = {}
stats = dict.fromkeys(('improved', 'invalid', 'worse', 'same'), 0)

for n in range(1, 201):
    opt_trees, opt_side = optimized_trees[n], optimized_sides[n]
    base_trees, base_side = baseline_trees[n], baseline_sides[n]

    # Validity is checked first, so an overlapping layout counts as 'invalid'
    # regardless of its side length.
    if has_overlap(opt_trees):
        outcome = 'invalid'
    elif opt_side < base_side:
        outcome = 'improved'
    elif opt_side > base_side:
        outcome = 'worse'
    else:
        outcome = 'same'
    stats[outcome] += 1

    if outcome != 'improved':
        # Invalid, worse, or unchanged: fall back to the baseline layout.
        repaired_trees[n], repaired_sides[n] = base_trees, base_side
        continue

    repaired_trees[n], repaired_sides[n] = opt_trees, opt_side
    if n <= 20:  # Log improvements for small N (high leverage)
        print(f"  N={n}: improved {float(base_side):.6f} -> {float(opt_side):.6f}")

repaired_score = calculate_score(repaired_sides)
print(f"\nRepair stats: {stats}")
print(f"Repaired score: {repaired_score:.6f}")
print(f"Improvement over baseline: {baseline_score - repaired_score:.6f}")


Applying REPAIR strategy...



Repair stats: {'improved': 0, 'invalid': 120, 'worse': 0, 'same': 80}
Repaired score: 70.627582
Improvement over baseline: 0.000000


In [9]:
# Run bbox3 with different parameters
print("\nRunning bbox3 with n=5000, r=50...")

# Reset to baseline so this run does not start from the previous run's output
shutil.copy(BASELINE_CSV, WORK_CSV)

# Run with higher iterations
result = subprocess.run(
    [BBOX3_PATH, '-n', '5000', '-r', '50'],
    capture_output=True,
    text=True,
    timeout=600,
    cwd=WORK_DIR
)
# A negative-start slice already yields the whole string when it is shorter
# than 500 characters, so no length check is needed.
print(result.stdout[-500:])

# Surface failures: without this a crashed optimizer is indistinguishable from
# a run that simply found no improvement.
if result.stderr:
    print(result.stderr[-500:])
if result.returncode != 0:
    print(f"WARNING: bbox3 exited with code {result.returncode}")


Running bbox3 with n=5000, r=50...


Loading submission.csv...
Initial Score: 70.627582
Running SA optimization (iterations=5000, r=50)...
Final Score: 70.627582
Improvement: 0.000000
Time: 125.28s
Saved to submission.csv



In [10]:
# Load the second optimizer output and score it before any validation.
print("\nLoading and repairing...")
opt2_trees, opt2_sides = load_submission(WORK_CSV)
opt2_score = calculate_score(opt2_sides)
print(f"Optimized score (before repair): {opt2_score:.6f}")

# REPAIR: per N, keep the optimized layout only if it is overlap-free and
# strictly smaller than the baseline; every other case reverts to the baseline.
repaired2_trees, repaired2_sides = {}, {}
stats2 = {'improved': 0, 'invalid': 0, 'worse': 0, 'same': 0}

for n in range(1, 201):
    opt_trees, opt_side = opt2_trees[n], opt2_sides[n]
    base_trees, base_side = baseline_trees[n], baseline_sides[n]

    if has_overlap(opt_trees):
        verdict = 'invalid'
    elif opt_side < base_side:
        verdict = 'improved'
    elif opt_side > base_side:
        verdict = 'worse'
    else:
        verdict = 'same'

    keep_optimized = verdict == 'improved'
    repaired2_trees[n] = opt_trees if keep_optimized else base_trees
    repaired2_sides[n] = opt_side if keep_optimized else base_side
    stats2[verdict] += 1

    if keep_optimized and n <= 50:
        improvement = float(base_side - opt_side)
        print(f"  N={n}: improved by {improvement:.6f}")

repaired2_score = calculate_score(repaired2_sides)
print(f"\nRepair stats: {stats2}")
print(f"Repaired score: {repaired2_score:.6f}")
print(f"Improvement over baseline: {baseline_score - repaired2_score:.6f}")


Loading and repairing...


Optimized score (before repair): 70.627582



Repair stats: {'improved': 0, 'invalid': 120, 'worse': 0, 'same': 80}
Repaired score: 70.627582
Improvement over baseline: 0.000000


In [None]:
# Pick the repaired result with the lower score. The first run is listed first,
# so it wins when both scores are equal (min() keeps the earliest minimum).
repair_candidates = [
    (repaired_score, repaired_trees, repaired_sides),
    (repaired2_score, repaired2_trees, repaired2_sides),
]
best_score, best_trees, best_sides = min(repair_candidates, key=lambda c: c[0])

print(f"Best score: {best_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Total improvement: {baseline_score - best_score:.6f}")

In [None]:
# Save best submission
print("\nSaving best submission...")

TARGET_SCORE = 68.901319
SUBMISSION_DIR = '/home/submission'


def _format_value(value):
    """Render a Decimal as an 's'-prefixed fixed-point string with every digit kept.

    Going through float() and a 12-decimal format would round the coordinates,
    so the file written could differ from the layout that was validated and
    scored above.
    """
    return f's{value:f}'


rows = [
    {
        'id': f'{n:03d}_{t_idx}',
        'x': _format_value(tree.center_x),
        'y': _format_value(tree.center_y),
        'deg': _format_value(tree.angle),
    }
    for n in range(1, 201)
    for t_idx, tree in enumerate(best_trees[n])
]

submission_df = pd.DataFrame(rows)
submission_df.to_csv(os.path.join(WORK_DIR, 'submission_repaired.csv'), index=False)
# Make sure the output directory exists before writing the final copy.
os.makedirs(SUBMISSION_DIR, exist_ok=True)
submission_df.to_csv(os.path.join(SUBMISSION_DIR, 'submission.csv'), index=False)
print(f"Saved submission with {len(submission_df)} rows")
print(f"\nFinal score: {best_score:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap: {best_score - TARGET_SCORE:.6f}")

In [12]:
# Rank every N by how much it contributes to the baseline score
# (side ** 2 / N), to see where an improvement would pay off most.

print("Analyzing score contributions by N...")
contributions = sorted(
    (
        (n, float(baseline_sides[n]), float(baseline_sides[n] ** 2 / Decimal(str(n))))
        for n in range(1, 201)
    ),
    key=lambda entry: entry[2],  # contribution, highest first
    reverse=True,
)

print("\nTop 20 N values by score contribution:")
for n, side, contrib in contributions[:20]:
    print(f"N={n:3d}: side={side:.6f}, contribution={contrib:.6f}")

Analyzing score contributions by N...

Top 20 N values by score contribution:
N=  1: side=0.813173, contribution=0.661250
N=  2: side=0.949504, contribution=0.450779
N=  3: side=1.142031, contribution=0.434745
N=  5: side=1.443692, contribution=0.416850
N=  4: side=1.290806, contribution=0.416545
N=  7: side=1.673104, contribution=0.399897
N=  6: side=1.548438, contribution=0.399610
N=  9: side=1.867280, contribution=0.387415
N=  8: side=1.755921, contribution=0.385407
N= 15: side=2.377955, contribution=0.376978
N= 10: side=1.940696, contribution=0.376630
N= 21: side=2.811667, contribution=0.376451
N= 20: side=2.742469, contribution=0.376057
N= 22: side=2.873270, contribution=0.375258
N= 11: side=2.030803, contribution=0.374924
N= 16: side=2.446640, contribution=0.374128
N= 26: side=3.118320, contribution=0.373997
N= 12: side=2.114873, contribution=0.372724
N= 13: side=2.199960, contribution=0.372294
N= 25: side=3.050182, contribution=0.372144


In [14]:
# Collect candidate submission CSVs from the snapshot directories
import glob

print("Searching for more pre-optimized submissions...")

# Each pattern is globbed in turn and the matches are concatenated in order.
snapshot_patterns = (
    '/home/nonroot/snapshots/santa-2025/*/code/*.csv',
    '/home/nonroot/snapshots/santa-2025/*/code/experiments/*/*.csv',
    '/home/nonroot/snapshots/santa-2025/*/code/preoptimized/*.csv',
    '/home/nonroot/snapshots/santa-2025/*/submission/*.csv',
)
snapshot_csvs = [
    match for pattern in snapshot_patterns for match in glob.glob(pattern)
]

print(f"Found {len(snapshot_csvs)} CSV files")

# Score only the first five files as a quick sanity check; a file that cannot
# be loaded is reported with a truncated error message instead of stopping.
for csv_path in snapshot_csvs[:5]:
    try:
        trees, sides = load_submission(csv_path)
        score = calculate_score(sides)
        print(f"{csv_path.split('/')[-1]}: {score:.6f}")
    except Exception as e:
        print(f"{csv_path.split('/')[-1]}: Error - {str(e)[:50]}")

Searching for more pre-optimized submissions...
Found 994 CSV files


submission.csv: 70.676102
submission.csv: 0.000000


input.csv: 70.676102


submission.csv: 70.627582


submission.csv: 70.676816


In [16]:
# Find the best submissions across all snapshots
print("Evaluating all snapshot submissions (this may take a while)...")

best_submissions = []   # (csv_path, score, trees, sides) for plausible scores
failed_files = []       # (csv_path, exception) for files that could not be scored
processed = 0

for processed, csv_path in enumerate(snapshot_csvs, start=1):
    try:
        trees, sides = load_submission(csv_path)
        score = calculate_score(sides)
    except Exception as exc:
        # Keep going, but remember the failure instead of discarding it silently.
        failed_files.append((csv_path, exc))
    else:
        if 0 < score < 100:  # Filter out implausible scores (e.g. empty files score 0)
            best_submissions.append((csv_path, score, trees, sides))
    # Progress counts every file visited, including the ones that failed.
    if processed % 100 == 0:
        print(f"Processed {processed}/{len(snapshot_csvs)} files...")

print(f"\nFound {len(best_submissions)} valid submissions")
if failed_files:
    print(f"Skipped {len(failed_files)} files that could not be loaded or scored")

# Sort by score
best_submissions.sort(key=lambda x: x[1])

# Show top 10
print("\nTop 10 submissions by score:")
for path, score, _, _ in best_submissions[:10]:
    print(f"  {score:.6f}: {path.split('/')[-1]}")

Evaluating all snapshot submissions (this may take a while)...


Processed 100/994 files...


Processed 200/994 files...


Processed 300/994 files...


Processed 400/994 files...


Processed 500/994 files...


Processed 600/994 files...


Processed 700/994 files...


Processed 800/994 files...


Processed 900/994 files...



Found 861 valid submissions

Top 10 submissions by score:
  34.220876: gap_constrained_result.csv
  34.338312: submission_sa_parallel.csv
  34.338312: submission_sa.csv
  39.508442: submission.csv
  51.663965: submission_opt.csv
  67.772662: ensemble_submission.csv
  69.653351: test_invalid.csv
  69.653351: optimized20.csv
  69.653351: optimized21.csv
  69.684578: optimized19.csv


In [17]:
# Check validity of top submissions
# (The cell source was stored as one line with literal "\n" sequences after the
# leading "#", so the whole cell was a comment and nothing was executed.)
print("Checking validity of top submissions...")

for path, score, trees, sides in best_submissions[:20]:
    # Count the N values (1..200) whose layout contains overlapping trees.
    overlap_count = sum(1 for n in range(1, 201) if has_overlap(trees[n]))
    valid = "VALID" if overlap_count == 0 else f"INVALID ({overlap_count} overlaps)"
    print(f"  {score:.6f}: {valid} - {path.split('/')[-1]}")