# Experiment 002: bbox3 Multi-Phase Optimization

This experiment implements the multi-phase optimization strategy:
1. Phase A: Short 2-minute runs with various (n, r) parameters
2. Phase B: 10-minute runs on top candidates
3. Phase C: 20-minute runs on best candidates
4. Apply fix_direction post-processing
5. Validate and repair overlaps

In [1]:
import os
import subprocess
import time
import shutil
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar

getcontext().prec = 25
scale_factor = 1  # Using simpler scale for this experiment

# Paths
BBOX3_PATH = '/home/code/bbox3'
BASELINE_CSV = '/home/data/sample_submission.csv'
WORKING_DIR = '/home/code/experiments/002_bbox3_multiphase'
SUBMISSION_DIR = '/home/submission'

os.makedirs(WORKING_DIR, exist_ok=True)
os.makedirs(SUBMISSION_DIR, exist_ok=True)

print(f"bbox3 exists: {os.path.exists(BBOX3_PATH)}")
print(f"Baseline exists: {os.path.exists(BASELINE_CSV)}")

bbox3 exists: True
Baseline exists: True


In [2]:
# Tree geometry and helper classes
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)
        
        initial_polygon = Polygon(zip(TX, TY))
        rotated = affinity.rotate(initial_polygon, self.angle, origin=(0, 0))
        self.polygon = affinity.translate(rotated, xoff=self.center_x, yoff=self.center_y)

def load_configuration_from_df(n, df):
    """Load trees for a given N from submission DataFrame."""
    group_data = df[df["id"].str.startswith(f"{n:03d}_")]
    trees = []
    for _, row in group_data.iterrows():
        x = str(row["x"])[1:]  # Remove 's' prefix
        y = str(row["y"])[1:]
        deg = str(row["deg"])[1:]
        if x and y and deg:
            trees.append(ChristmasTree(x, y, deg))
    return trees

def get_score(trees, n):
    """Calculate score (S^2 / N) for a configuration."""
    if not trees:
        return 0.0
    xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T for t in trees])
    min_x, min_y = xys.min(axis=0)
    max_x, max_y = xys.max(axis=0)
    side_length = max(max_x - min_x, max_y - min_y)
    return side_length**2 / n

def has_overlap(trees):
    """Check if any two trees overlap."""
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx != i and poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                return True
    return False

print("Helper classes and functions defined")

Helper classes and functions defined


In [3]:
def score_and_validate_submission(file_path, max_n=200):
    """Score and validate a submission file."""
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        return {"status": "FAILED", "error": str(e), "total_score": float('inf')}
    
    total_score = 0.0
    failed_overlap_n = []
    
    for n in range(1, max_n + 1):
        trees = load_configuration_from_df(n, df)
        if trees:
            current_score = get_score(trees, n)
            total_score += current_score
            if has_overlap(trees):
                failed_overlap_n.append(n)
    
    status = "SUCCESS" if not failed_overlap_n else "FAILED (Overlaps)"
    return {
        "status": status,
        "total_score": total_score,
        "failed_overlap_n": failed_overlap_n
    }

# Test on baseline
baseline_result = score_and_validate_submission(BASELINE_CSV)
print(f"Baseline score: {baseline_result['total_score']:.6f}")
print(f"Baseline status: {baseline_result['status']}")

Baseline score: 173.652299
Baseline status: SUCCESS


In [4]:
def run_bbox3(input_csv, output_csv, n_iterations, random_seed, timeout_seconds=None):
    """Run bbox3 optimizer with given parameters."""
    # Copy input to working directory as submission.csv (bbox3 expects this name)
    working_csv = os.path.join(os.path.dirname(output_csv), 'submission.csv')
    shutil.copy(input_csv, working_csv)
    
    # Change to working directory and run bbox3
    original_dir = os.getcwd()
    os.chdir(os.path.dirname(output_csv))
    
    cmd = [BBOX3_PATH, '-n', str(n_iterations), '-r', str(random_seed)]
    
    start_time = time.time()
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout_seconds
        )
        elapsed = time.time() - start_time
        
        # Parse output for scores
        output = result.stdout
        initial_score = None
        final_score = None
        
        for line in output.split('\n'):
            if 'Initial Total Score:' in line:
                initial_score = float(line.split(':')[1].strip())
            elif 'Final Score:' in line:
                final_score = float(line.split(':')[1].strip())
        
        # Rename output
        if os.path.exists(working_csv):
            shutil.copy(working_csv, output_csv)
        
        os.chdir(original_dir)
        return {
            'success': True,
            'initial_score': initial_score,
            'final_score': final_score,
            'elapsed': elapsed,
            'output': output
        }
    except subprocess.TimeoutExpired:
        os.chdir(original_dir)
        return {'success': False, 'error': 'Timeout'}
    except Exception as e:
        os.chdir(original_dir)
        return {'success': False, 'error': str(e)}

print("bbox3 runner function defined")

bbox3 runner function defined


In [5]:
# Rotation optimization (fix_direction)
def calculate_bbox_side_at_angle(angle_deg, points):
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix_T = np.array([[c, s], [-s, c]])
    rotated_points = points.dot(rot_matrix_T)
    min_xy = np.min(rotated_points, axis=0)
    max_xy = np.max(rotated_points, axis=0)
    return max(max_xy[0] - min_xy[0], max_xy[1] - min_xy[1])

def optimize_rotation_for_config(trees):
    """Find optimal rotation angle for a configuration."""
    if len(trees) <= 1:
        return 0.0, get_score(trees, len(trees)) if trees else 0.0
    
    all_points = []
    for tree in trees:
        all_points.extend(list(tree.polygon.exterior.coords))
    points_np = np.array(all_points)
    
    try:
        hull_points = points_np[ConvexHull(points_np).vertices]
    except:
        return 0.0, get_score(trees, len(trees))
    
    initial_side = calculate_bbox_side_at_angle(0, hull_points)
    
    res = minimize_scalar(
        lambda a: calculate_bbox_side_at_angle(a, hull_points),
        bounds=(0.001, 89.999), 
        method='bounded'
    )
    
    if res.fun < initial_side - 1e-8:
        return res.x, res.fun
    return 0.0, initial_side

def apply_rotation_to_config(trees, angle_deg, n):
    """Apply rotation to all trees in a configuration."""
    if not trees or abs(angle_deg) < 1e-9:
        return trees
    
    # Get bounding box center
    all_points = []
    for tree in trees:
        all_points.extend(list(tree.polygon.exterior.coords))
    points_np = np.array(all_points)
    center = points_np.mean(axis=0)
    
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix = np.array([[c, -s], [s, c]])
    
    rotated_trees = []
    for tree in trees:
        pos = np.array([tree.center_x, tree.center_y])
        shifted = pos - center
        rotated_pos = shifted.dot(rot_matrix.T) + center
        new_tree = ChristmasTree(
            str(rotated_pos[0]),
            str(rotated_pos[1]),
            str(tree.angle + angle_deg)
        )
        rotated_trees.append(new_tree)
    return rotated_trees

print("Rotation optimization functions defined")

Rotation optimization functions defined


In [6]:
def fix_direction(input_csv, output_csv):
    """Apply rotation optimization to all configurations."""
    df = pd.read_csv(input_csv)
    
    total_improvement = 0
    improved_configs = 0
    
    all_rows = []
    
    for n in range(1, 201):
        trees = load_configuration_from_df(n, df)
        if not trees:
            continue
        
        original_score = get_score(trees, n)
        best_angle, best_side = optimize_rotation_for_config(trees)
        
        if best_angle > 0.001:
            rotated_trees = apply_rotation_to_config(trees, best_angle, n)
            new_score = get_score(rotated_trees, n)
            
            if new_score < original_score:
                trees = rotated_trees
                improvement = original_score - new_score
                total_improvement += improvement
                improved_configs += 1
        
        # Add to output
        for t_idx, tree in enumerate(trees):
            all_rows.append({
                'id': f'{n:03d}_{t_idx}',
                'x': f's{tree.center_x}',
                'y': f's{tree.center_y}',
                'deg': f's{tree.angle}'
            })
    
    # Save
    out_df = pd.DataFrame(all_rows)
    out_df.to_csv(output_csv, index=False)
    
    return {
        'total_improvement': total_improvement,
        'improved_configs': improved_configs
    }

print("fix_direction function defined")

fix_direction function defined


In [7]:
def repair_overlaps(new_csv, baseline_csv, output_csv, failed_n_list):
    """Replace overlapping configurations with baseline."""
    if not failed_n_list:
        shutil.copy(new_csv, output_csv)
        return 0
    
    df_new = pd.read_csv(new_csv)
    df_baseline = pd.read_csv(baseline_csv)
    
    failed_prefixes = [f"{n:03d}_" for n in failed_n_list]
    
    df_to_keep = df_new[~df_new["id"].str.startswith(tuple(failed_prefixes))]
    df_replacement = df_baseline[df_baseline["id"].str.startswith(tuple(failed_prefixes))]
    
    df_repaired = pd.concat([df_to_keep, df_replacement]).sort_values(by="id").reset_index(drop=True)
    df_repaired.to_csv(output_csv, index=False)
    
    return len(failed_n_list)

print("repair_overlaps function defined")

repair_overlaps function defined


In [8]:
# Phase A: Short runs to find promising parameters
print("="*60)
print("PHASE A: Parameter Search (2-minute runs)")
print("="*60)

# Parameters to try
n_values = [500, 800, 1000, 1200]  # Reduced for faster exploration
r_values = [30, 60, 90, 120]

phase_a_results = []

# Start from baseline
shutil.copy(BASELINE_CSV, os.path.join(WORKING_DIR, 'current_best.csv'))

for n_iter in n_values:
    for r_seed in r_values:
        print(f"\nTrying n={n_iter}, r={r_seed}...")
        
        output_file = os.path.join(WORKING_DIR, f'phase_a_n{n_iter}_r{r_seed}.csv')
        
        result = run_bbox3(
            os.path.join(WORKING_DIR, 'current_best.csv'),
            output_file,
            n_iterations=n_iter,
            random_seed=r_seed,
            timeout_seconds=180  # 3 minute timeout
        )
        
        if result['success'] and result['final_score']:
            phase_a_results.append({
                'n': n_iter,
                'r': r_seed,
                'initial': result['initial_score'],
                'final': result['final_score'],
                'elapsed': result['elapsed'],
                'file': output_file
            })
            print(f"  Score: {result['initial_score']:.4f} -> {result['final_score']:.4f} ({result['elapsed']:.1f}s)")
            
            # Update current best if improved
            if result['final_score'] < phase_a_results[0]['final'] if len(phase_a_results) == 1 else result['final_score'] < min(r['final'] for r in phase_a_results[:-1]):
                shutil.copy(output_file, os.path.join(WORKING_DIR, 'current_best.csv'))
                print(f"  -> New best!")
        else:
            print(f"  Failed: {result.get('error', 'Unknown')}")

# Sort by final score
phase_a_results.sort(key=lambda x: x['final'])
print("\n" + "="*60)
print("Phase A Results (sorted by score):")
for i, r in enumerate(phase_a_results[:6]):
    print(f"  {i+1}. n={r['n']}, r={r['r']}: {r['final']:.4f}")

PHASE A: Parameter Search (2-minute runs)

Trying n=500, r=30...


  Score: 173.6523 -> 156.6328 (130.1s)

Trying n=500, r=60...


  Score: 173.6523 -> 156.7762 (171.9s)

Trying n=500, r=90...


  Failed: Timeout

Trying n=500, r=120...


  Failed: Timeout

Trying n=800, r=30...


  Score: 173.6523 -> 155.8959 (123.3s)
  -> New best!

Trying n=800, r=60...


  Score: 155.8959 -> 153.6834 (125.3s)
  -> New best!

Trying n=800, r=90...


  Score: 153.6834 -> 152.1368 (172.6s)
  -> New best!

Trying n=800, r=120...


  Failed: Timeout

Trying n=1000, r=30...


  Score: 152.1368 -> 150.3605 (77.3s)
  -> New best!

Trying n=1000, r=60...


  Score: 150.3605 -> 149.2697 (131.0s)
  -> New best!

Trying n=1000, r=90...


  Failed: Timeout

Trying n=1000, r=120...


  Failed: Timeout

Trying n=1200, r=30...


  Score: 149.2697 -> 148.2737 (79.8s)
  -> New best!

Trying n=1200, r=60...


  Score: 148.2737 -> 147.3649 (134.1s)
  -> New best!

Trying n=1200, r=90...


  Failed: Timeout

Trying n=1200, r=120...


  Failed: Timeout

Phase A Results (sorted by score):
  1. n=1200, r=60: 147.3649
  2. n=1200, r=30: 148.2737
  3. n=1000, r=60: 149.2697
  4. n=1000, r=30: 150.3605
  5. n=800, r=90: 152.1368
  6. n=800, r=60: 153.6834


In [None]:
# Phase B: Longer runs on top candidates
print("\n" + "="*60)
print("PHASE B: Extended Optimization (5-minute runs)")
print("="*60)

# Take top 3 from Phase A
top_candidates = phase_a_results[:3] if len(phase_a_results) >= 3 else phase_a_results

phase_b_results = []

for i, candidate in enumerate(top_candidates):
    print(f"\nCandidate {i+1}: Starting from n={candidate['n']}, r={candidate['r']} (score={candidate['final']:.4f})")
    
    # Run with higher iterations
    n_iter = 2000
    r_seed = candidate['r'] + 1000  # Different seed for variety
    
    output_file = os.path.join(WORKING_DIR, f'phase_b_candidate{i+1}.csv')
    
    result = run_bbox3(
        candidate['file'],
        output_file,
        n_iterations=n_iter,
        random_seed=r_seed,
        timeout_seconds=360  # 6 minute timeout
    )
    
    if result['success'] and result['final_score']:
        phase_b_results.append({
            'candidate': i+1,
            'initial': result['initial_score'],
            'final': result['final_score'],
            'elapsed': result['elapsed'],
            'file': output_file
        })
        print(f"  Score: {result['initial_score']:.4f} -> {result['final_score']:.4f} ({result['elapsed']:.1f}s)")
    else:
        print(f"  Failed: {result.get('error', 'Unknown')}")

# Sort by final score
phase_b_results.sort(key=lambda x: x['final'])
print("\n" + "="*60)
print("Phase B Results:")
for r in phase_b_results:
    print(f"  Candidate {r['candidate']}: {r['final']:.4f}")

In [None]:
# Phase C: Final optimization on best candidate
print("\n" + "="*60)
print("PHASE C: Final Optimization (10-minute run)")
print("="*60)

if phase_b_results:
    best_candidate = phase_b_results[0]
    print(f"\nStarting from best Phase B result: {best_candidate['final']:.4f}")
    
    output_file = os.path.join(WORKING_DIR, 'phase_c_final.csv')
    
    result = run_bbox3(
        best_candidate['file'],
        output_file,
        n_iterations=3000,
        random_seed=42,
        timeout_seconds=720  # 12 minute timeout
    )
    
    if result['success'] and result['final_score']:
        print(f"  Score: {result['initial_score']:.4f} -> {result['final_score']:.4f} ({result['elapsed']:.1f}s)")
        best_file = output_file
        best_score = result['final_score']
    else:
        print(f"  Failed, using Phase B result")
        best_file = best_candidate['file']
        best_score = best_candidate['final']
else:
    print("No Phase B results, using Phase A best")
    best_file = phase_a_results[0]['file']
    best_score = phase_a_results[0]['final']

print(f"\nBest score before post-processing: {best_score:.4f}")

In [None]:
# Post-processing: Apply fix_direction
print("\n" + "="*60)
print("POST-PROCESSING: Rotation Optimization")
print("="*60)

fixed_file = os.path.join(WORKING_DIR, 'fixed_direction.csv')
fix_result = fix_direction(best_file, fixed_file)

print(f"Improved {fix_result['improved_configs']} configurations")
print(f"Total score improvement: {fix_result['total_improvement']:.4f}")

# Validate
validation = score_and_validate_submission(fixed_file)
print(f"\nScore after fix_direction: {validation['total_score']:.4f}")
print(f"Overlaps: {len(validation['failed_overlap_n'])}")

In [None]:
# Repair any overlaps
print("\n" + "="*60)
print("OVERLAP REPAIR")
print("="*60)

final_file = os.path.join(WORKING_DIR, 'final_submission.csv')

if validation['failed_overlap_n']:
    print(f"Repairing {len(validation['failed_overlap_n'])} configurations with overlaps...")
    repair_overlaps(fixed_file, BASELINE_CSV, final_file, validation['failed_overlap_n'])
else:
    print("No overlaps to repair")
    shutil.copy(fixed_file, final_file)

# Final validation
final_validation = score_and_validate_submission(final_file)
print(f"\nFinal score: {final_validation['total_score']:.4f}")
print(f"Final status: {final_validation['status']}")

In [None]:
# Copy to submission directory
shutil.copy(final_file, os.path.join(SUBMISSION_DIR, 'submission.csv'))

print("\n" + "="*60)
print("EXPERIMENT SUMMARY")
print("="*60)
print(f"Baseline score: {baseline_result['total_score']:.4f}")
print(f"Final score: {final_validation['total_score']:.4f}")
print(f"Improvement: {baseline_result['total_score'] - final_validation['total_score']:.4f}")
print(f"Improvement %: {100*(baseline_result['total_score'] - final_validation['total_score'])/baseline_result['total_score']:.2f}%")
print(f"Target: 68.922808")
print(f"Gap to target: {final_validation['total_score'] - 68.922808:.4f}")
print("="*60)

In [None]:
# Model wrapper for logging
class Bbox3Optimizer:
    def __init__(self, data='single'):
        self.data = data
    
    def optimize(self):
        pass

model = Bbox3Optimizer(data='single')
print("Model wrapper created")