# Baseline Experiment: bbox3 + fix_direction

This notebook implements the baseline approach from top kernels:
1. Use bbox3 binary optimizer
2. Apply fix_direction rotation optimization
3. Validate and score

In [1]:
import shutil
import os

# Setup working directory
os.chdir('/home/code/experiments/001_baseline')

# Copy bbox3 binary and starting submission
shutil.copy('/home/code/datasets/bucket-of-chump/bbox3', './bbox3')
shutil.copy('/home/code/datasets/santa-2025-csv/santa-2025.csv', './submission.csv')

print('Files copied successfully')
print('Starting submission:', os.path.getsize('./submission.csv'), 'bytes')

Files copied successfully
Starting submission: 1525109 bytes


In [2]:
# Make bbox3 executable and check it
import subprocess

subprocess.run(['chmod', '+x', './bbox3'])
result = subprocess.run(['./bbox3', '--help'], capture_output=True, text=True)
print(result.stdout if result.stdout else result.stderr)

./bbox3: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by ./bbox3)



In [3]:
# Run bbox3 with moderate settings
# -n: iterations, -r: restarts
import time

start_time = time.time()
result = subprocess.run(['./bbox3', '-n', '2000', '-r', '60'], capture_output=True, text=True)
end_time = time.time()

print(f'bbox3 completed in {end_time - start_time:.1f} seconds')
print('Output:', result.stdout[-2000:] if len(result.stdout) > 2000 else result.stdout)
if result.stderr:
    print('Errors:', result.stderr[-500:])

bbox3 completed in 0.0 seconds
Output: 
Errors: ./bbox3: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by ./bbox3)



In [None]:
# Core classes and functions for scoring and validation
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from shapely.ops import unary_union
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar

getcontext().prec = 30
scale_factor = Decimal('1e18')

class ChristmasTree:
    """Represents a single, rotatable Christmas tree of a fixed size."""

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (float(Decimal('0.0') * scale_factor), float(tip_y * scale_factor)),
            (float(top_w / Decimal('2') * scale_factor), float(tier_1_y * scale_factor)),
            (float(top_w / Decimal('4') * scale_factor), float(tier_1_y * scale_factor)),
            (float(mid_w / Decimal('2') * scale_factor), float(tier_2_y * scale_factor)),
            (float(mid_w / Decimal('4') * scale_factor), float(tier_2_y * scale_factor)),
            (float(base_w / Decimal('2') * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal('2') * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal('2') * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal('2')) * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal('2')) * scale_factor), float(base_y * scale_factor)),
            (float(-(base_w / Decimal('2')) * scale_factor), float(base_y * scale_factor)),
            (float(-(mid_w / Decimal('4')) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(mid_w / Decimal('2')) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(top_w / Decimal('4')) * scale_factor), float(tier_1_y * scale_factor)),
            (float(-(top_w / Decimal('2')) * scale_factor), float(tier_1_y * scale_factor)),
        ])
        
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated, 
            xoff=float(self.center_x * scale_factor), 
            yoff=float(self.center_y * scale_factor)
        )

    def clone(self):
        return ChristmasTree(
            center_x=str(self.center_x),
            center_y=str(self.center_y),
            angle=str(self.angle),
        )

print('ChristmasTree class defined')

In [None]:
def load_configuration_from_df(n, df):
    """Loads all trees for a given N from the submission DataFrame."""
    group_data = df[df['id'].str.startswith(f'{n:03d}_')]
    trees = []
    for _, row in group_data.iterrows():
        x = str(row['x'])[1:] if str(row['x']).startswith('s') else str(row['x'])
        y = str(row['y'])[1:] if str(row['y']).startswith('s') else str(row['y'])
        deg = str(row['deg'])[1:] if str(row['deg']).startswith('s') else str(row['deg'])
        if x and y and deg:
            trees.append(ChristmasTree(x, y, deg))
    return trees

def get_tree_list_side_length(tree_list):
    """Get the side length of the bounding square."""
    if not tree_list:
        return Decimal('0')
    all_polygons = [t.polygon for t in tree_list]
    bounds = unary_union(all_polygons).bounds
    return Decimal(str(max(bounds[2] - bounds[0], bounds[3] - bounds[1]))) / scale_factor

def get_score(trees, n):
    """Calculates the score (S^2 / N) for a given configuration."""
    if not trees:
        return 0.0
    xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T / float(scale_factor) for t in trees])
    min_x, min_y = xys.min(axis=0)
    max_x, max_y = xys.max(axis=0)
    side_length = max(max_x - min_x, max_y - min_y)
    return side_length**2 / n

def has_overlap(trees):
    """Check if any two ChristmasTree polygons overlap."""
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx == i:
                continue
            if poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                return True
    return False

print('Helper functions defined')

In [None]:
def score_and_validate_submission(file_path, max_n=200):
    """Reads a submission CSV, calculates the total score, and checks for overlaps."""
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f'Error reading CSV: {e}')
        return {'status': 'FAILED', 'error': str(e)}

    total_score = 0.0
    failed_overlap_n = []
    
    print(f'--- Scoring and Validation: {file_path} (N=1 to {max_n}) ---')

    for n in range(1, max_n + 1):
        trees = load_configuration_from_df(n, df)
        if trees:
            current_score = get_score(trees, n)
            total_score += current_score
            if has_overlap(trees):
                failed_overlap_n.append(n)
                print(f'  ❌ N={n:03d}: OVERLAP DETECTED! (Score: {current_score:.6f})')
        
    print('\n--- Summary ---')
    if failed_overlap_n:
        print(f'❌ Validation FAILED: Overlaps found in N: {failed_overlap_n}')
        status = 'FAILED (Overlaps)'
    else:
        print('✅ Validation SUCCESSFUL: No overlaps detected.')
        status = 'SUCCESS'
        
    print(f'**Total Score (Σ S²/N): {total_score:.10f}**')
    
    return {
        'status': status,
        'total_score': total_score,
        'failed_overlap_n': failed_overlap_n
    }

print('Validation function defined')

In [None]:
# Score the submission after bbox3 optimization
result_after_bbox3 = score_and_validate_submission('submission.csv', max_n=200)
print(f"\nScore after bbox3: {result_after_bbox3['total_score']:.10f}")

In [None]:
# fix_direction functions
def calculate_bbox_side_at_angle(angle_deg, points):
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix_T = np.array([[c, s], [-s, c]])
    rotated_points = points.dot(rot_matrix_T)
    min_xy = np.min(rotated_points, axis=0)
    max_xy = np.max(rotated_points, axis=0)
    return max(max_xy[0] - min_xy[0], max_xy[1] - min_xy[1])

def optimize_rotation(trees):
    """Find optimal rotation angle to minimize bounding box."""
    all_points = []
    for tree in trees:
        all_points.extend(list(tree.polygon.exterior.coords))
    points_np = np.array(all_points)
    hull_points = points_np[ConvexHull(points_np).vertices]
    
    initial_side = calculate_bbox_side_at_angle(0, hull_points)
    
    res = minimize_scalar(
        lambda a: calculate_bbox_side_at_angle(a, hull_points),
        bounds=(0.001, 89.999), method='bounded'
    )
    found_angle_deg = res.x
    found_side = res.fun
    
    improvement = initial_side - found_side
    EPSILON = 1e-8
    
    if improvement > EPSILON:
        return Decimal(str(found_side)) / scale_factor, found_angle_deg
    else:
        return Decimal(str(initial_side)) / scale_factor, 0.0

def apply_rotation(trees, angle_deg):
    """Apply rotation to all trees around their collective center."""
    if not trees or abs(angle_deg) < 1e-9:
        return [t.clone() for t in trees]
    
    bounds = [t.polygon.bounds for t in trees]
    min_x = min(b[0] for b in bounds)
    min_y = min(b[1] for b in bounds)
    max_x = max(b[2] for b in bounds)
    max_y = max(b[3] for b in bounds)
    rotation_center = np.array([(min_x + max_x) / 2.0, (min_y + max_y) / 2.0])
    
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix = np.array([[c, -s], [s, c]])
    
    points = np.array([[float(t.center_x), float(t.center_y)] for t in trees])
    shifted = points - rotation_center / float(scale_factor)
    rotated = shifted.dot(rot_matrix.T) + rotation_center / float(scale_factor)
    
    rotated_trees = []
    for i in range(len(trees)):
        new_tree = ChristmasTree(
            str(rotated[i, 0]), 
            str(rotated[i, 1]),
            str(float(trees[i].angle) + angle_deg)
        )
        rotated_trees.append(new_tree)
    return rotated_trees

print('fix_direction functions defined')

In [None]:
def fix_direction(input_path='submission.csv', output_path='submission.csv', passes=2):
    """Apply rotation optimization to all configurations."""
    df = pd.read_csv(input_path)
    
    # Parse all groups
    df['x_val'] = df['x'].str.strip('s')
    df['y_val'] = df['y'].str.strip('s')
    df['deg_val'] = df['deg'].str.strip('s')
    df[['group_id', 'item_id']] = df['id'].str.split('_', n=2, expand=True)
    
    dict_of_tree_list = {}
    dict_of_side_length = {}
    
    for group_id, group_data in df.groupby('group_id'):
        tree_list = [ChristmasTree(row['x_val'], row['y_val'], row['deg_val'])
                     for _, row in group_data.iterrows()]
        dict_of_tree_list[group_id] = tree_list
        dict_of_side_length[group_id] = get_tree_list_side_length(tree_list)
    
    # Calculate initial score
    initial_score = sum(v**2 / Decimal(k) for k, v in dict_of_side_length.items())
    print(f'Initial score: {float(initial_score):.10f}')
    
    # Apply fix_direction for multiple passes
    for pass_num in range(passes):
        improvements = 0
        for group_id in sorted(dict_of_tree_list.keys()):
            trees = dict_of_tree_list[group_id]
            if len(trees) <= 1:
                continue
            
            best_side, best_angle = optimize_rotation(trees)
            
            if best_angle > 0.001:
                rotated_trees = apply_rotation(trees, best_angle)
                new_side = get_tree_list_side_length(rotated_trees)
                
                if new_side < dict_of_side_length[group_id]:
                    dict_of_tree_list[group_id] = rotated_trees
                    dict_of_side_length[group_id] = new_side
                    improvements += 1
        
        current_score = sum(v**2 / Decimal(k) for k, v in dict_of_side_length.items())
        print(f'Pass {pass_num + 1}: {improvements} improvements, score: {float(current_score):.10f}')
    
    # Save results
    rows = []
    for group_id in sorted(dict_of_tree_list.keys(), key=lambda x: int(x)):
        for i, tree in enumerate(dict_of_tree_list[group_id]):
            rows.append({
                'id': f'{group_id}_{i}',
                'x': f's{tree.center_x}',
                'y': f's{tree.center_y}',
                'deg': f's{tree.angle}'
            })
    
    result_df = pd.DataFrame(rows)
    result_df.to_csv(output_path, index=False)
    
    final_score = sum(v**2 / Decimal(k) for k, v in dict_of_side_length.items())
    print(f'\nFinal score after fix_direction: {float(final_score):.10f}')
    return float(final_score)

print('fix_direction main function defined')

In [None]:
# Apply fix_direction with 2 passes
final_score = fix_direction('submission.csv', 'submission.csv', passes=2)

In [None]:
# Final validation
final_result = score_and_validate_submission('submission.csv', max_n=200)
print(f"\nFinal validated score: {final_result['total_score']:.10f}")

In [None]:
# Replace any overlapping configurations with donor file
def replace_invalid_configurations(new_csv_path, good_csv_path, output_csv_path, failed_n_list):
    if not failed_n_list:
        print('No overlaps to fix')
        return
    
    df_new = pd.read_csv(new_csv_path)
    df_good = pd.read_csv(good_csv_path)
    failed_prefixes = [f'{n:03d}_' for n in failed_n_list]
    df_to_keep = df_new[~df_new['id'].str.startswith(tuple(failed_prefixes))]
    df_replacement = df_good[df_good['id'].str.startswith(tuple(failed_prefixes))]
    df_repaired = pd.concat([df_to_keep, df_replacement]).sort_values(by='id').reset_index(drop=True)
    df_repaired.to_csv(output_csv_path, index=False)
    print(f'Replaced {len(failed_n_list)} configurations with donor file')

if final_result['failed_overlap_n']:
    replace_invalid_configurations(
        'submission.csv',
        '/home/code/datasets/santa-2025-csv/santa-2025.csv',
        'submission.csv',
        final_result['failed_overlap_n']
    )
    # Re-validate
    final_result = score_and_validate_submission('submission.csv', max_n=200)
    print(f"\nScore after repair: {final_result['total_score']:.10f}")

In [None]:
# Copy to submission folder
import shutil
shutil.copy('submission.csv', '/home/submission/submission.csv')
print(f"Final submission saved to /home/submission/submission.csv")
print(f"Final score: {final_result['total_score']:.10f}")