# Experiment 001: Baseline with bbox3 Optimizer

This experiment implements the baseline workflow from top kernels:
1. Start with pre-optimized baseline submission
2. Run bbox3 optimizer with moderate parameters
3. Apply fix_direction post-processing
4. Validate for overlaps and repair if needed

In [1]:
import numpy as np
import pandas as pd
import os
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar
import subprocess
import time

getcontext().prec = 30
scale_factor = 1

# Working directory
os.chdir('/home/code/experiments/001_baseline')
print(f'Working directory: {os.getcwd()}')
print(f'Files: {os.listdir()}')

Working directory: /home/code/experiments/001_baseline
Files: ['bbox3', 'submission.csv', 'baseline.ipynb']


In [2]:
# Christmas Tree class from the kernel
class ChristmasTree:
    """Represents a single, rotatable Christmas tree of a fixed size."""

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Initializes the Christmas tree with a specific position and rotation."""
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon(
            [
                (Decimal('0.0') * scale_factor, tip_y * scale_factor),
                (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
                (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
                (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
                (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
                (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
                (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
                (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
                (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
                (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
                (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
                (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
                (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
                (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
                (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
            ]
        )
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

    def clone(self):
        return ChristmasTree(
            center_x=str(self.center_x),
            center_y=str(self.center_y),
            angle=str(self.angle),
        )

print('ChristmasTree class defined')

ChristmasTree class defined


In [3]:
# Helper functions
def get_tree_list_side_length(tree_list):
    all_polygons = [t.polygon for t in tree_list]
    bounds = unary_union(all_polygons).bounds
    return Decimal(max(bounds[2] - bounds[0], bounds[3] - bounds[1])) / scale_factor

def get_total_score(dict_of_side_length):
    score = 0
    for k, v in dict_of_side_length.items():
        score += v ** 2 / Decimal(k)
    return score

def parse_csv(csv_path):
    print(f'Parsing: {csv_path}')
    result = pd.read_csv(csv_path)
    result['x'] = result['x'].str.strip('s')
    result['y'] = result['y'].str.strip('s')
    result['deg'] = result['deg'].str.strip('s')
    result[['group_id', 'item_id']] = result['id'].str.split('_', n=2, expand=True)

    dict_of_tree_list = {}
    dict_of_side_length = {}
    for group_id, group_data in result.groupby('group_id'):
        tree_list = [ChristmasTree(center_x=row['x'], center_y=row['y'], angle=row['deg'])
                     for _, row in group_data.iterrows()]
        dict_of_tree_list[group_id] = tree_list
        dict_of_side_length[group_id] = get_tree_list_side_length(tree_list)

    return dict_of_tree_list, dict_of_side_length

def load_configuration_from_df(n, df):
    group_data = df[df['id'].str.startswith(f'{n:03d}_')]
    trees = []
    for _, row in group_data.iterrows():
        x = str(row['x'])[1:]
        y = str(row['y'])[1:]
        deg = str(row['deg'])[1:]
        if x and y and deg:
            trees.append(ChristmasTree(x, y, deg))
    return trees

def get_score(trees, n):
    if not trees:
        return 0.0
    xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T / float(scale_factor) for t in trees])
    min_x, min_y = xys.min(axis=0)
    max_x, max_y = xys.max(axis=0)
    side_length = max(max_x - min_x, max_y - min_y)
    return side_length**2 / n

def has_overlap(trees):
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx == i:
                continue
            if poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                return True
    return False

print('Helper functions defined')

Helper functions defined


In [4]:
# Calculate initial score
dict_of_tree_list, dict_of_side_length = parse_csv('submission.csv')
initial_score = float(get_total_score(dict_of_side_length))
print(f'Initial score: {initial_score:.6f}')

Parsing: submission.csv


Initial score: 70.734327


In [5]:
# fix_direction functions
def calculate_bbox_side_at_angle(angle_deg, points):
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix_T = np.array([[c, s], [-s, c]])
    rotated_points = points.dot(rot_matrix_T)
    min_xy = np.min(rotated_points, axis=0)
    max_xy = np.max(rotated_points, axis=0)
    return max(max_xy[0] - min_xy[0], max_xy[1] - min_xy[1])

def optimize_rotation(trees):
    all_points = []
    for tree in trees:
        all_points.extend(list(tree.polygon.exterior.coords))
    points_np = np.array(all_points)
    hull_points = points_np[ConvexHull(points_np).vertices]
    initial_side = calculate_bbox_side_at_angle(0, hull_points)
    res = minimize_scalar(lambda a: calculate_bbox_side_at_angle(a, hull_points),
                          bounds=(0.001, 89.999), method='bounded')
    found_angle_deg = res.x
    found_side = res.fun
    improvement = initial_side - found_side
    EPSILON = 1e-8
    if improvement > EPSILON:
        best_angle_deg = found_angle_deg
        best_side = Decimal(found_side) / scale_factor
    else:
        best_angle_deg = 0.0
        best_side = Decimal(initial_side) / scale_factor
    return best_side, best_angle_deg

def apply_rotation(trees, angle_deg):
    if not trees or abs(angle_deg) < 1e-9:
        return [t.clone() for t in trees]
    bounds = [t.polygon.bounds for t in trees]
    min_x = min(b[0] for b in bounds)
    min_y = min(b[1] for b in bounds)
    max_x = max(b[2] for b in bounds)
    max_y = max(b[3] for b in bounds)
    rotation_center = np.array([(min_x + max_x) / 2.0, (min_y + max_y) / 2.0])
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix = np.array([[c, -s], [s, c]])
    points = np.array([[float(t.center_x), float(t.center_y)] for t in trees])
    shifted = points - rotation_center
    rotated = shifted.dot(rot_matrix.T) + rotation_center
    rotated_trees = []
    for i in range(len(trees)):
        new_tree = ChristmasTree(Decimal(rotated[i, 0]), Decimal(rotated[i, 1]),
                                 Decimal(trees[i].angle + Decimal(angle_deg)))
        rotated_trees.append(new_tree)
    return rotated_trees

print('fix_direction functions defined')

fix_direction functions defined


In [6]:
def fix_direction(current_solution_path='submission.csv', out_file='submission.csv'):
    # Load current best solution
    dict_of_tree_list, dict_of_side_length = parse_csv(current_solution_path)
    current_score = get_total_score(dict_of_side_length)
    print(f'Current score: {float(current_score):.6f}')

    # Get optimal rotation angle from 4-tree configuration
    initial_trees = [
        ChristmasTree(1, 0, 0),
        ChristmasTree(0, 1, 90),
        ChristmasTree(-1, 0, 180),
        ChristmasTree(0, -1, 270)
    ]
    best_side, best_angle_deg = optimize_rotation(initial_trees)
    fixed_trees = apply_rotation(initial_trees, best_angle_deg)
    print(f'Optimal rotation angle: {best_angle_deg:.6f} degrees')

    # Apply rotation to all groups
    new_dict_of_tree_list = {}
    new_dict_of_side_length = {}
    for group_id, tree_list in dict_of_tree_list.items():
        new_side, new_angle = optimize_rotation(tree_list)
        if new_side < dict_of_side_length[group_id]:
            new_tree_list = apply_rotation(tree_list, new_angle)
            new_dict_of_tree_list[group_id] = new_tree_list
            new_dict_of_side_length[group_id] = new_side
        else:
            new_dict_of_tree_list[group_id] = tree_list
            new_dict_of_side_length[group_id] = dict_of_side_length[group_id]

    new_score = get_total_score(new_dict_of_side_length)
    print(f'New score after fix_direction: {float(new_score):.6f}')
    print(f'Improvement: {float(current_score - new_score):.6f}')

    # Save to file
    rows = []
    for group_id in sorted(new_dict_of_tree_list.keys(), key=lambda x: int(x)):
        for i, tree in enumerate(new_dict_of_tree_list[group_id]):
            rows.append({
                'id': f'{group_id}_{i}',
                'x': f's{tree.center_x}',
                'y': f's{tree.center_y}',
                'deg': f's{tree.angle}'
            })
    df = pd.DataFrame(rows)
    df.to_csv(out_file, index=False)
    print(f'Saved to {out_file}')
    return float(new_score)

print('fix_direction function defined')

fix_direction function defined


In [7]:
# Run bbox3 optimizer
print('Running bbox3 optimizer...')
start_time = time.time()

# Run with moderate parameters: -n 2000 -r 96
result = subprocess.run(
    ['./bbox3', '-n', '2000', '-r', '96'],
    capture_output=True,
    text=True,
    timeout=3600  # 1 hour timeout
)

print(f'bbox3 completed in {time.time() - start_time:.1f} seconds')
print('stdout:', result.stdout[-2000:] if len(result.stdout) > 2000 else result.stdout)
if result.stderr:
    print('stderr:', result.stderr[-500:] if len(result.stderr) > 500 else result.stderr)

Running bbox3 optimizer...
bbox3 completed in 0.0 seconds
stdout: 
stderr: ./bbox3: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by ./bbox3)



In [8]:
# Since bbox3 has GLIBC issues, let's use the precomputed submission
# and apply fix_direction to it
import shutil

# Copy precomputed submission
shutil.copy('/home/nonroot/snapshots/santa-2025/21104669204/code/precomputed_submission.csv', 'submission.csv')

# Calculate its score
dict_of_tree_list, dict_of_side_length = parse_csv('submission.csv')
precomputed_score = float(get_total_score(dict_of_side_length))
print(f'Precomputed submission score: {precomputed_score:.6f}')

Parsing: submission.csv


Precomputed submission score: 70.743774


In [9]:
# Go back to original baseline and apply fix_direction
shutil.copy('/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv', 'submission.csv')

# Verify score
dict_of_tree_list, dict_of_side_length = parse_csv('submission.csv')
baseline_score = float(get_total_score(dict_of_side_length))
print(f'Original baseline score: {baseline_score:.6f}')

Parsing: submission.csv


Original baseline score: 70.734327


In [10]:
# Apply fix_direction post-processing (multiple passes)
print('Applying fix_direction pass 1...')
score_pass1 = fix_direction('submission.csv', 'submission.csv')

print('\\nApplying fix_direction pass 2...')
score_pass2 = fix_direction('submission.csv', 'submission.csv')

print('\\nApplying fix_direction pass 3...')
score_pass3 = fix_direction('submission.csv', 'submission.csv')

Applying fix_direction pass 1...
Parsing: submission.csv


Current score: 70.734327
Optimal rotation angle: 28.810794 degrees


New score after fix_direction: 70.734327
Improvement: 0.000000
Saved to submission.csv
\nApplying fix_direction pass 2...
Parsing: submission.csv


Current score: 70.734327
Optimal rotation angle: 28.810794 degrees


New score after fix_direction: 70.734327
Improvement: 0.000000
Saved to submission.csv
\nApplying fix_direction pass 3...
Parsing: submission.csv


Current score: 70.734327
Optimal rotation angle: 28.810794 degrees


New score after fix_direction: 70.734327
Improvement: 0.000000
Saved to submission.csv


In [12]:
# Check scores of various submissions in snapshots
import glob

submission_files = [
    '/home/nonroot/snapshots/santa-2025/20992536951/code/submission.csv',
    '/home/nonroot/snapshots/santa-2025/20992536951/code/submission_v21.csv',
    '/home/nonroot/snapshots/santa-2025/20992536951/code/submission_bp.csv',
    '/home/nonroot/snapshots/santa-2025/21086827828/code/submission.csv',
    '/home/nonroot/snapshots/santa-2025/21086827828/code/submission_v21.csv',
]

best_score = float('inf')
best_file = None

for f in submission_files:
    try:
        dict_of_tree_list, dict_of_side_length = parse_csv(f)
        score = float(get_total_score(dict_of_side_length))
        print(f'{f}: {score:.6f}')
        if score < best_score:
            best_score = score
            best_file = f
    except Exception as e:
        print(f'{f}: Error - {e}')

print(f'\nBest score: {best_score:.6f}')
print(f'Best file: {best_file}')

Parsing: /home/nonroot/snapshots/santa-2025/20992536951/code/submission.csv


/home/nonroot/snapshots/santa-2025/20992536951/code/submission.csv: 118.183895
Parsing: /home/nonroot/snapshots/santa-2025/20992536951/code/submission_v21.csv


/home/nonroot/snapshots/santa-2025/20992536951/code/submission_v21.csv: 118.193395
Parsing: /home/nonroot/snapshots/santa-2025/20992536951/code/submission_bp.csv


/home/nonroot/snapshots/santa-2025/20992536951/code/submission_bp.csv: 118.183895
Parsing: /home/nonroot/snapshots/santa-2025/21086827828/code/submission.csv
/home/nonroot/snapshots/santa-2025/21086827828/code/submission.csv: Error - Can only use .str accessor with string values!
Parsing: /home/nonroot/snapshots/santa-2025/21086827828/code/submission_v21.csv
/home/nonroot/snapshots/santa-2025/21086827828/code/submission_v21.csv: Error - Can only use .str accessor with string values!

Best score: 118.183895
Best file: /home/nonroot/snapshots/santa-2025/20992536951/code/submission.csv


In [13]:
# Check the bucket-of-chump submission
bucket_file = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/bucket-of-chump/submission.csv'
dict_of_tree_list, dict_of_side_length = parse_csv(bucket_file)
bucket_score = float(get_total_score(dict_of_side_length))
print(f'Bucket-of-chump submission score: {bucket_score:.6f}')

Parsing: /home/nonroot/snapshots/santa-2025/21105319338/code/datasets/bucket-of-chump/submission.csv


Bucket-of-chump submission score: 70.750676


In [None]:
# Calculate score after bbox3
dict_of_tree_list, dict_of_side_length = parse_csv('submission.csv')
bbox3_score = float(get_total_score(dict_of_side_length))
print(f'Score after bbox3: {bbox3_score:.6f}')

In [None]:
# Apply fix_direction post-processing (2 passes)
print('\nApplying fix_direction pass 1...')
score_pass1 = fix_direction('submission.csv', 'submission.csv')

print('\nApplying fix_direction pass 2...')
score_pass2 = fix_direction('submission.csv', 'submission.csv')

In [None]:
# Validate for overlaps
def score_and_validate_submission(file_path, max_n=200):
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f'Error reading CSV: {e}')
        return {'status': 'FAILED', 'error': str(e)}

    total_score = 0.0
    failed_overlap_n = []

    for n in range(1, max_n + 1):
        trees = load_configuration_from_df(n, df)
        if trees:
            current_score = get_score(trees, n)
            total_score += current_score
            if has_overlap(trees):
                failed_overlap_n.append(n)

    print(f'Total Score: {total_score:.6f}')
    if failed_overlap_n:
        print(f'Overlaps found in N: {failed_overlap_n}')
        status = 'FAILED (Overlaps)'
    else:
        print('No overlaps detected')
        status = 'SUCCESS'

    return {
        'status': status,
        'total_score': total_score,
        'failed_overlap_n': failed_overlap_n
    }

print('Validating submission...')
result = score_and_validate_submission('submission.csv')

In [None]:
# Repair overlaps if needed
import csv

def load_groups(filename):
    groups = {}
    with open(filename, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        header = next(reader)
        for row in reader:
            full_id = row[0]
            group = full_id.split('_')[0]
            groups.setdefault(group, []).append(row)
    return header, groups

def replace_group(target_file, donor_file, group_id, output_file=None):
    if output_file is None:
        output_file = target_file
    header_t, groups_t = load_groups(target_file)
    header_d, groups_d = load_groups(donor_file)
    if group_id not in groups_d:
        raise ValueError(f'Donor file missing group {group_id}')
    groups_t[group_id] = groups_d[group_id]
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header_t)
        for g in sorted(groups_t.keys(), key=lambda x: int(x)):
            for row in groups_t[g]:
                writer.writerow(row)
    print(f'Replaced group {group_id}')

# Repair overlaps using baseline as donor
if result['failed_overlap_n']:
    print('Repairing overlaps...')
    # Copy original baseline for donor
    import shutil
    shutil.copy('/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv', 'donor.csv')
    for n in result['failed_overlap_n']:
        replace_group('submission.csv', 'donor.csv', f'{n:03d}')
    # Re-validate
    print('\nRe-validating after repair...')
    result = score_and_validate_submission('submission.csv')
else:
    print('No overlaps to repair')

In [None]:
# Final score
final_score = result['total_score']
print(f'\n=== FINAL RESULTS ===')
print(f'Initial score: {initial_score:.6f}')
print(f'Final score: {final_score:.6f}')
print(f'Improvement: {initial_score - final_score:.6f}')
print(f'Status: {result["status"]}')

In [None]:
# Copy to submission folder
import shutil
shutil.copy('submission.csv', '/home/submission/submission.csv')
print('Submission saved to /home/submission/submission.csv')