# Baseline Experiment - Santa 2025 Christmas Tree Packing

## Goals:
1. Load and validate sample_submission.csv
2. Calculate baseline score
3. Set up C++ optimizer (bbox3.cpp)
4. Run optimization and generate improved submission

In [1]:
import os
import shutil

import numpy as np
import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree

# Tree geometry
# TX[i], TY[i] are the coordinates of the i-th outline vertex (15 vertices).
# They are listed from the tip at (0, 0.8), down the +x side to the trunk base
# at y = -0.2, then back up the -x side. The outline is mirror-symmetric about
# x = 0 and spans 0.7 in x and 1.0 in y before any rotation or translation.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def make_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to (x, y).

    The outline defined by ``TX``/``TY`` is first rotated about its own local
    origin (0, 0) and only then shifted, so (x, y) is where that local origin
    ends up.

    Args:
        x: Horizontal offset applied after the rotation.
        y: Vertical offset applied after the rotation.
        deg: Rotation angle in degrees.

    Returns:
        The transformed shapely ``Polygon``.
    """
    outline = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(outline, deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def load_configuration(df, n):
    """Load the configuration for the ``n``-tree group from a submission dataframe.

    Rows belonging to the group are those whose ``id`` starts with the
    zero-padded prefix ``f"{n:03d}_"`` (e.g. ``"004_"`` for n=4).  The ``x``,
    ``y`` and ``deg`` cells normally carry a leading ``'s'`` marker (e.g.
    ``"s0.202736"``).  The marker is removed only when it is actually present,
    so a plain numeric cell such as ``-0.5`` is parsed intact instead of
    silently losing its first character.

    Args:
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``.
        n: Number of trees in the group to load.

    Returns:
        A list with one dict per matching row, in dataframe order, holding the
        keys ``'x'``, ``'y'``, ``'deg'`` (floats) and ``'polygon'`` (the result
        of ``make_tree_polygon(x, y, deg)``).  Empty if no id matches.

    Raises:
        ValueError: If a cell cannot be converted to a float.
    """
    def _to_float(cell):
        # Drop the 's' marker only if it is there; never eat a sign or digit.
        text = str(cell).strip()
        if text.startswith('s'):
            text = text[1:]
        return float(text)

    prefix = f"{n:03d}_"
    # na=False keeps the mask strictly boolean even if some ids are missing.
    group = df[df['id'].str.startswith(prefix, na=False)]

    trees = []
    # Zipping the columns avoids building a Series per row as iterrows() does.
    for raw_x, raw_y, raw_deg in zip(group['x'], group['y'], group['deg']):
        x = _to_float(raw_x)
        y = _to_float(raw_y)
        deg = _to_float(raw_deg)
        trees.append({'x': x, 'y': y, 'deg': deg, 'polygon': make_tree_polygon(x, y, deg)})
    return trees

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Args:
        trees: Sequence of dicts whose ``'polygon'`` entry exposes
            ``exterior.coords`` (an iterable of coordinate tuples).

    Returns:
        The larger of the overall x-extent and y-extent across every exterior
        vertex, or ``0`` when ``trees`` is empty.
    """
    if not trees:
        return 0

    # Gather every exterior vertex once, then take the extent along each axis.
    vertices = [pt for entry in trees for pt in entry['polygon'].exterior.coords]
    xs = [pt[0] for pt in vertices]
    ys = [pt[1] for pt in vertices]

    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def check_overlaps(trees):
    """Report whether any two trees overlap; merely touching does not count.

    Args:
        trees: Sequence of dicts whose ``'polygon'`` entry is a shapely geometry.

    Returns:
        ``True`` as soon as one pair is found that intersects without only
        touching, otherwise ``False``.
    """
    shapes = [entry['polygon'] for entry in trees]
    spatial_index = STRtree(shapes)

    for pos, shape in enumerate(shapes):
        # The query results are used as positions into ``shapes``.
        for hit in spatial_index.query(shape):
            if hit == pos:
                continue  # a polygon always matches itself
            neighbour = shapes[hit]
            if not shape.intersects(neighbour):
                continue
            if shape.touches(neighbour):
                continue  # shared boundary only: allowed
            return True
    return False

def calculate_score(df, max_n=200):
    """Sum ``side**2 / n`` over every group size ``n`` from 1 to ``max_n``.

    Args:
        df: Submission dataframe understood by ``load_configuration``.
        max_n: Largest group size to include (inclusive).

    Returns:
        The accumulated score. Group sizes with no rows contribute nothing.
    """
    running_total = 0
    for n in range(1, max_n + 1):
        layout = load_configuration(df, n)
        if not layout:
            continue  # no rows for this group size
        side = get_bounding_box_side(layout)
        running_total += side ** 2 / n
    return running_total

# Confirmation marker: it prints only if every definition above ran without raising.
print("Functions defined successfully!")

Functions defined successfully!


In [2]:
# Read the sample submission and preview its first rows. The x, y and deg
# cells are kept as raw strings here and parsed later by load_configuration.
df = pd.read_csv('/home/data/sample_submission.csv')
print(f"Loaded {len(df)} rows")
print("Sample rows:")
print(df.head(10))

Loaded 20100 rows
Sample rows:
      id           x           y     deg
0  001_0        s0.0        s0.0   s90.0
1  002_0        s0.0        s0.0   s90.0
2  002_1   s0.202736  s-0.511271   s90.0
3  003_0        s0.0        s0.0   s90.0
4  003_1   s0.202736  s-0.511271   s90.0
5  003_2     s0.5206   s0.177413  s180.0
6  004_0        s0.0        s0.0   s90.0
7  004_1   s0.202736  s-0.511271   s90.0
8  004_2     s0.5206   s0.177413  s180.0
9  004_3  s-0.818657  s-0.228694  s180.0


In [3]:
# Score every group size (N = 1..200) of the untouched sample submission to
# get the reference value that later submissions are compared against.
print("Calculating baseline score (this may take a minute)...")
baseline_score = calculate_score(df, max_n=200)
print(f"\nBaseline Score: {baseline_score:.6f}")

Calculating baseline score (this may take a minute)...



Baseline Score: 173.652299


In [4]:
# Spot-check a handful of group sizes: report the bounding-square side, the
# score contribution side**2 / n, and whether any two trees overlap.
print("Checking for overlaps in sample configurations...")
for n in (1, 10, 50, 100, 200):
    trees = load_configuration(df, n)
    side = get_bounding_box_side(trees)
    has_overlap = check_overlaps(trees)
    print(f"N={n:3d}: side={side:.4f}, score_contrib={side**2/n:.4f}, overlaps={has_overlap}")

Checking for overlaps in sample configurations...
N=  1: side=1.0000, score_contrib=1.0000, overlaps=False
N= 10: side=3.4411, score_contrib=1.1841, overlaps=False
N= 50: side=6.6492, score_contrib=0.8842, overlaps=False
N=100: side=8.9561, score_contrib=0.8021, overlaps=False
N=200: side=13.0345, score_contrib=0.8495, overlaps=False


In [5]:
# Copy sample submission as our starting baseline
submission_dir = '/home/submission'
# shutil.copy does not create missing parent folders, so make sure the
# destination exists instead of failing with FileNotFoundError on a fresh run.
os.makedirs(submission_dir, exist_ok=True)
shutil.copy('/home/data/sample_submission.csv', os.path.join(submission_dir, 'submission.csv'))
print("Copied sample_submission.csv to /home/submission/submission.csv")

Copied sample_submission.csv to /home/submission/submission.csv
