# Baseline Experiment - Pre-optimized santa-2025.csv

This notebook validates and scores the pre-optimized santa-2025.csv baseline submission.

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import json

# Emit a marker in the cell output once the imports above have completed.
print('Libraries loaded')

In [None]:
# Define the ChristmasTree class
class ChristmasTree:
    """A tree outline placed at a given centre and rotated by a given angle.

    The constructor arguments are converted with ``Decimal`` and kept on the
    instance as ``center_x``, ``center_y`` and ``angle``.  The placed shape is
    stored in ``polygon``: the fixed outline rotated about the origin by
    ``angle`` (passed to ``affinity.rotate`` with its default units) and then
    translated by ``(center_x, center_y)``.  The geometry itself is built from
    ``float`` conversions of the Decimal values.
    """

    # Outline of an unrotated tree centred on the origin, starting at the tip
    # and running down the right-hand side, across the trunk, and back up the
    # left-hand side (15 vertices).
    _OUTLINE = (
        (0.0, 0.8),        # tip
        (0.125, 0.5),      # right, top tier
        (0.0625, 0.5),
        (0.2, 0.25),       # right, middle tier
        (0.1, 0.25),
        (0.35, 0.0),       # right, base tier
        (0.075, 0.0),      # right side of trunk
        (0.075, -0.2),
        (-0.075, -0.2),    # left side of trunk
        (-0.075, 0.0),
        (-0.35, 0.0),      # left, base tier
        (-0.1, 0.25),      # left, middle tier
        (-0.2, 0.25),
        (-0.0625, 0.5),    # left, top tier
        (-0.125, 0.5),
    )

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Rotate first (about the origin), then move into position.
        base_shape = Polygon(list(self._OUTLINE))
        turned = affinity.rotate(base_shape, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )


print('ChristmasTree class defined')

In [None]:
# Read the baseline submission into `df` and preview its first rows
baseline_csv = '/home/code/experiments/001_baseline/santa-2025.csv'
df = pd.read_csv(baseline_csv)
print(f'Loaded {len(df)} rows')
print(df.head(10))

In [None]:
# Parse the submission - values are prefixed with 's'
def parse_value(val):
    """Return *val* as a string with a single leading 's' marker removed.

    Only ``str`` inputs that start with 's' are trimmed (by exactly one
    character); every other input is returned as ``str(val)``.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_marker else str(val)

# Build the ChristmasTree objects belonging to one puzzle size N
def load_trees_for_n(df, n):
    """Return a list of ``ChristmasTree`` objects for configuration *n*.

    Rows are selected by their ``id`` starting with *n* zero-padded to three
    digits followed by an underscore (e.g. ``'007_'``).  Each row's ``x``,
    ``y`` and ``deg`` values are cleaned with ``parse_value`` and passed, in
    that order, to ``ChristmasTree``.
    """
    id_prefix = f'{n:03d}_'
    matching = df[df['id'].str.startswith(id_prefix)]
    return [
        ChristmasTree(*(parse_value(record[col]) for col in ('x', 'y', 'deg')))
        for _, record in matching.iterrows()
    ]


print('Helper functions defined')

In [None]:
# Calculate score for a single N
def get_score_for_n(df, n):
    """Return the score contribution ``side**2 / n`` for configuration *n*.

    ``side`` is the larger of the x-extent and y-extent spanned by the
    exterior vertices of all trees loaded for *n*.  If the number of loaded
    trees is not exactly *n*, a warning is printed and ``None`` is returned.
    """
    trees = load_trees_for_n(df, n)
    tree_count = len(trees)
    if tree_count != n:
        print(f'Warning: N={n} has {tree_count} trees instead of {n}')
        return None

    # Stack every exterior vertex of every tree into one (points, 2) array.
    vertices = np.array([
        point
        for tree in trees
        for point in tree.polygon.exterior.coords
    ])

    # Axis-aligned extents; the bounding square uses the larger one.
    lows = vertices.min(axis=0)
    highs = vertices.max(axis=0)
    side = max(highs[0] - lows[0], highs[1] - lows[1])

    return side**2 / n


print('Score function defined')

In [None]:
# Calculate total score
def get_total_score(df):
    """Sum the per-N scores for N = 1..200.

    Returns a ``(total, scores_by_n)`` pair, where ``scores_by_n`` maps each N
    to its score.  Any N for which ``get_score_for_n`` returns ``None`` is left
    out of both the total and the mapping.
    """
    running_total = 0
    per_n = {}
    for n in range(1, 201):
        contribution = get_score_for_n(df, n)
        if contribution is None:
            # Skip configurations that could not be scored.
            continue
        running_total += contribution
        per_n[n] = contribution
    return running_total, per_n


print('Calculating total score...')
total_score, scores_by_n = get_total_score(df)
print(f'\nTotal Score: {total_score:.6f}')

In [None]:
# Rank N values by score contribution (largest first) and report the top 10
scores_sorted = sorted(scores_by_n.items(), key=lambda item: item[1], reverse=True)
print('Top 10 worst N values (highest score contribution):')
for n, score in scores_sorted[:10]:
    # Recompute the bounding-square side from the tree vertices for display.
    vertices = np.array([
        point
        for tree in load_trees_for_n(df, n)
        for point in tree.polygon.exterior.coords
    ])
    extents = vertices.max(axis=0) - vertices.min(axis=0)
    side = max(extents[0], extents[1])
    print(f'  N={n}: side={side:.6f}, score_contribution={score:.6f}')

In [None]:
# Check for overlaps (sample a few N values)
def has_overlap(trees, min_area=1e-10):
    """Return True if any two trees overlap by more than *min_area*.

    Args:
        trees: Sequence of objects exposing a ``polygon`` attribute.
        min_area: Intersection areas at or below this value are ignored, so
            pairs that merely touch or overlap by numerical noise do not
            count.  Defaults to the previously hard-coded ``1e-10``.

    Returns:
        ``False`` for zero or one tree; otherwise ``True`` as soon as one pair
        of polygons intersects, does not merely touch, and has an
        intersection area greater than *min_area*; ``False`` if no such pair
        exists.

    Note:
        The candidate handling relies on ``STRtree.query`` returning integer
        positions into ``polygons`` (Shapely 2.x behaviour).
    """
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)

    for i, poly in enumerate(polygons):
        for idx in tree_index.query(poly):
            # The checks below are symmetric, so each unordered pair only
            # needs testing once; this also skips the polygon itself.
            if idx <= i:
                continue
            other = polygons[idx]
            if not poly.intersects(other) or poly.touches(other):
                continue
            # Require a real shared area, not just boundary contact.
            if poly.intersection(other).area > min_area:
                return True
    return False

print('Checking for overlaps in sample N values...')
# Only a handful of N values are checked here, not all configurations.
sample_ns = (1, 5, 10, 20, 50, 100, 150, 200)
overlap_found = False
for n in sample_ns:
    trees = load_trees_for_n(df, n)
    overlapping = has_overlap(trees)
    if overlapping:
        overlap_found = True
    print(f'  N={n}: OVERLAP FOUND!' if overlapping else f'  N={n}: No overlap')

if not overlap_found:
    print('\nNo overlaps detected in sampled N values.')

In [None]:
# Persist the headline score and the ten worst N values as JSON
metrics_path = '/home/code/experiments/001_baseline/metrics.json'
metrics = {
    'cv_score': total_score,
    'total_score': total_score,
    # (n, score) pairs, largest score contribution first
    'worst_n_values': scores_sorted[:10],
}

with open(metrics_path, 'w') as fh:
    json.dump(metrics, fh, indent=2)

print(f'Metrics saved. CV Score: {total_score:.6f}')

In [None]:
# Publish the baseline CSV as the submission file
import os
import shutil

submission_dir = '/home/submission'
baseline_source = '/home/code/experiments/001_baseline/santa-2025.csv'
submission_target = '/home/submission/submission.csv'

# Create the destination directory if it is missing, then copy the file.
os.makedirs(submission_dir, exist_ok=True)
shutil.copy(baseline_source, submission_target)
print('Submission copied to /home/submission/submission.csv')