# Baseline Experiment - Santa 2025

Goal: Find the best pre-optimized submission and establish a baseline score.

Target: beat a total score of 68.901319 (lower is better).

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os
import glob

# Tree geometry (15-vertex polygon)
def get_tree_polygon():
    """Build the base Christmas-tree outline as a shapely ``Polygon``.

    The outline has 15 vertices and is mirror-symmetric about the y-axis, so
    only the left-hand silhouette is spelled out; the right-hand side is
    obtained by negating x and walking the same points in reverse order.
    The trunk bottom sits at y = -0.2 and the tip at y = 0.8.

    NOTE(review): the coordinates are stated to come from the competition's
    getting-started notebook. The tier widths at y = 0.25 and y = 0.5 should
    be double-checked against that notebook before relying on scores or
    overlap checks built on this shape.
    """
    # Left silhouette, listed from the trunk bottom up to just below the tip.
    left_side = [
        (-0.075, -0.2),   # trunk bottom
        (-0.075, 0.0),    # trunk top
        (-0.35, 0.0),     # base tier outer corner
        (-0.2, 0.25),     # base tier top
        (-0.35, 0.25),    # middle tier outer corner
        (-0.125, 0.5),    # middle tier top
        (-0.2, 0.5),      # top tier outer corner
    ]
    tip = (0.0, 0.8)
    # Mirror across the y-axis; reversing keeps the ring running in one
    # direction (up the left side, down the right side).
    right_side = [(-px, py) for px, py in reversed(left_side)]
    return Polygon(left_side + [tip] + right_side)

# Shared template polygon; create_tree_polygon() derives every placed tree
# from it.
BASE_TREE = get_tree_polygon()
# Sanity output: bounding box and area of the untransformed template.
print(
    f"Base tree bounds: {BASE_TREE.bounds}",
    f"Base tree area: {BASE_TREE.area:.4f}",
    sep="\n",
)

In [None]:
def create_tree_polygon(x, y, deg):
    """Return ``BASE_TREE`` rotated by ``deg`` and then moved to ``(x, y)``.

    The rotation is applied first, about the origin ``(0, 0)``, and the
    translation second, so ``(x, y)`` is where the template's origin ends up.

    Args:
        x: Offset along the x-axis.
        y: Offset along the y-axis.
        deg: Rotation angle passed to ``affinity.rotate`` (degrees with
            shapely's default settings).

    Returns:
        The transformed polygon; ``BASE_TREE`` itself is not modified.
    """
    rotated = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def calculate_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing ``trees``.

    Args:
        trees: Sequence of polygons; only each polygon's
            ``exterior.coords`` is read.

    Returns:
        The larger of the x-extent and the y-extent over all exterior
        vertices, or ``0`` when ``trees`` is empty.
    """
    if not trees:
        return 0

    # One (num_vertices, dims) array holding every exterior vertex.
    coords = np.array([pt for poly in trees for pt in poly.exterior.coords])
    xs, ys = coords[:, 0], coords[:, 1]

    width = xs.max() - xs.min()
    height = ys.max() - ys.min()
    return max(width, height)

def _parse_submission_value(value):
    """Convert one submission cell to ``float``, dropping any 's' marker."""
    return float(str(value).replace('s', ''))


def score_submission(df, max_n=200):
    """Calculate the total score for a submission dataframe.

    For every configuration size ``n`` in ``1..max_n`` the rows whose ``id``
    starts with the zero-padded prefix ``"{n:03d}_"`` are turned into tree
    polygons, the enclosing square's side is measured, and ``side**2 / n`` is
    added to the total.

    Args:
        df: DataFrame with string ``id`` values plus ``x``, ``y`` and ``deg``
            columns. Cell values may carry an ``'s'`` marker, which is
            removed before parsing.
        max_n: Largest configuration size to score (inclusive). Defaults to
            200, the range this notebook works with.

    Returns:
        ``(total_score, side_lengths)`` where ``side_lengths`` maps each
        scored ``n`` to its bounding-square side. A size with no rows is
        reported with a printed warning and is left out of both the total
        and the mapping, so callers must not assume every ``n`` is present.
    """
    total_score = 0
    side_lengths = {}
    id_column = df['id']

    for n in range(1, max_n + 1):
        prefix = f"{n:03d}_"
        group = df[id_column.str.startswith(prefix)]

        if group.empty:
            print(f"Warning: No trees found for N={n}")
            continue

        # Walk the three columns in lockstep; this avoids building a Series
        # object per row the way iterrows() does.
        trees = [
            create_tree_polygon(
                _parse_submission_value(x),
                _parse_submission_value(y),
                _parse_submission_value(deg),
            )
            for x, y, deg in zip(group['x'], group['y'], group['deg'])
        ]

        side = calculate_bounding_box_side(trees)
        side_lengths[n] = side
        total_score += (side ** 2) / n

    return total_score, side_lengths

# Cell-completion marker: confirms the helper definitions above were executed.
print("Scoring functions defined.")

In [None]:
# Find all pre-optimized submissions
preopt_base = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

# os.walk() yields nothing for a missing directory, which would otherwise be
# indistinguishable from "directory exists but holds no CSVs".
if not os.path.isdir(preopt_base):
    print(f"Warning: directory not found: {preopt_base}")

# Collect every *.csv below preopt_base. Sorted so the listing is
# reproducible; os.walk() order depends on the filesystem.
csv_files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(preopt_base)
    for name in names
    if name.endswith('.csv')
)

print(f"Found {len(csv_files)} CSV files:")
for f in csv_files:
    print(f"  {f}")

In [None]:
# Score a hand-picked set of candidate submissions and report the lowest score.
key_files = [
    f'{preopt_base}/santa-2025.csv',
    f'{preopt_base}/best_ensemble.csv',
    f'{preopt_base}/ensemble.csv',
    f'{preopt_base}/santa25-public/submission_70_926149550346.csv',
    f'{preopt_base}/santa25-public/submission_70_936673758122.csv',
]

# (path, score) for every candidate that could be read and scored.
results = []
for fpath in key_files:
    # Missing candidates are reported and skipped.
    if not os.path.exists(fpath):
        print(f"{os.path.basename(fpath)}: NOT FOUND")
        continue
    try:
        df = pd.read_csv(fpath)
        score, _ = score_submission(df)
        results.append((fpath, score))
        print(f"{os.path.basename(fpath)}: {score:.6f}")
    except Exception as e:
        # Best effort: one unreadable file must not stop the comparison.
        print(f"{os.path.basename(fpath)}: ERROR - {e}")

print("\nBest submission:")
if results:
    # Lower score wins.
    best = min(results, key=lambda entry: entry[1])
    print(f"  {best[0]}: {best[1]:.6f}")

In [None]:
# Also score the submission.csv stored alongside the snapshot's code.
snapshot_submission = '/home/nonroot/snapshots/santa-2025/21116303805/code/submission.csv'
if os.path.exists(snapshot_submission):
    df = pd.read_csv(snapshot_submission)
    score, side_lengths = score_submission(df)
    print(f"Snapshot submission.csv: {score:.6f}")

    # Show score breakdown by N ranges
    print("\nScore breakdown by N range:")
    for start, end in [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]:
        # score_submission() leaves out any N that has no rows, so skip
        # missing keys instead of failing with KeyError on a partial file.
        range_score = sum(
            (side_lengths[n] ** 2) / n
            for n in range(start, end + 1)
            if n in side_lengths
        )
        print(f"  N={start}-{end}: {range_score:.6f}")

In [None]:
# Use the best pre-optimized submission as our baseline
# Based on previous analysis, santa-2025.csv is the best at 70.676102
best_file = f'{preopt_base}/santa-2025.csv'
df_best = pd.read_csv(best_file)

# Verify the score against the target we are trying to beat.
score, side_lengths = score_submission(df_best)
target_score = 68.901319
print(f"Best baseline score: {score:.6f}")
print(f"Target score: {target_score:.6f}")
print(f"Gap: {score - target_score:.6f}")

# Save as our baseline submission. The output directory is created first
# because to_csv() does not create missing parent directories.
submission_path = '/home/submission/submission.csv'
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
df_best.to_csv(submission_path, index=False)
print(f"\nSaved to {submission_path}")

In [None]:
# Rank configurations by how much each contributes to the total score.
print("\nTop 20 N values by score contribution (S^2/n):")
# Iterate over the N values that were actually scored: score_submission()
# omits any N with no rows, and indexing a fixed 1..200 range would raise
# KeyError for those. Each entry is (n, contribution, side).
contributions = sorted(
    ((n, (side ** 2) / n, side) for n, side in side_lengths.items()),
    key=lambda item: item[1],
    reverse=True,
)

for n, contrib, side in contributions[:20]:
    print(f"  N={n:3d}: contrib={contrib:.6f}, side={side:.6f}")

In [None]:
# Check for overlaps in the submission
from shapely.strtree import STRtree

def check_overlaps(df, n):
    """Look for a pair of overlapping trees in configuration ``n``.

    Rows whose ``id`` starts with ``"{n:03d}_"`` are converted to polygons
    and every unordered pair is tested. A pair counts as overlapping when the
    polygons intersect, do not merely touch, and share an area greater than
    ``1e-10``.

    Args:
        df: Submission DataFrame with ``id``, ``x``, ``y`` and ``deg``.
        n: Configuration size to inspect.

    Returns:
        ``(True, i, j, area)`` for the first overlapping pair found, with
        ``i < j`` being positions within the configuration, or
        ``(False, None, None, 0)`` when no pair overlaps.
    """
    prefix = f"{n:03d}_"
    subset = df[df['id'].str.startswith(prefix)]

    def _num(value):
        # Cells may carry an 's' marker; remove it before parsing.
        return float(str(value).replace('s', ''))

    trees = [
        create_tree_polygon(_num(row['x']), _num(row['y']), _num(row['deg']))
        for _, row in subset.iterrows()
    ]

    for i, first in enumerate(trees):
        for j in range(i + 1, len(trees)):
            second = trees[j]
            # Disjoint or merely touching pairs cannot be a real overlap.
            if not first.intersects(second) or first.touches(second):
                continue
            # Ignore slivers below the area tolerance.
            overlap_area = first.intersection(second).area
            if overlap_area > 1e-10:
                return True, i, j, overlap_area
    return False, None, None, 0

# Spot-check a sample of configuration sizes (not every N) for overlaps.
print("Checking for overlaps in key configurations...")
for n in (1, 2, 3, 4, 5, 10, 50, 100, 150, 200):
    has_overlap, i, j, area = check_overlaps(df_best, n)
    if not has_overlap:
        print(f"  N={n}: OK")
        continue
    print(f"  N={n}: OVERLAP between trees {i} and {j}, area={area:.10f}")

In [None]:
# Final summary: restate the chosen file, its score and the gap to the target.
print("="*60, "BASELINE EXPERIMENT SUMMARY", "="*60, sep="\n")
print(f"Best pre-optimized file: {best_file}")
print(f"Baseline score: {score:.6f}")
print(f"Target score: 68.901319")
# Gap is shown both in absolute terms and as a percentage of the target.
print(f"Gap to target: {score - 68.901319:.6f} ({(score - 68.901319)/68.901319*100:.2f}%)")
print(f"\nSubmission saved to: /home/submission/submission.csv")
print("="*60)