# Baseline Experiment - Santa 2025

Goal: Establish a baseline score by evaluating the best snapshot submission.

**Scoring Formula:** score = Σ(side_n² / n) for n = 1 to 200, where side_n is the side length of the square bounding box enclosing the n-tree configuration (lower is better)

**Target Score:** Beat 68.894234

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely import STRtree
import math
from decimal import Decimal, getcontext
getcontext().prec = 50

# Outline of one tree as parallel x / y coordinate lists (15 vertices).
# Vertex i is (TX[i], TY[i]). The outline starts at the topmost point
# (0, 0.8), goes down the positive-x side to y = -0.2, and comes back up the
# negative-x side; the x values mirror each other about x = 0.
TX = [
    0,                   # y = 0.8
    0.125, 0.0625,       # y = 0.5
    0.2, 0.1,            # y = 0.25
    0.35, 0.075,         # y = 0
    0.075, -0.075,       # y = -0.2
    -0.075, -0.35,       # y = 0
    -0.1, -0.2,          # y = 0.25
    -0.0625, -0.125,     # y = 0.5
]
TY = [
    0.8,
    0.5, 0.5,
    0.25, 0.25,
    0, 0,
    -0.2, -0.2,
    0, 0,
    0.25, 0.25,
    0.5, 0.5,
]

print(f"Tree has {len(TX)} vertices")

In [None]:
def get_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and placed at ``(x, y)``.

    Every template vertex ``(TX[i], TY[i])`` is rotated about the origin with
    the standard 2-D rotation matrix (positive ``deg`` turns counter-clockwise)
    and then shifted by ``(x, y)``.

    Args:
        x: Horizontal offset applied after rotation.
        y: Vertical offset applied after rotation.
        deg: Rotation angle in degrees.

    Returns:
        A ``Polygon`` built from the 15 transformed vertices, in template order.
    """
    theta = math.radians(deg)
    c, s = math.cos(theta), math.sin(theta)

    # Rotate first, translate second: the offset itself is never rotated.
    return Polygon([
        (px * c - py * s + x, px * s + py * c + y)
        for px, py in zip(TX, TY)
    ])

def get_bounding_box_side(polygons):
    """Return the side of the smallest axis-aligned square covering all polygons.

    Args:
        polygons: Sequence of objects exposing ``bounds`` as
            ``(minx, miny, maxx, maxy)``.

    Returns:
        The larger of the combined width and height, or ``0`` when
        ``polygons`` is empty.
    """
    if not polygons:
        return 0

    # Start from an "inverted" box so the first polygon always replaces it.
    left = bottom = float('inf')
    right = top = float('-inf')

    for shape in polygons:
        lo_x, lo_y, hi_x, hi_y = shape.bounds
        if lo_x < left:
            left = lo_x
        if lo_y < bottom:
            bottom = lo_y
        if hi_x > right:
            right = hi_x
        if hi_y > top:
            top = hi_y

    span_x = right - left
    span_y = top - bottom
    # The square must cover the longer of the two extents.
    return span_y if span_y > span_x else span_x

def check_overlaps(polygons):
    """Tell whether any two polygons intersect in more than a mere touch.

    A pair counts as overlapping when ``intersects`` is true and ``touches``
    is false, so polygons that only touch are accepted.

    Args:
        polygons: Sequence of shapely geometries.

    Returns:
        ``True`` as soon as one overlapping pair is found, otherwise ``False``
        (always ``False`` for fewer than two polygons).
    """
    if len(polygons) < 2:
        return False

    # Spatial index: only candidates returned by the query are tested exactly.
    index = STRtree(polygons)
    for i, current in enumerate(polygons):
        for j in index.query(current):
            # j > i visits each unordered pair once and skips self-comparison.
            if j > i:
                other = polygons[j]
                if current.intersects(other) and not current.touches(other):
                    return True
    return False

# Notebook-cell confirmation that the helper definitions above were executed.
print("Functions defined")

In [None]:
# Read the snapshot submission CSV into `df`, then show its shape and the
# first ten rows as a quick sanity check.
df = pd.read_csv(
    '/home/code/experiments/001_baseline/snapshot_submission.csv'
)
print(f"Submission shape: {df.shape}")
print(df.head(10))

In [None]:
# Parse the submission - values are prefixed with 's'
def parse_value(val):
    """Convert a submission cell to ``float``, dropping one leading ``'s'``.

    Args:
        val: Either a string (optionally starting with ``'s'``) or any value
            that ``float()`` accepts directly.

    Returns:
        The numeric value as a ``float``.
    """
    if not isinstance(val, str):
        return float(val)
    # Only the first character is stripped, and only when it is the marker.
    digits = val[1:] if val.startswith('s') else val
    return float(digits)

# Numeric copies of the position / rotation columns (raw cells may carry an
# 's' prefix, which parse_value removes).
df['x_val'] = df['x'].map(parse_value)
df['y_val'] = df['y'].map(parse_value)
df['deg_val'] = df['deg'].map(parse_value)

# Each id has the form NNN_idx: the part before the first underscore is the
# configuration size n, the part after it is the tree's index.
df['n'] = df['id'].map(lambda tree_id: int(tree_id.split('_')[0]))
df['idx'] = df['id'].map(lambda tree_id: int(tree_id.split('_')[1]))

# Sanity prints: sizes 1..200 should add up to 1 + 2 + ... + 200 trees.
print(f"N values range: {df['n'].min()} to {df['n'].max()}")
print(f"Total trees: {len(df)}")
print(f"Expected total: {sum(range(1, 201))} = {200*201//2}")

In [None]:
# Score every configuration size n = 1..200.
# `scores` collects one record per scored n; `all_valid` only tracks whether
# each n has exactly n trees (overlaps are not checked in this cell).
scores = []
all_valid = True

for n in range(1, 201):
    config = df.loc[df['n'] == n]

    if len(config) == n:
        # One polygon per tree, in row order.
        polygons = [
            get_tree_polygon(x, y, deg)
            for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
        ]

        # Side of the square that encloses the whole configuration.
        side = get_bounding_box_side(polygons)

        # Per-configuration term of the score: side^2 / n.
        score_contrib = (side ** 2) / n
        scores.append({'n': n, 'side': side, 'score_contrib': score_contrib})

        # Print only a few representative sizes to keep the output short.
        if n <= 5 or n % 50 == 0:
            print(f"n={n}: side={side:.6f}, score_contrib={score_contrib:.6f}")
    else:
        # A configuration with the wrong tree count is reported and left out
        # of `scores`.
        print(f"ERROR: n={n} has {len(config)} trees, expected {n}")
        all_valid = False

print(f"\nAll configurations valid: {all_valid}")

In [None]:
# Total score = sum of the per-configuration contributions collected in
# `scores`, compared against the score to beat.
TARGET_SCORE = 68.894234  # single source of truth for the target used below

scores_df = pd.DataFrame(scores)
total_score = scores_df['score_contrib'].sum()

print("\n=== BASELINE SCORE ===")
print(f"Total Score: {total_score:.6f}")
print(f"Target Score: {TARGET_SCORE}")
# Positive gap means the baseline is still above (worse than) the target.
print(f"Gap to target: {total_score - TARGET_SCORE:.6f}")

# Show the ten configurations with the largest score contribution.
print("\n=== Top 10 Score Contributors ===")
top_contrib = scores_df.nlargest(10, 'score_contrib')
print(top_contrib.to_string(index=False))

In [None]:
# Spot-check a fixed sample of configuration sizes for overlapping trees.
print("Validating overlaps for sample configurations...")

for n in (1, 5, 10, 20, 50, 100, 150, 200):
    config = df.loc[df['n'] == n]
    # Rebuild the polygons for this size from the parsed numeric columns.
    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
    ]
    has_overlap = check_overlaps(polygons)
    print(f"n={n}: overlaps={has_overlap}")

print("\nValidation complete")

In [None]:
# Publish the baseline: copy the snapshot CSV into the submission folder and
# write the computed total score next to the experiment as JSON.
import json
import os
import shutil

# Create the destination folder if it does not exist yet.
os.makedirs('/home/submission', exist_ok=True)
shutil.copy(
    '/home/code/experiments/001_baseline/snapshot_submission.csv',
    '/home/submission/submission.csv',
)

print("Submission saved to /home/submission/submission.csv")

# Record the total score under the key 'cv_score'.
metrics = {'cv_score': total_score}
with open('/home/code/experiments/001_baseline/metrics.json', 'w') as f:
    f.write(json.dumps(metrics))

print(f"Metrics saved: {metrics}")