# Baseline Experiment - Santa 2025

Validate and score the best pre-optimized submission from snapshots.

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json

# Precision (significant digits) of the current Decimal context.
# NOTE(review): no Decimal arithmetic appears in the cells shown here — confirm
# whether this setting is still needed.
getcontext().prec = 30

# Tree shape definition: x and y coordinates of the 15 outline vertices, listed
# in matching order. get_tree_polygon() zips the two lists into (x, y) pairs,
# so they must stay the same length.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def get_tree_polygon(x, y, angle_deg):
    """Return the tree outline rotated about the origin and then moved to (x, y).

    The outline is built from the TX/TY vertex lists, rotated by ``angle_deg``
    around (0, 0), and finally translated by ``x`` and ``y``.
    """
    outline = Polygon(list(zip(TX, TY)))
    # Rotate first, around the shape's own origin, so the translation that
    # follows places that origin exactly at (x, y).
    rotated = affinity.rotate(outline, angle_deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def has_overlap(poly1, poly2, tolerance=1e-9):
    """Report whether two polygons share interior area, ignoring mere contact.

    Returns True only when the polygons intersect and the area of their
    intersection is strictly greater than ``tolerance``.
    """
    if poly1.intersects(poly2):
        # Touching edges or corners intersect too, but with (near-)zero area.
        shared_area = poly1.intersection(poly2).area
        return shared_area > tolerance
    return False

# Progress marker: reaching this line means the definitions above ran without error.
print("Functions defined successfully")

In [None]:
# Read the best baseline submission from disk
baseline_path = '/home/code/best_baseline.csv'
df = pd.read_csv(baseline_path)

# Submission values are stored as text containing an 's' marker; drop the
# marker and convert each coordinate/angle column to a float "<name>_val" column.
for column in ('x', 'y', 'deg'):
    as_text = df[column].astype(str)
    df[f'{column}_val'] = as_text.str.replace('s', '').astype(float)

# The tree count n is the part of the id before the first underscore.
df['n'] = df['id'].map(lambda ident: int(ident.split('_')[0]))

row_count = len(df)
print(f"Loaded {row_count} rows")
print(f"N values: {df['n'].min()} to {df['n'].max()}")
print(df.head())

In [None]:
# Validate and score the submission
def validate_and_score(df, check_overlaps=True, max_n=200):
    """Validate a submission for overlaps and compute its total score.

    For every tree count ``n`` from 1 to ``max_n`` the rows with
    ``df['n'] == n`` are turned into polygons, optionally checked pairwise for
    overlap, and scored as ``side**2 / n`` where ``side`` is the longer edge of
    the axis-aligned bounding box around all ``n`` trees.

    Args:
        df: DataFrame with the columns ``n``, ``x_val``, ``y_val`` and
            ``deg_val``.
        check_overlaps: When true, test every pair of trees within a group
            using ``has_overlap``. The number of pairs grows quadratically
            with ``n``, so disable this for a quick score.
        max_n: Largest tree count to score (inclusive). Defaults to 200.

    Returns:
        A tuple ``(total_score, scores_by_n, overlaps_found)``:
        the sum of all per-``n`` scores, a dict mapping ``n`` to its score, and
        a list of ``(n, i, j)`` tuples where ``i < j`` are positions (within
        the group for ``n``) of two overlapping trees. Groups whose row count
        differs from ``n`` are reported with a warning and left out of both
        the total and the dict.
    """
    from itertools import combinations

    total_score = 0
    scores_by_n = {}
    overlaps_found = []

    # Split the frame once instead of re-filtering every row for each n.
    groups = {int(key): rows for key, rows in df.groupby('n')}

    for n in range(1, max_n + 1):
        group = groups.get(n)
        count = 0 if group is None else len(group)
        if count != n:
            print(f"WARNING: N={n} has {count} trees instead of {n}")
            continue

        # Iterating the three columns directly avoids building a Series per
        # row, which is what iterrows() does.
        polys = [
            get_tree_polygon(x, y, deg)
            for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
        ]

        # Check overlaps (expensive, can skip for speed)
        if check_overlaps:
            for (i, first), (j, second) in combinations(enumerate(polys), 2):
                if has_overlap(first, second):
                    overlaps_found.append((n, i, j))

        # The bounding box of the whole group is the envelope of the per-tree
        # bounds, so there is no need to build a geometric union first.
        boxes = [poly.bounds for poly in polys]
        min_x = min(box[0] for box in boxes)
        min_y = min(box[1] for box in boxes)
        max_x = max(box[2] for box in boxes)
        max_y = max(box[3] for box in boxes)
        side = max(max_x - min_x, max_y - min_y)

        score_n = side**2 / n
        scores_by_n[n] = score_n
        total_score += score_n

        # Only echo a sample of groups to keep the log readable.
        if n <= 10 or n % 50 == 0:
            print(f"N={n}: side={side:.6f}, score_contribution={score_n:.6f}")

    return total_score, scores_by_n, overlaps_found

# Run the full validation, including the pairwise overlap scan.
print("Starting validation (this may take a few minutes)...")
total_score, scores_by_n, overlaps = validate_and_score(df, check_overlaps=True)

# Collect the summary lines first, then emit them in one go.
summary_lines = [
    f"\nTotal Score: {total_score:.6f}",
    f"Overlaps found: {len(overlaps)}",
]
if overlaps:
    summary_lines.append(f"First 10 overlaps: {overlaps[:10]}")
print("\n".join(summary_lines))

In [None]:
# Rank the per-N scores from highest (worst) to lowest (best) contribution
sorted_scores = sorted(scores_by_n.items(), key=lambda item: item[1], reverse=True)

# Both reports share one layout: a heading followed by one line per N.
for heading, rows in (
    ("\nTop 10 worst N values (highest score contribution):", sorted_scores[:10]),
    ("\nTop 10 best N values (lowest score contribution):", sorted_scores[-10:]),
):
    print(heading)
    for n, score in rows:
        print(f"  N={n}: {score:.6f}")

# Compare the current total against the target score
target = 68.894234
gap = total_score - target
print(f"\nTarget: {target}")
print(f"Current: {total_score:.6f}")
print(f"Gap: {gap:.6f}")

In [None]:
# Save submission and metrics
import os
import shutil

# Output locations, defined once so the copies, the metrics file and the
# log messages cannot drift apart.
submission_path = '/home/submission/submission.csv'
experiment_dir = '/home/code/experiments/001_baseline'
metrics_path = os.path.join(experiment_dir, 'metrics.json')

# shutil.copy() and open(..., 'w') both fail with FileNotFoundError when the
# target directory is missing, so create the directories up front.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
os.makedirs(experiment_dir, exist_ok=True)

# Copy the baseline to the submission folder and to the experiment folder
shutil.copy(baseline_path, submission_path)
shutil.copy(baseline_path, os.path.join(experiment_dir, 'submission.csv'))

# Save metrics (JSON object keys must be strings, hence str(k))
metrics = {
    'cv_score': total_score,
    'overlaps': len(overlaps),
    'target': target,
    'gap_to_target': total_score - target,
    'scores_by_n': {str(k): v for k, v in scores_by_n.items()},
}

with open(metrics_path, 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)

print(f"Saved submission to {submission_path}")
print(f"Saved metrics to {metrics_path}")
print(f"\nFinal CV Score: {total_score:.6f}")