# Baseline Analysis - Santa 2025

Load the best pre-optimized submission from snapshots, verify its structure, and compute its per-N and total baseline scores.

In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict

# Tree outline: 15 polygon vertices listed clockwise from the top point
# (0, 0.8); the shape is mirror-symmetric about the vertical axis x = 0.
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
])


def get_tree_vertices(x, y, angle_deg):
    """Return the tree outline rotated by ``angle_deg`` and moved to (x, y).

    The template vertices (TX, TY) are rotated counter-clockwise about the
    template origin by ``angle_deg`` degrees and then shifted by (x, y).

    Returns:
        A pair ``(rx, ry)`` of arrays holding the transformed x and y
        coordinates, in the same vertex order as TX / TY.
    """
    theta = np.radians(angle_deg)
    c = np.cos(theta)
    s = np.sin(theta)

    # Standard 2-D rotation first, translation second.
    moved_x = TX * c - TY * s + x
    moved_y = TX * s + TY * c + y
    return moved_x, moved_y

def calculate_score_for_n(trees):
    """Score one configuration: bounding-square area divided by tree count.

    Args:
        trees: Sequence of (x, y, angle) tuples, one per placed tree.

    Returns:
        ``side ** 2 / len(trees)``, where ``side`` is the larger of the
        width and height of the axis-aligned box around every tree vertex.
    """
    # Transform every tree once, then flatten the vertex coordinates.
    outlines = [get_tree_vertices(cx, cy, rot) for cx, cy, rot in trees]
    xs = [v for vx, _ in outlines for v in vx]
    ys = [v for _, vy in outlines for v in vy]

    extent_x = max(xs) - min(xs)
    extent_y = max(ys) - min(ys)
    # The enclosing square must cover the longer of the two extents.
    return max(extent_x, extent_y) ** 2 / len(trees)

print("Functions defined successfully")

In [None]:
# Read the baseline submission from disk and print a quick overview of it
df = pd.read_csv('submission.csv')
print(f"Submission shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print("\nFirst 10 rows:", df.head(10), sep="\n")

In [None]:
# Parse the submission
def parse_submission(df):
    """Group the submission rows by N.

    Args:
        df: DataFrame with columns 'id', 'x', 'y' and 'deg'. The text before
            the first underscore of 'id' is read as N.

    Returns:
        A plain dict mapping each N to its list of (x, y, angle) float
        tuples, in row order.
    """
    def _as_number(cell):
        # Cells may be written like 's0.5'; drop every 's' before converting.
        return float(str(cell).replace('s', ''))

    grouped = {}
    for _, record in df.iterrows():
        size = int(record['id'].split('_')[0])
        placement = tuple(_as_number(record[col]) for col in ('x', 'y', 'deg'))
        grouped.setdefault(size, []).append(placement)

    return grouped

# Build the N -> placements mapping and spot-check both ends of the range
configs = parse_submission(df)
print(f"Parsed {len(configs)} N values")
print(f"N=1 has {len(configs[1])} trees")
print(f"N=200 has {len(configs[200])} trees")

# Show the two smallest configurations in full
print(f"\nN=1 config: {configs[1]}")
print(f"N=2 config: {configs[2]}")

In [None]:
# Calculate total score
def calculate_total_score(configs, max_n=200):
    """Sum the per-N scores for N = 1..max_n.

    Args:
        configs: Mapping from N to the list of (x, y, angle) tuples for
            that N.
        max_n: Largest N to include. Defaults to 200, the range this
            function previously hard-coded.

    Returns:
        A pair ``(total, scores_by_n)``: ``total`` is the sum of every score
        that was computed (``0`` when nothing was scored) and ``scores_by_n``
        maps each scored N to its individual score.

    A WARNING line is printed for every N that is missing from ``configs``
    or whose tree count differs from N. Missing and empty entries are
    skipped; an entry with a wrong but non-zero count is still scored.
    """
    total = 0
    scores_by_n = {}

    for n in range(1, max_n + 1):
        if n not in configs:
            print(f"WARNING: Missing N={n}")
            continue

        trees = configs[n]
        if len(trees) != n:
            print(f"WARNING: N={n} has {len(trees)} trees instead of {n}")

        if len(trees) == 0:
            # An empty configuration has no bounding box to score; treat it
            # like a missing N instead of letting min()/max() blow up.
            continue

        score_n = calculate_score_for_n(trees)
        scores_by_n[n] = score_n
        total += score_n

    return total, scores_by_n

# Score the whole baseline and report how far it is from the target
total_score, scores_by_n = calculate_total_score(configs)
print(
    f"\nTotal Score: {total_score:.6f}",
    "Target: 68.882921",
    f"Gap: {total_score - 68.882921:.6f}",
    sep="\n",
)

In [None]:
# Rank every N by its contribution to the total, largest first
print("\nTop 10 highest score contributions (worst N values):")
sorted_scores = sorted(
    scores_by_n.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
for n, score in sorted_scores[:10]:
    print(f"  N={n}: {score:.6f}")

# The tail of the same ranking holds the smallest contributions
print("\nTop 10 lowest score contributions (best N values):")
for n, score in sorted_scores[-10:]:
    print(f"  N={n}: {score:.6f}")

# Spot-check the three smallest puzzles
print(f"\nN=1 score: {scores_by_n[1]:.6f} (theoretical optimal: 0.6612)")
print(f"N=2 score: {scores_by_n[2]:.6f}")
print(f"N=3 score: {scores_by_n[3]:.6f}")

In [None]:
# Save metrics
import json

# Collect the headline numbers; JSON object keys must be strings, so the
# integer N keys are converted before dumping.
metrics = dict(
    cv_score=total_score,
    target=68.882921,
    gap=total_score - 68.882921,
    scores_by_n={str(size): value for size, value in scores_by_n.items()},
)

with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved to metrics.json")
print(f"\nBaseline CV Score: {total_score:.6f}")