# Santa 2025 - Quick EDA

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
getcontext().prec = 30

# Read the sample submission that ships with the competition data
sample_submission_path = '/home/data/sample_submission.csv'
df = pd.read_csv(sample_submission_path)

# Summarise the frame (dimensions, column names) before previewing rows
frame_shape = df.shape
column_names = df.columns.tolist()
print(f"Shape: {frame_shape}")
print(f"Columns: {column_names}")

# Last expression of the cell: rich display of the first ten rows
df.head(10)

Shape: (20100, 4)
Columns: ['id', 'x', 'y', 'deg']


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [2]:
# Parse the data
# Numeric fields are stored as text with a leading 's'; drop it and convert to float
for numeric_col in ('x', 'y', 'deg'):
    df[numeric_col] = df[numeric_col].astype(str).str.lstrip('s').astype(float)

# An id looks like '<n>_<tree index>'; split it once and reuse both pieces
id_parts = df['id'].str.split('_')
df['n'] = id_parts.str[0].astype(int)
df['tree_idx'] = id_parts.str[1].astype(int)

# Sanity check: the row count is compared against 1 + 2 + ... + 200
n_lo, n_hi = df['n'].min(), df['n'].max()
row_count = len(df)
expected_rows = sum(range(1, 201))
print(f"N values: {n_lo} to {n_hi}")
print(f"Total rows: {row_count}")
print(f"Expected rows: {expected_rows} = 1+2+...+200")
print(f"Match: {row_count == expected_rows}")

N values: 1 to 200
Total rows: 20100
Expected rows: 20100 = 1+2+...+200
Match: True


In [None]:
# Load the best pre-optimized submission and calculate scores
best_df = pd.read_csv('/home/nonroot/snapshots/santa-2025/21156851249/submission/submission.csv')

# Strip the leading 's' from each numeric field and convert it to float
for coord_col in ('x', 'y', 'deg'):
    best_df[coord_col] = best_df[coord_col].astype(str).str.lstrip('s').astype(float)

# The part of the id before '_' is stored as the integer column n
id_prefix = best_df['id'].str.split('_').str[0]
best_df['n'] = id_prefix.astype(int)
# Calculate each configuration's bounding-square side length from the bounds of its rotated and translated tree polygons
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

# Outline of the base tree shape as (x, y) vertex pairs, in vertex order.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]

# Separate coordinate lists; get_tree_polygon pairs them up again with zip(TX, TY).
TX = [px for px, py in _TREE_OUTLINE]
TY = [py for px, py in _TREE_OUTLINE]

def get_tree_polygon(x, y, deg):
    """Build the tree outline, rotate it by ``deg`` and move it to ``(x, y)``.

    The outline comes from the module-level ``TX`` / ``TY`` vertex lists.
    Rotation happens first, about the outline's own origin ``(0, 0)``, so the
    subsequent translation puts that origin at ``(x, y)``.

    Args:
        x: Offset applied along the x axis after rotation.
        y: Offset applied along the y axis after rotation.
        deg: Rotation angle handed to ``shapely.affinity.rotate`` (which takes
            degrees unless told otherwise).

    Returns:
        The rotated and translated shapely polygon.
    """
    return affinity.translate(
        affinity.rotate(Polygon(zip(TX, TY)), angle=deg, origin=(0, 0)),
        xoff=x,
        yoff=y,
    )

def calculate_side(group):
    """Return the side of the smallest axis-aligned square enclosing every tree in ``group``.

    Each row of ``group`` is turned into a polygon with ``get_tree_polygon``
    and only its bounding box is kept. The bounding box of the whole layout is
    the min/max over those per-tree boxes, so no geometric union is needed.

    Args:
        group: DataFrame with ``x``, ``y`` and ``deg`` columns, one row per tree.

    Returns:
        The larger of the layout's width and height, as a float.

    Raises:
        ValueError: If ``group`` has no rows. An empty layout has no bounding
            box, and failing loudly avoids a NaN leaking into later score sums.
    """
    # Iterate the three columns directly; iterrows() would build a Series per row.
    boxes = [
        get_tree_polygon(x, y, deg).bounds  # (minx, miny, maxx, maxy)
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]
    if not boxes:
        raise ValueError("calculate_side() needs at least one tree; got an empty group")

    min_x = min(box[0] for box in boxes)
    min_y = min(box[1] for box in boxes)
    max_x = max(box[2] for box in boxes)
    max_y = max(box[3] for box in boxes)
    return max(max_x - min_x, max_y - min_y)

# Score a hand-picked sample of N values rather than every configuration
sampled_ns = [1, 2, 3, 5, 10, 20, 50, 100, 150, 200]
scores = []
for n in sampled_ns:
    # Rows of the submission that belong to the n-tree configuration
    group = best_df[best_df['n'] == n]
    side = calculate_side(group)
    score = side**2 / n
    # Reciprocal of the score, computed once and reused for the record and the printout
    efficiency = n / side**2
    scores.append({'n': n, 'side': side, 'score': score, 'efficiency': efficiency})
    print(f"N={n:3d}: side={side:.6f}, score={score:.6f}, efficiency={efficiency:.4f}")

# Sum over the sampled configurations only, not the full submission
sampled_total = sum(entry['score'] for entry in scores)
print(f"\nTotal score (sampled): {sampled_total:.4f}")