In [1]:
import pandas as pd
import numpy as np

# Read the sample submission into `df`, then report its dimensions and
# preview the first ten rows.
sample_submission_path = '/home/data/sample_submission.csv'
df = pd.read_csv(sample_submission_path)

# Both status lines are emitted by a single call; the blank line before the
# preview heading comes from the heading's own leading newline.
print(f'Shape: {df.shape}', f'\nFirst 10 rows:', sep='\n')

# Bare trailing expression so the notebook renders the preview as a table.
df.head(10)

Shape: (20100, 4)

First 10 rows:


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [None]:
# Analyze the sample submission - approximate the score from centre points.
#
# For every configuration size n (the numeric prefix of `id`), measure how far
# the placed centre points spread along x and y, take the larger spread as the
# side of a square, and use side**2 / n as that n's contribution. Polygon
# geometry is not considered here, so the printed total is only an estimate.

# NOTE(review): Decimal / getcontext are not used anywhere in this cell. They
# are kept (together with the precision setting) in case later cells rely on
# them - confirm, then move the import to the notebook's import cell.
from decimal import Decimal, getcontext
getcontext().prec = 30

# Largest configuration size included in the summary (n = 1..MAX_N).
MAX_N = 200

# Parse the data. The numeric columns are stored as text with a one-character
# prefix (e.g. 's0.202736'), which `.str[1:]` drops before the float cast.
df['n'] = df['id'].str.split('_').str[0].astype(int)
df['x_val'] = df['x'].str[1:].astype(float)
df['y_val'] = df['y'].str[1:].astype(float)
df['deg_val'] = df['deg'].str[1:].astype(float)

# Calculate score for each N.
# One grouped pass gives the per-n extent of the centre points; reindexing
# keeps a row for every n in 1..MAX_N (NaN where the data has no such n).
centers_by_n = df.groupby('n')[['x_val', 'y_val']]
spread = (centers_by_n.max() - centers_by_n.min()).reindex(range(1, MAX_N + 1))

# Note: This is approximate - actual bounding box needs tree polygon geometry.
# For now, just show the center point spread.
scores_df = (
    spread
    .rename(columns={'x_val': 'width', 'y_val': 'height'})
    .rename_axis('n')
    .reset_index()
)
scores_df['side'] = np.maximum(scores_df['width'], scores_df['height'])
scores_df['score'] = scores_df['side'] ** 2 / scores_df['n']

# Record-style view of the same table, kept so any later cell that reads the
# `scores` list still finds it.
scores = scores_df.to_dict('records')

print(f"Total approximate score (center points only): {scores_df['score'].sum():.6f}")
# A single backslash so these print a real blank line, not a literal "\n".
print("\nNote: Actual score requires tree polygon geometry (adds ~0.7-1.0 to each dimension)")
print("\nTop 10 largest score contributions:")
scores_df.nlargest(10, 'score')