# Christmas Tree Packing - Quick EDA

In [1]:
import pandas as pd
import numpy as np

# Read the sample submission into a DataFrame, then report its dimensions
# and preview the leading rows (the bare last expression is the cell output).
sample_path = '/home/data/sample_submission.csv'
df = pd.read_csv(sample_path)
print("Shape: " + str(df.shape))
print()
print("First 10 rows:")
df.head(10)

Shape: (20100, 4)

First 10 rows:


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [None]:
# Calculate score for sample submission
def strip_s(val):
    """Convert an 's'-prefixed value such as 's0.202736' to a float.

    The value is first turned into its string form, every leading 's'
    character is dropped, and the remainder is parsed with ``float``.

    Args:
        val: Any object; values without a leading 's' are parsed as-is.

    Returns:
        float: The numeric value of the remaining text.

    Raises:
        ValueError: If the remaining text is not a valid float literal.
    """
    text = str(val)
    # Drop all leading 's' characters, not just the first one.
    while text.startswith('s'):
        text = text[1:]
    return float(text)

# Decode the 's'-prefixed text columns into plain floats (strip_s removes the
# prefix) and store them next to the raw columns as x_val / y_val / deg_val.
for col in ('x', 'y', 'deg'):
    df[f'{col}_val'] = df[col].map(strip_s)

# Ids have the form '<n>_<index>' (e.g. '004_3'); the integer before the
# underscore is stored as n and used to group rows below.
df['n'] = df['id'].str.split('_').str[0].astype('int64')

# Calculate an approximate bounding-box side for each n.
# Only the tree centres are considered (not the tree polygons), so a constant
# margin is added as a rough stand-in for the size of the trees themselves.
TREE_SIZE_MARGIN = 1.0

# A single groupby pass replaces one full-frame boolean filter per n, and it
# visits only the n values actually present instead of assuming 1..200.
# Groups are yielded in ascending n order.
scores = []
for n, group in df.groupby('n'):
    x_range = group['x_val'].max() - group['x_val'].min()
    y_range = group['y_val'].max() - group['y_val'].min()
    # This is approximate - the actual side depends on the tree polygon bounds.
    side_approx = max(x_range, y_range) + TREE_SIZE_MARGIN
    score_n = side_approx**2 / n
    scores.append({'n': n, 'side_approx': side_approx, 'score_n': score_n})

scores_df = pd.DataFrame(scores)
print(f"Approximate total score (rough estimate): {scores_df['score_n'].sum():.2f}")
# A real newline here; the doubled backslash used to print a literal "\n".
print("\nScore breakdown by n (first 20):")
scores_df.head(20)