# Quick EDA for Tree Packing Problem

In [1]:
import pandas as pd
import numpy as np

# Read the sample submission and show its dimensions plus the first 20 rows.
# The path is absolute and specific to the machine this notebook was run on.
sample_path = '/home/data/sample_submission.csv'
df = pd.read_csv(sample_path)
print(f'Shape: {df.shape}', df.head(20), sep='\n')

Shape: (20100, 4)
       id           x           y     deg
0   001_0        s0.0        s0.0   s90.0
1   002_0        s0.0        s0.0   s90.0
2   002_1   s0.202736  s-0.511271   s90.0
3   003_0        s0.0        s0.0   s90.0
4   003_1   s0.202736  s-0.511271   s90.0
5   003_2     s0.5206   s0.177413  s180.0
6   004_0        s0.0        s0.0   s90.0
7   004_1   s0.202736  s-0.511271   s90.0
8   004_2     s0.5206   s0.177413  s180.0
9   004_3  s-0.818657  s-0.228694  s180.0
10  005_0        s0.0        s0.0   s90.0
11  005_1   s0.202736  s-0.511271   s90.0
12  005_2     s0.5206   s0.177413  s180.0
13  005_3  s-0.818657  s-0.228694  s180.0
14  005_4   s0.111852   s0.893022  s180.0
15  006_0        s0.0        s0.0   s90.0
16  006_1   s0.202736  s-0.511271   s90.0
17  006_2     s0.5206   s0.177413  s180.0
18  006_3  s-0.818657  s-0.228694  s180.0
19  006_4   s0.111852   s0.893022  s180.0


In [2]:
# Calculate score for sample submission
# Score = sum(s^2/n) where s is side length of bounding box

def strip_s(val):
    """Parse a submission field into a float, ignoring its leading 's' marker.

    The value is stringified first, so inputs that are already numeric pass
    through unchanged. Every leading 's' character is dropped before parsing.

    Raises:
        ValueError: if what remains is not a valid float literal.
    """
    text = str(val)
    number_part = text.lstrip('s')
    return float(number_part)

# Turn the 's'-prefixed text columns into plain float columns.
df['x_val'] = df['x'].map(strip_s)
df['y_val'] = df['y'].map(strip_s)
df['deg_val'] = df['deg'].map(strip_s)

# Ids look like '002_1'; the integer before the underscore is stored as n.
df['n'] = df['id'].map(lambda row_id: int(row_id.split('_')[0]))

# For each n, estimate the side of the bounding square from the placement
# centre points only. This is a rough figure: the true bounding box needs the
# actual tree geometry, which is not modelled here.
#
# Groups are taken from the data itself (one groupby pass) rather than from a
# hard-coded range, so a file with a different set of n values works and a
# missing n does not produce a NaN row.

# Rough allowance for the tree's own size, added to each axis range.
APPROX_TREE_SIZE = 1.0

centres_by_n = df.groupby('n')[['x_val', 'y_val']]
# Per-n spread of the centre points along x and y.
centre_range = centres_by_n.max() - centres_by_n.min()
# The square must cover the larger of the two padded ranges.
side_by_n = (centre_range + APPROX_TREE_SIZE).max(axis=1)

# One row per n with columns n, side, score, where score = side^2 / n.
scores_df = (
    side_by_n.rename('side')
    .reset_index()
    .assign(score=lambda t: t['side'] ** 2 / t['n'])
)

print(f"Approximate total score: {scores_df['score'].sum():.6f}")
print("\nScore distribution by n:")
print(scores_df.describe())

Approximate total score: 157.014760

Score distribution by n:
                n        side       score
count  200.000000  200.000000  200.000000
mean   100.500000    8.305593    0.785074
std     57.879185    2.855474    0.078182
min      1.000000    1.000000    0.695447
25%     50.750000    6.201238    0.746737
50%    100.500000    8.638027    0.770550
75%    150.250000   10.675546    0.796812
max    200.000000   12.434470    1.368031


In [None]:
# Show how often each rotation angle occurs (the 20 most frequent values).
print(
    "Rotation angle distribution:",
    df['deg_val'].value_counts().head(20),
    sep="\n",
)