# Christmas Tree Packing - EDA

This notebook explores the structure of the problem and the format of the sample submission.

In [1]:
import pandas as pd
import numpy as np

# Read the sample submission, then report its shape and column names.
sample_path = '/home/data/sample_submission.csv'
df = pd.read_csv(sample_path)

n_rows_cols = df.shape
column_names = df.columns.tolist()
print(f'Shape: {n_rows_cols}')
print(f'Columns: {column_names}')

# Keep this as the last expression so the notebook renders the first rows.
df.head(10)

Shape: (20100, 4)
Columns: ['id', 'x', 'y', 'deg']


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [2]:
# Parse the submission structure.
# Each id is split on '_': the first piece is stored as the configuration
# size `n`, the second as the tree's index within that configuration.
id_parts = df['id'].str.split('_')
df['n'] = id_parts.str[0].astype(int)
df['tree_idx'] = id_parts.str[1].astype(int)

# The coordinate and angle columns hold strings with a leading 's';
# drop that prefix and store the numeric value in a sibling column.
numeric_columns = {'x': 'x_val', 'y': 'y_val', 'deg': 'deg_val'}
for raw_name, value_name in numeric_columns.items():
    df[value_name] = df[raw_name].str.lstrip('s').astype(float)

# Sanity summary: number of configurations, their size range, and row count
# compared with 1 + 2 + ... + 200.
print(f'Number of configurations: {df["n"].nunique()}')
print(f'Tree counts range: {df["n"].min()} to {df["n"].max()}')
print(f'Total trees: {len(df)}')
print(f'Expected total: {sum(range(1, 201))} = 1+2+...+200')

In [3]:
# Calculate score for each configuration
# NOTE(review): Decimal is not used anywhere in this cell. The import and the
# precision setting are kept as-is because they change global decimal state
# that code outside this cell may rely on.
from decimal import Decimal, getcontext
getcontext().prec = 25


def calculate_bounding_box(group):
    """Return the longer side of the axis-aligned box enclosing a group of trees.

    Every tree is rotated by its ``deg_val`` angle about its own origin and
    then shifted to (``x_val``, ``y_val``) before the extents are measured.
    Earlier versions ignored the rotation and always padded the positions by
    the upright tree's extents, which is wrong for any tree that is not at
    0 degrees.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one configuration. Must contain ``x_val`` and ``y_val``.
        ``deg_val`` (degrees) is used when present; when it is missing all
        trees are treated as unrotated, which reproduces the old behaviour.

    Returns
    -------
    float
        ``max(width, height)`` of the bounding box, or NaN for an empty group.

    Notes
    -----
    * Rotation is applied counter-clockwise for positive angles (the usual
      mathematical convention) -- TODO confirm against the official metric.
    * Only the tip, the two base corners and the two trunk-bottom corners are
      rotated. This is exact provided every other vertex of the tree outline
      lies inside the convex hull of those five points -- TODO confirm
      against the official tree polygon.
    """
    # Tree dimensions
    trunk_w = 0.15
    trunk_h = 0.2
    base_w = 0.7
    tip_y = 0.8

    if len(group) == 0:
        # Matches the NaN the min()/max() based version produced for no rows.
        return float('nan')

    # Extreme points of the upright tree in its local frame:
    # tip, right base corner, right/left trunk bottom, left base corner.
    hull_x = np.array([0.0, base_w / 2, trunk_w / 2, -trunk_w / 2, -base_w / 2])
    hull_y = np.array([tip_y, 0.0, -trunk_h, -trunk_h, 0.0])

    centre_x = group['x_val'].to_numpy(dtype=float)
    centre_y = group['y_val'].to_numpy(dtype=float)
    if 'deg_val' in group:
        theta = np.deg2rad(group['deg_val'].to_numpy(dtype=float))
    else:
        theta = np.zeros_like(centre_x)

    # Column vectors so that each tree (row) broadcasts against the 5 points.
    cos_t = np.cos(theta)[:, None]
    sin_t = np.sin(theta)[:, None]

    # Standard 2-D rotation followed by translation to the tree's position.
    world_x = centre_x[:, None] + hull_x * cos_t - hull_y * sin_t
    world_y = centre_y[:, None] + hull_x * sin_t + hull_y * cos_t

    width = world_x.max() - world_x.min()
    height = world_y.max() - world_y.min()
    return float(max(width, height))


# Calculate approximate scores: squared side of the box divided by tree count.
scores = []
for n, group in df.groupby('n'):
    side = calculate_bounding_box(group)
    score = side**2 / n
    scores.append({'n': n, 'side': side, 'score': score})

scores_df = pd.DataFrame(scores)
print(f'Total approximate score: {scores_df["score"].sum():.6f}')
print(f'\nTop 10 contributors to score:')
print(scores_df.nlargest(10, 'score')[['n', 'side', 'score']])

In [4]:
# Summarise how the parsed rotation angles are distributed.
angles = df['deg_val']

print('Rotation angle distribution:')
print(angles.describe())

# Number of distinct angles, then the ten most frequent ones.
print(f'\nUnique angles: {angles.nunique()}')
print(f'\nMost common angles:')
print(angles.value_counts().head(10))

In [5]:
# Extent of the tree positions across the whole submission.
x_all, y_all = df['x_val'], df['y_val']
print('Coordinate ranges:')
print(f'X: [{x_all.min():.4f}, {x_all.max():.4f}]')
print(f'Y: [{y_all.min():.4f}, {y_all.max():.4f}]')

# Same extents restricted to the n=200 configuration (largest).
n200 = df[df['n'] == 200]
x_200, y_200 = n200['x_val'], n200['y_val']
print(f'\nFor n=200:')
print(f'X range: [{x_200.min():.4f}, {x_200.max():.4f}]')
print(f'Y range: [{y_200.min():.4f}, {y_200.max():.4f}]')