# EDA: Christmas Tree Packing Problem

This is a 2D geometric packing optimization problem. The goal is to pack Christmas-tree-shaped polygons into the smallest possible square bounding box, with one configuration for each tree count from 1 to 200.

In [1]:
import pandas as pd
import numpy as np

# Read the sample submission file into a DataFrame
submission_path = '/home/data/sample_submission.csv'
df = pd.read_csv(submission_path)

# Quick overview: dimensions, column names, then the leading rows
print(f'Shape: {df.shape}')
print(f'\nColumns: {df.columns.tolist()}')
print(f'\nFirst 20 rows:')
df.head(20)

Shape: (20100, 4)

Columns: ['id', 'x', 'y', 'deg']

First 20 rows:


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [2]:
# One configuration per tree count n = 1..200 is stacked in the file, so the
# total row count should be 1 + 2 + ... + 200 = 20100.
print(f"Expected rows: {sum(range(1, 201))} = 20100")
print(f"Actual rows: {len(df)}")

# Each id has the form '<group>_<tree index>' (e.g. '003_2'). Split it once with
# the vectorised string accessor and keep both parts as integer columns.
id_parts = df['id'].str.split('_', expand=True)
df['group'] = id_parts[0].astype('int64')
df['tree_idx'] = id_parts[1].astype('int64')

print(f"\nNumber of groups (n-tree configurations): {df['group'].nunique()}")
print(f"Groups range: {df['group'].min()} to {df['group'].max()}")

# Number of rows (trees) in every group
trees_per_group = df.groupby('group').size()
print(f"\nTrees per group (first 10):")
print(trees_per_group.head(10))

Expected rows: 20100 = 20100
Actual rows: 20100

Number of groups (n-tree configurations): 200
Groups range: 1 to 200

Trees per group (first 10):
group
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
dtype: int64


In [4]:
# The x, y and deg columns hold strings such as 's0.202736'. Remove the 's'
# marker and store a float version of each under a *_val name for analysis.
numeric_names = {'x': 'x_val', 'y': 'y_val', 'deg': 'deg_val'}
for raw_name, val_name in numeric_names.items():
    df[val_name] = df[raw_name].str.strip('s').astype(float)

# Overall spread of the converted values across the whole file
print("Coordinate ranges:")
print(f"X: {df['x_val'].min():.6f} to {df['x_val'].max():.6f}")
print(f"Y: {df['y_val'].min():.6f} to {df['y_val'].max():.6f}")
print(f"Deg: {df['deg_val'].min():.1f} to {df['deg_val'].max():.1f}")

# Calculate bounding box side for each group
def calc_side(group_df):
    """Approximate the square side length needed for one group.

    Parameters
    ----------
    group_df : DataFrame with numeric ``x_val`` and ``y_val`` columns.

    Returns
    -------
    The larger of the x extent (max - min of ``x_val``) and the y extent
    (max - min of ``y_val``).

    Notes
    -----
    Only the stored (x, y) values are considered, not the tree polygon
    outlines, so this is an approximation: a group with a single row
    always yields 0.0.
    """
    # np.ptp is "peak to peak", i.e. max - min of the array
    x_extent = np.ptp(group_df['x_val'].to_numpy())
    y_extent = np.ptp(group_df['y_val'].to_numpy())
    return max(x_extent, y_extent)

# Apply calc_side to each group's coordinate columns only. Selecting the columns
# explicitly keeps the grouping column out of the frames handed to calc_side,
# which avoids the pandas deprecation warning about DataFrameGroupBy.apply
# operating on the grouping columns. The result is still a Series indexed by group.
sides = df.groupby('group')[['x_val', 'y_val']].apply(calc_side)
print("\nApproximate side lengths (first 10 groups):")
print(sides.head(10))

Coordinate ranges:
X: -5.798383 to 5.289256
Y: -5.635722 to 5.798748
Deg: 0.0 to 270.0

Approximate side lengths (first 10 groups):
group
1     0.000000
2     0.511271
3     0.688684
4     1.339257
5     1.404293
6     1.404293
7     1.751647
8     1.841115
9     1.942984
10    1.942984
dtype: float64


  sides = df.groupby('group').apply(calc_side)
