# Quick EDA - Christmas Tree Packing

In [1]:
import pandas as pd
import numpy as np

# Load sample submission
# One row per tree; the id column combines the configuration number and the
# tree's index within it (e.g. '004_2').
# NOTE: x, y and deg are stored as strings carrying an 's' prefix
# (e.g. 's0.202736'), so they must be stripped and converted before any
# numeric use.
df = pd.read_csv('/home/data/sample_submission.csv')
print(f'Shape: {df.shape}')
print(f'Columns: {df.columns.tolist()}')
# Bare last expression so the notebook renders the first 10 rows as a table.
df.head(10)

Shape: (20100, 4)
Columns: ['id', 'x', 'y', 'deg']


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [2]:
# Count trees per configuration
# Ids look like '<config>_<index>' (e.g. '004_2'); the zero-padded part before
# the underscore is the configuration size n. Parsed with the vectorized .str
# accessor instead of a per-row Python lambda.
df['n'] = df['id'].str.split('_').str[0].astype('int64')
print(f"Configurations: {df['n'].nunique()} (1 to {df['n'].max()})")
print(f"Total trees: {len(df)}")
print(f"Expected: sum(1 to 200) = {sum(range(1, 201))}")

# Verify structure
tree_counts = df.groupby('n').size()
# Actually check the invariant rather than only eyeballing the printout:
# configuration n must contain exactly n trees.
assert (tree_counts.to_numpy() == tree_counts.index.to_numpy()).all(), \
    "each configuration n is expected to contain exactly n trees"
print("\nTree counts per config (first 10):")
print(tree_counts.head(10))

Configurations: 200 (1 to 200)
Total trees: 20100
Expected: sum(1 to 200) = 20100

Tree counts per config (first 10):
n
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
dtype: int64


In [None]:
# Calculate score for sample submission (baseline)
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union

# Set the current decimal context to 25 significant digits (Python's default
# is 28). NOTE(review): no Decimal arithmetic appears in the visible cells --
# presumably kept for parity with a Decimal-based scoring routine; confirm
# before removing.
getcontext().prec = 25

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` about the origin, then moved to (x, y).

    All three arguments are converted with ``float()`` before being handed to
    shapely, so any value ``float()`` accepts may be passed. ``deg`` is given
    to ``affinity.rotate`` without ``use_radians``, i.e. it is an angle in
    degrees.
    """
    # Outline in the tree's own frame: starts at the tip, runs down the
    # right-hand (positive x) side, around the trunk, and back up the left.
    outline = [
        (0.0, 0.8),
        (0.125, 0.5),
        (0.0625, 0.5),
        (0.2, 0.25),
        (0.1, 0.25),
        (0.35, 0.0),
        (0.075, 0.0),
        (0.075, -0.2),
        (-0.075, -0.2),
        (-0.075, 0.0),
        (-0.35, 0.0),
        (-0.1, 0.25),
        (-0.2, 0.25),
        (-0.0625, 0.5),
        (-0.125, 0.5),
    ]
    base = Polygon(outline)
    # Rotate first (about the local origin), translate second.
    turned = affinity.rotate(base, float(deg), origin=(0, 0))
    return affinity.translate(turned, xoff=float(x), yoff=float(y))

# Calculate side length for each configuration
def calc_side_length(group_df):
    """Return the side of the axis-aligned bounding square of one configuration.

    Parameters
    ----------
    group_df : DataFrame
        Rows of a single configuration with columns 'x', 'y' and 'deg'.
        Values may be submission-style strings with an 's' prefix
        (e.g. 's0.5206') or already-numeric values.

    Returns
    -------
    float
        The larger of the width and height of the bounding box around the
        union of all tree polygons.
    """
    def _as_float(value):
        # Go through str() so numeric cells work too; the original called
        # .lstrip directly and raised AttributeError on floats.
        return float(str(value).lstrip('s'))

    # Iterate the three columns in lockstep rather than materialising a
    # Series per row with iterrows().
    polygons = [
        create_tree_polygon(_as_float(x), _as_float(y), _as_float(deg))
        for x, y, deg in zip(group_df['x'], group_df['y'], group_df['deg'])
    ]

    # shapely bounds are (minx, miny, maxx, maxy).
    bounds = unary_union(polygons).bounds
    return max(bounds[2] - bounds[0], bounds[3] - bounds[1])

# Side length and per-configuration score term (side^2 / n) for n = 1..20 only
print("Calculating side lengths for first 20 configs...")
side_lengths = {}
for n in range(1, 21):
    # Rows belonging to configuration n
    group = df.loc[df['n'].eq(n)]
    side = side_lengths[n] = calc_side_length(group)
    print(f"N={n:3d}: side={side:.6f}, score_contrib={side**2/n:.6f}")