# Quick EDA - Christmas Tree Packing

In [1]:
import pandas as pd
import numpy as np

# Read the sample submission into `df`; later cells work on this frame.
submission_csv = '/home/data/sample_submission.csv'
df = pd.read_csv(submission_csv)

# Report the frame's dimensions and its column names, one per line.
print(f'Shape: {df.shape}', f'Columns: {df.columns.tolist()}', sep='\n')

# Bare last expression: the notebook renders the first 10 rows as a table.
df.head(10)

Shape: (20100, 4)
Columns: ['id', 'x', 'y', 'deg']


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [2]:
# Calculate total number of trees: sum(1 to 200) = 200*201/2 = 20100
print(f"Expected rows: {200*201//2}")
print(f"Actual rows: {len(df)}")

# Parse the data
# An id has the form '<n>_<tree_idx>' (e.g. '004_3'). Split the whole column
# once with the vectorized string accessor instead of calling a Python lambda
# on every row twice.
id_parts = df['id'].str.split('_', expand=True)
df['n'] = id_parts[0].astype('int64')
df['tree_idx'] = id_parts[1].astype('int64')

# x / y / deg are stored as text with a leading 's' (e.g. 's0.202736').
# Verify the prefix before slicing it off: dropping the first character of a
# value that has no prefix would silently eat a sign or a digit.
for col in ('x', 'y', 'deg'):
    if not df[col].str.startswith('s', na=False).all():
        raise ValueError(f"Column {col!r} contains values without the leading 's' prefix")
    df[f'{col}_val'] = df[col].str[1:].astype(float)

print(f"\nN values range: {df['n'].min()} to {df['n'].max()}")
print(f"\nSample parsed data:")
df[['id', 'n', 'tree_idx', 'x_val', 'y_val', 'deg_val']].head(10)

Expected rows: 20100
Actual rows: 20100

N values range: 1 to 200

Sample parsed data:


Unnamed: 0,id,n,tree_idx,x_val,y_val,deg_val
0,001_0,1,0,0.0,0.0,90.0
1,002_0,2,0,0.0,0.0,90.0
2,002_1,2,1,0.202736,-0.511271,90.0
3,003_0,3,0,0.0,0.0,90.0
4,003_1,3,1,0.202736,-0.511271,90.0
5,003_2,3,2,0.5206,0.177413,180.0
6,004_0,4,0,0.0,0.0,90.0
7,004_1,4,1,0.202736,-0.511271,90.0
8,004_2,4,2,0.5206,0.177413,180.0
9,004_3,4,3,-0.818657,-0.228694,180.0


In [3]:
# Calculate bounding box and score for each configuration
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union

# Precision of the active decimal context. NOTE(review): no Decimal arithmetic
# appears in the code visible here, so this setting has no visible effect.
getcontext().prec = 25

# Tree geometry
# Outline vertices as (x, y) pairs: they start at the top vertex (0, 0.8),
# run down the +x side to y = -0.2, then back up the -x side.
TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]

# Separate coordinate lists, in the same vertex order as TREE_OUTLINE.
TX = [vertex[0] for vertex in TREE_OUTLINE]
TY = [vertex[1] for vertex in TREE_OUTLINE]

def get_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` and then moved to ``(x, y)``.

    The outline is built from the module-level ``TX`` / ``TY`` vertex lists.
    It is first rotated about the point (0, 0) -- ``deg`` is handed to
    ``shapely.affinity.rotate``, which takes degrees by default -- and the
    rotated shape is then shifted by ``x`` along the x-axis and ``y`` along
    the y-axis.

    Args:
        x: Offset applied along the x-axis after rotating.
        y: Offset applied along the y-axis after rotating.
        deg: Rotation angle passed to ``affinity.rotate``.

    Returns:
        The transformed shapely ``Polygon``.
    """
    outline = Polygon(zip(TX, TY))
    return affinity.translate(
        affinity.rotate(outline, deg, origin=(0, 0)),
        xoff=x,
        yoff=y,
    )

def calculate_score_for_n(group_df):
    """Calculate score contribution for a single n-tree configuration.

    Args:
        group_df: DataFrame holding the rows of one configuration. It must
            provide the columns ``n``, ``x_val``, ``y_val`` and ``deg_val``;
            ``n`` is read from the first row.

    Returns:
        A ``(side, score)`` tuple. ``side`` is the longer edge of the
        axis-aligned bounding box enclosing every tree of the configuration,
        and ``score`` is ``side ** 2 / n``.

    Raises:
        ValueError: If ``group_df`` has no rows.
    """
    if len(group_df) == 0:
        raise ValueError("calculate_score_for_n() needs at least one tree row")

    n = group_df['n'].iloc[0]

    # Walk the three needed columns in lockstep; unlike iterrows() this does
    # not construct a Series object for every row.
    tree_bounds = [
        get_tree_polygon(x, y, deg).bounds  # (minx, miny, maxx, maxy)
        for x, y, deg in zip(group_df['x_val'], group_df['y_val'], group_df['deg_val'])
    ]

    # The bounding box of the whole layout is the envelope of the per-tree
    # boxes, so no geometric union of the polygons is required to get it.
    minx = min(b[0] for b in tree_bounds)
    miny = min(b[1] for b in tree_bounds)
    maxx = max(b[2] for b in tree_bounds)
    maxy = max(b[3] for b in tree_bounds)

    side = max(maxx - minx, maxy - miny)
    score = (side ** 2) / n
    return side, score

# Score the configurations n = 1..20 and collect one record per configuration
results = []
for n in range(1, 21):
    side, score = calculate_score_for_n(df.loc[df['n'] == n])
    results.append(dict(n=n, side=side, score=score))

# One row per n, with columns n / side / score
results_df = pd.DataFrame(results)

print("First 20 configurations:")
print(results_df.to_string(index=False))

# Sum of the per-configuration scores over the range covered above
print(f"\nPartial score (n=1-20): {results_df['score'].sum():.6f}")

First 20 configurations:
 n     side    score
 1 1.000000 1.000000
 2 1.211271 0.733589
 3 1.670600 0.930301
 4 2.039257 1.039642
 5 2.121716 0.900336
 6 2.172745 0.786803
 7 2.901647 1.202794
 8 3.441115 1.480159
 9 3.441115 1.315697
10 3.441115 1.184127
11 3.441115 1.076479
12 3.441115 0.986773
13 4.147386 1.323139
14 4.147386 1.228629
15 4.147386 1.146721
16 4.147386 1.075051
17 4.147386 1.011812
18 4.147386 0.955601
19 4.147386 0.905306
20 4.147386 0.860041

Partial score (n=1-20): 21.143000
