# Quick EDA - Christmas Tree Packing

In [1]:
import pandas as pd
import numpy as np

# Read the sample submission into `df`, then report its dimensions and
# column names before previewing the first ten rows.
submission_csv = '/home/data/sample_submission.csv'
df = pd.read_csv(submission_csv)
print(f'Shape: {df.shape}', f'Columns: {df.columns.tolist()}', sep='\n')
df.head(10)

Shape: (20100, 4)
Columns: ['id', 'x', 'y', 'deg']


Unnamed: 0,id,x,y,deg
0,001_0,s0.0,s0.0,s90.0
1,002_0,s0.0,s0.0,s90.0
2,002_1,s0.202736,s-0.511271,s90.0
3,003_0,s0.0,s0.0,s90.0
4,003_1,s0.202736,s-0.511271,s90.0
5,003_2,s0.5206,s0.177413,s180.0
6,004_0,s0.0,s0.0,s90.0
7,004_1,s0.202736,s-0.511271,s90.0
8,004_2,s0.5206,s0.177413,s180.0
9,004_3,s-0.818657,s-0.228694,s180.0


In [2]:
# Analyze the structure - how many trees per configuration.
# Every id is split on '_': the first piece becomes the configuration
# number `n`, the second piece the tree's index inside that configuration.
id_parts = df['id'].str.split('_')
df['n'] = id_parts.str[0].map(int)
df['tree_idx'] = id_parts.str[1].map(int)

print(f"Number of configurations: {df['n'].nunique()}")
print(f"Configuration range: {df['n'].min()} to {df['n'].max()}")
print(f"Total trees: {len(df)}")
print(f"Expected total: {sum(range(1, 201))} = 1+2+...+200")

# Verify counts per configuration: row count for each value of `n`.
counts = df.groupby('n').size()
print(f"\nFirst 10 configurations have trees: {counts.head(10).tolist()}")
print(f"Last 5 configurations have trees: {counts.tail(5).tolist()}")

Number of configurations: 200
Configuration range: 1 to 200
Total trees: 20100
Expected total: 20100 = 1+2+...+200

First 10 configurations have trees: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Last 5 configurations have trees: [196, 197, 198, 199, 200]


In [None]:
# Calculate score for sample submission
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union

getcontext().prec = 25
# Multiplier applied to every coordinate before it is handed to shapely;
# get_score_for_n divides it back out so the score is in unscaled units.
scale_factor = Decimal('1')


class ChristmasTree:
    """A single tree polygon placed at (center_x, center_y) and rotated by `angle`.

    The outline is a fixed 15-vertex polygon: tip at y=0.8, three tiers of
    width 0.25 / 0.4 / 0.7, and a 0.15 x 0.2 trunk below y=0. It is rotated
    about the origin by `angle` (shapely's default unit, degrees) and then
    translated to the requested centre.

    Args:
        center_x: x offset; anything `Decimal()` accepts (e.g. a numeric string).
        center_y: y offset; anything `Decimal()` accepts.
        angle: rotation about the origin; anything `Decimal()` accepts.

    Attributes:
        center_x, center_y, angle: the inputs converted to `Decimal`.
        polygon: the placed shapely `Polygon` (coordinates multiplied by
            the module-level `scale_factor`).
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Vertices run from the tip down the right-hand side, across the
        # trunk, and back up the left-hand side.
        initial_polygon = Polygon([
            (float(Decimal('0.0') * scale_factor), float(tip_y * scale_factor)),
            (float(top_w / Decimal('2') * scale_factor), float(tier_1_y * scale_factor)),
            (float(top_w / Decimal('4') * scale_factor), float(tier_1_y * scale_factor)),
            (float(mid_w / Decimal('2') * scale_factor), float(tier_2_y * scale_factor)),
            (float(mid_w / Decimal('4') * scale_factor), float(tier_2_y * scale_factor)),
            (float(base_w / Decimal('2') * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal('2') * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal('2') * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal('2')) * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal('2')) * scale_factor), float(base_y * scale_factor)),
            (float(-(base_w / Decimal('2')) * scale_factor), float(base_y * scale_factor)),
            (float(-(mid_w / Decimal('4')) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(mid_w / Decimal('2')) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(top_w / Decimal('4')) * scale_factor), float(tier_1_y * scale_factor)),
            (float(-(top_w / Decimal('2')) * scale_factor), float(tier_1_y * scale_factor)),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))


def _strip_s_prefix(value):
    """Return `value` as a string with one leading 's' removed, if present."""
    text = str(value)
    # Only drop the marker when it is really there; blindly slicing [1:]
    # would eat a sign or leading digit from an un-prefixed value.
    return text[1:] if text.startswith('s') else text


def get_score_for_n(df_group, n):
    """Score one configuration: bounding-square area divided by `n`.

    Args:
        df_group: rows for a single configuration, with columns 'x', 'y'
            and 'deg' (values may carry a leading 's' prefix).
        n: divisor for the squared side length (the configuration size).

    Returns:
        (side ** 2) / n, where `side` is the larger of the width and height
        of the bounding box around all trees, in unscaled units.

    Raises:
        ValueError: if `df_group` contains no rows.
    """
    if len(df_group) == 0:
        raise ValueError(f'No trees found for configuration n={n}')

    trees = [
        ChristmasTree(_strip_s_prefix(x), _strip_s_prefix(y), _strip_s_prefix(deg))
        for x, y, deg in zip(df_group['x'], df_group['y'], df_group['deg'])
    ]

    all_polygons = [t.polygon for t in trees]
    bounds = unary_union(all_polygons).bounds
    side = max(bounds[2] - bounds[0], bounds[3] - bounds[1])
    # Polygons were built in scaled coordinates, so undo the scaling here
    # (a no-op while scale_factor is 1).
    return side ** 2 / float(scale_factor) ** 2 / n


# Calculate total score
total_score = 0
for n in range(1, 201):
    group = df[df['n'] == n]
    score = get_score_for_n(group, n)
    total_score += score
    # Only echo the first and last few configurations to keep output short.
    if n <= 5 or n >= 198:
        print(f'n={n}: score={score:.6f}')

print(f'\nTotal Score: {total_score:.6f}')