In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext

# Read the sample submission and preview its layout (shape plus first 20 rows).
sample_submission_path = '/home/data/sample_submission.csv'
df = pd.read_csv(sample_submission_path)
print(f'Shape: {df.shape}')
sample_preview = df.head(20)
print(sample_preview)

Shape: (20100, 4)
       id           x           y     deg
0   001_0        s0.0        s0.0   s90.0
1   002_0        s0.0        s0.0   s90.0
2   002_1   s0.202736  s-0.511271   s90.0
3   003_0        s0.0        s0.0   s90.0
4   003_1   s0.202736  s-0.511271   s90.0
5   003_2     s0.5206   s0.177413  s180.0
6   004_0        s0.0        s0.0   s90.0
7   004_1   s0.202736  s-0.511271   s90.0
8   004_2     s0.5206   s0.177413  s180.0
9   004_3  s-0.818657  s-0.228694  s180.0
10  005_0        s0.0        s0.0   s90.0
11  005_1   s0.202736  s-0.511271   s90.0
12  005_2     s0.5206   s0.177413  s180.0
13  005_3  s-0.818657  s-0.228694  s180.0
14  005_4   s0.111852   s0.893022  s180.0
15  006_0        s0.0        s0.0   s90.0
16  006_1   s0.202736  s-0.511271   s90.0
17  006_2     s0.5206   s0.177413  s180.0
18  006_3  s-0.818657  s-0.228694  s180.0
19  006_4   s0.111852   s0.893022  s180.0


In [2]:
# Define the tree polygon that the later scoring cells rotate and translate
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union

# Tree geometry
# Widths of the trunk and of the three foliage tiers, plus the trunk height.
trunk_w = 0.15
trunk_h = 0.2
base_w = 0.7
mid_w = 0.4
top_w = 0.25

# y-coordinates of the tip, the two tier transitions, the foliage base and
# the bottom edge of the trunk (which hangs below y = 0).
tip_y = 0.8
tier_1_y = 0.5
tier_2_y = 0.25
base_y = 0.0
trunk_bottom_y = -trunk_h

# Right-hand side of the outline, walked from just below the tip down to the
# bottom-right corner of the trunk.
_right_outline = [
    (top_w / 2, tier_1_y),
    (top_w / 4, tier_1_y),
    (mid_w / 2, tier_2_y),
    (mid_w / 4, tier_2_y),
    (base_w / 2, base_y),
    (trunk_w / 2, base_y),
    (trunk_w / 2, trunk_bottom_y),
]

# The left-hand side is the right-hand side mirrored across x = 0 and walked
# in the opposite direction, which closes the ring back towards the tip.
tree_vertices = (
    [(0.0, tip_y)]
    + _right_outline
    + [(-px, py) for px, py in reversed(_right_outline)]
)

base_tree = Polygon(tree_vertices)
print(f"Tree bounding box: {base_tree.bounds}")
print(f"Tree area: {base_tree.area:.4f}")

Tree bounding box: (-0.35, -0.2, 0.35, 0.8)
Tree area: 0.2456


In [3]:
# Load and parse the pre-optimized submission (it is scored in a later cell)
import os

# Load the best pre-optimized submission
best_csv = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df_best = pd.read_csv(best_csv)

# The x / y / deg columns are text with a leading 's'; strip it and convert
# each column to float.
for numeric_col in ('x', 'y', 'deg'):
    df_best[numeric_col] = df_best[numeric_col].str.lstrip('s').astype(float)

# Ids have the form "<n>_<tree index>"; split them so rows can be grouped by n.
df_best[['n', 'tree_idx']] = df_best['id'].str.split('_', expand=True)
df_best['n'] = df_best['n'].astype(int)

n_values = df_best['n']
print(f"Shape: {df_best.shape}")
print(f"N values: {n_values.min()} to {n_values.max()}")
print(df_best.head())

Shape: (20100, 6)
N values: 1 to 200
      id          x          y         deg  n tree_idx
0  001_0 -48.196086  58.770985   45.000000  1        0
1  002_0   0.154097  -0.038541  203.629378  2        0
2  002_1  -0.154097  -0.561459   23.629378  2        1
3  003_0   1.123656   0.781102  111.125132  3        0
4  003_1   1.234056   1.276000   66.370622  3        1


In [4]:
# Calculate score for each N
def get_tree_polygon(x, y, deg):
    """Return a copy of ``base_tree`` placed at ``(x, y)`` with rotation ``deg``.

    The base polygon is first rotated by ``deg`` about the origin ``(0, 0)``
    (shapely's ``affinity.rotate`` interprets the angle in degrees by
    default) and then translated by ``x`` horizontally and ``y`` vertically.
    """
    rotated = affinity.rotate(base_tree, deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def calculate_side_length(group):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Parameters
    ----------
    group : pandas.DataFrame
        One row per tree, with columns ``'x'``, ``'y'`` and ``'deg'`` that are
        passed to ``get_tree_polygon``.

    Returns
    -------
    float
        ``max(width, height)`` of the bounding box that covers every tree.

    Raises
    ------
    ValueError
        If ``group`` has no rows; an empty configuration has no bounding box.
    """
    if len(group) == 0:
        raise ValueError("calculate_side_length() requires at least one tree")

    # The bounding box of a collection is the envelope of the individual
    # bounding boxes, so there is no need to build the geometric union first.
    boxes = [
        get_tree_polygon(x, y, deg).bounds
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]

    # Each box is (minx, miny, maxx, maxy).
    min_x = min(box[0] for box in boxes)
    min_y = min(box[1] for box in boxes)
    max_x = max(box[2] for box in boxes)
    max_y = max(box[3] for box in boxes)

    return max(max_x - min_x, max_y - min_y)

# Calculate for each N
# The score of a configuration is side**2 / n, where side is the edge of the
# square that bounds its n trees; the total is the sum over n = 1..200.
scores_by_n = []
for n in range(1, 201):
    group = df_best[df_best['n'] == n]
    side = calculate_side_length(group)
    score_n = side**2 / n
    scores_by_n.append({'n': n, 'side': side, 'score': score_n})

scores_df = pd.DataFrame(scores_by_n)
total_score = scores_df['score'].sum()
print(f"Total score: {total_score:.6f}")
# "\n" (not "\\n") so that a blank line is emitted rather than a literal
# backslash followed by "n".
print("\nTop 10 contributors to score:")
print(scores_df.nlargest(10, 'score')[['n', 'side', 'score']])

Total score: 70.676102
\nTop 10 contributors to score:
     n      side     score
0    1  0.813173  0.661250
1    2  0.949504  0.450779
2    3  1.142031  0.434745
4    5  1.443692  0.416850
3    4  1.290806  0.416545
6    7  1.673104  0.399897
5    6  1.548438  0.399610
8    9  1.867280  0.387415
7    8  1.755921  0.385407
14  15  2.384962  0.379203


In [5]:
# Analyze N=1 - this is the highest contributor
n1 = df_best[df_best['n'] == 1]
print("N=1 configuration:")
print(n1)
# "\n" (not "\\n") so that a blank line is emitted rather than a literal
# backslash followed by "n".
print(f"\nDegree: {n1['deg'].values[0]}")

# For N=1, the optimal angle should minimize the bounding box
# Let's check what angle minimizes the bounding box
import matplotlib.pyplot as plt

# Sweep rotations from 0 to 90 degrees in 0.1 degree steps and record the
# side of the bounding square of the rotated base tree at each angle.
angles = np.linspace(0, 90, 901)
sides = []
for angle in angles:
    poly = affinity.rotate(base_tree, angle, origin=(0, 0))
    bounds = poly.bounds
    side = max(bounds[2] - bounds[0], bounds[3] - bounds[1])
    sides.append(side)

# Compare the best angle found by the sweep with the side length of the
# submitted N=1 configuration.
min_idx = np.argmin(sides)
print(f"\nOptimal angle for N=1: {angles[min_idx]:.1f} degrees")
print(f"Minimum side length: {sides[min_idx]:.6f}")
print(f"Current side length: {scores_df[scores_df['n']==1]['side'].values[0]:.6f}")

N=1 configuration:
      id          x          y   deg  n tree_idx
0  001_0 -48.196086  58.770985  45.0  1        0
\nDegree: 45.0


\nOptimal angle for N=1: 45.0 degrees
Minimum side length: 0.813173
Current side length: 0.813173


In [6]:
# Analyze efficiency (trees per unit area) by N
# Efficiency is n / side**2: the number of trees per unit area of the
# bounding square.
scores_df['efficiency'] = scores_df['n'] / (scores_df['side'] ** 2)
print("Efficiency analysis (trees per unit area):")
print(scores_df[['n', 'side', 'score', 'efficiency']].head(20))
# "\n" (not "\\n") so that a blank line is emitted rather than a literal
# backslash followed by "n".
print("\nLowest efficiency (hardest to pack):")
print(scores_df.nsmallest(10, 'efficiency')[['n', 'side', 'score', 'efficiency']])

Efficiency analysis (trees per unit area):
     n      side     score  efficiency
0    1  0.813173  0.661250    1.512287
1    2  0.949504  0.450779    2.218381
2    3  1.142031  0.434745    2.300198
3    4  1.290806  0.416545    2.400702
4    5  1.443692  0.416850    2.398947
5    6  1.548438  0.399610    2.502438
6    7  1.673104  0.399897    2.500647
7    8  1.755921  0.385407    2.594658
8    9  1.867280  0.387415    2.581211
9   10  1.940696  0.376630    2.655125
10  11  2.033002  0.375736    2.661443
11  12  2.114873  0.372724    2.682950
12  13  2.200046  0.372323    2.685839
13  14  2.277711  0.370569    2.698551
14  15  2.384962  0.379203    2.637111
15  16  2.446640  0.374128    2.672882
16  17  2.508124  0.370040    2.702408
17  18  2.576409  0.368771    2.711708
18  19  2.646449  0.368615    2.712855
19  20  2.742469  0.376057    2.659172
\nLowest efficiency (hardest to pack):
     n      side     score  efficiency
0    1  0.813173  0.661250    1.512287
1    2  0.949504  0.4

In [None]:
# Calculate cumulative score contribution
per_n_score = scores_df['score']
scores_df['cumulative_score'] = per_n_score.cumsum()
scores_df['pct_of_total'] = per_n_score / total_score * 100
scores_df['cumulative_pct'] = scores_df['cumulative_score'] / total_score * 100

# Sum the per-N scores over each range of N reported below.
n_column = scores_df['n']
score_n_1_10 = per_n_score[n_column <= 10].sum()
score_n_1_50 = per_n_score[n_column <= 50].sum()
score_n_51_100 = per_n_score[(n_column > 50) & (n_column <= 100)].sum()
score_n_101_200 = per_n_score[n_column > 100].sum()

print("Score contribution analysis:")
print(f"N=1 to N=10 contributes: {score_n_1_10:.4f} ({score_n_1_10/total_score*100:.2f}%)")
print(f"N=1 to N=50 contributes: {score_n_1_50:.4f} ({score_n_1_50/total_score*100:.2f}%)")
print(f"N=51 to N=100 contributes: {score_n_51_100:.4f}")
print(f"N=101 to N=200 contributes: {score_n_101_200:.4f}")