# Baseline Experiment - Santa 2025 Christmas Tree Packing

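Establish a baseline by evaluating the available pre-optimized submissions and building an ensemble that keeps, for each N, the overlap-free configuration with the smallest bounding-square side.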

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import os

# Use 25 significant digits for every Decimal operation in this notebook.
getcontext().prec = 25
# Tree coordinates are multiplied by this factor before the shapely polygons are
# built (ChristmasTree) and divided by it again when bounds are read back
# (get_side_length).
scale_factor = Decimal('1e15')

In [2]:
class ChristmasTree:
    """A fixed-size Christmas tree outline placed at a position and rotation.

    Attributes:
        center_x, center_y: Decimal placement offsets (unscaled units).
        angle: Decimal rotation passed to ``affinity.rotate`` about the origin.
        polygon: shapely Polygon of the rotated, translated outline, expressed
            in coordinates multiplied by ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Store the placement and build the scaled polygon.

        Args:
            center_x: Value accepted by ``Decimal`` giving the x offset.
            center_y: Value accepted by ``Decimal`` giving the y offset.
            angle: Value accepted by ``Decimal`` giving the rotation.
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal('2')
        four = Decimal('4')

        # Widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the outline's horizontal levels.
        trunk_h = Decimal('0.2')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, walked from the tip down to the
        # bottom-right corner of the trunk (unscaled units).
        right_half = [
            (Decimal('0.0'), tip_y),
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image walked back up; the tip is
        # skipped so it is not emitted twice.
        left_half = [(-x, y) for x, y in reversed(right_half[1:])]

        outline = [
            (x * scale_factor, y * scale_factor)
            for x, y in right_half + left_half
        ]
        base_shape = Polygon(outline)

        # Rotate about the origin first, then shift to the requested centre.
        turned = affinity.rotate(base_shape, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

In [3]:
def has_overlap(trees):
    """Report whether at least one pair of trees overlaps.

    Two trees count as overlapping when their polygons intersect but do not
    merely touch.

    Args:
        trees: Sequence of objects exposing a shapely ``polygon`` attribute.

    Returns:
        True as soon as one overlapping pair is found, otherwise False.
        Sequences with fewer than two trees always yield False.
    """
    if len(trees) < 2:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    for own_pos, shape in enumerate(shapes):
        # The results are used as integer positions into `shapes`
        # (assumes shapely 2.x STRtree.query semantics).
        for other_pos in spatial_index.query(shape):
            if other_pos == own_pos:
                continue
            neighbour = shapes[other_pos]
            if not shape.intersects(neighbour):
                continue
            if shape.touches(neighbour):
                # Boundary contact only; not an overlap.
                continue
            return True
    return False


def get_side_length(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Args:
        trees: Sequence of objects exposing a shapely ``polygon`` attribute
            in ``scale_factor``-scaled coordinates.

    Returns:
        Decimal side length in unscaled units; ``Decimal('0')`` when ``trees``
        is empty.
    """
    if not trees:
        return Decimal('0')

    merged = unary_union([tree.polygon for tree in trees])
    # bounds is (minx, miny, maxx, maxy); undo the coordinate scaling.
    min_x, min_y, max_x, max_y = (
        Decimal(value) / scale_factor for value in merged.bounds
    )
    return max(max_x - min_x, max_y - min_y)


def calculate_score(side_lengths):
    """Sum ``side**2 / n`` over every entry and return it as a float.

    Args:
        side_lengths: Mapping from tree count ``n`` to a Decimal side length.

    Returns:
        The total as a Python float (0.0 for an empty mapping).
    """
    total = sum(
        (side ** 2 / Decimal(str(n)) for n, side in side_lengths.items()),
        Decimal('0'),
    )
    return float(total)

In [4]:
def load_submission(csv_path):
    """Read a submission CSV and build the trees and side length for each N.

    The ``x``, ``y`` and ``deg`` columns are converted to strings and any
    leading ``s`` characters are stripped before the values are handed to
    ``ChristmasTree``.

    Args:
        csv_path: Path of a CSV with ``id``, ``x``, ``y`` and ``deg`` columns,
            where ids start with a zero-padded three-digit N and an underscore.

    Returns:
        ``(tree_lists, side_lengths)``: two dicts keyed by N = 1..200 holding
        the list of ``ChristmasTree`` objects and the result of
        ``get_side_length`` for that list.
    """
    table = pd.read_csv(csv_path)
    for column in ('x', 'y', 'deg'):
        table[column] = table[column].astype(str).str.lstrip('s')

    tree_lists = {}
    side_lengths = {}

    for n in range(1, 201):
        # Rows for this puzzle are the ones whose id starts with e.g. "007_".
        in_group = table['id'].str.startswith(f'{n:03d}_')
        placed = [
            ChristmasTree(record['x'], record['y'], record['deg'])
            for _, record in table[in_group].iterrows()
        ]
        tree_lists[n] = placed
        side_lengths[n] = get_side_length(placed)

    return tree_lists, side_lengths

In [5]:
# Score the sample submission as the reference point for the other files.
print("Evaluating sample submission...")
sample_trees, sample_sides = load_submission(
    '/home/code/preoptimized/sample_submission.csv'
)
sample_score = calculate_score(sample_sides)
print(f"Sample submission score: {sample_score:.6f}")

Evaluating sample submission...


Sample submission score: 173.652299


In [6]:
# Score the saspav_best submission.
print("\nEvaluating saspav_best submission...")
saspav_trees, saspav_sides = load_submission(
    '/home/code/preoptimized/saspav_best.csv'
)
saspav_score = calculate_score(saspav_sides)
print(f"saspav_best score: {saspav_score:.6f}")

# Count how many of the 200 configurations contain an overlapping pair.
overlap_count = sum(
    1 for n in range(1, 201) if has_overlap(saspav_trees[n])
)
print(f"Overlapping N values: {overlap_count}")


Evaluating saspav_best submission...


saspav_best score: 70.630478


Overlapping N values: 9


In [7]:
# Score the best_snapshot submission.
print("\nEvaluating best_snapshot submission...")
snapshot_trees, snapshot_sides = load_submission(
    '/home/code/preoptimized/best_snapshot.csv'
)
snapshot_score = calculate_score(snapshot_sides)
print(f"best_snapshot score: {snapshot_score:.6f}")

# Count how many of the 200 configurations contain an overlapping pair.
overlap_count = sum(
    1 for n in range(1, 201) if has_overlap(snapshot_trees[n])
)
print(f"Overlapping N values: {overlap_count}")


Evaluating best_snapshot submission...


best_snapshot score: 70.627582


Overlapping N values: 2


In [8]:
# Build the ensemble: for each N keep the overlap-free layout with the
# smallest side length among the three loaded submissions.
print("\nCreating ensemble...")

ensemble_trees = {}
ensemble_sides = {}
source_counts = {'sample': 0, 'saspav': 0, 'snapshot': 0}

# Order matters: min() keeps the earliest entry when side lengths tie.
ensemble_sources = [
    ('sample', sample_trees, sample_sides),
    ('saspav', saspav_trees, saspav_sides),
    ('snapshot', snapshot_trees, snapshot_sides),
]

for n in range(1, 201):
    # Only layouts without overlapping trees are eligible.
    candidates = [
        (label, trees_by_n[n], sides_by_n[n])
        for label, trees_by_n, sides_by_n in ensemble_sources
        if not has_overlap(trees_by_n[n])
    ]

    if candidates:
        chosen_label, chosen_trees, chosen_side = min(
            candidates, key=lambda entry: entry[2]
        )
    else:
        # No eligible layout: use the sample layout without re-checking it
        # (it is expected to be valid).
        chosen_label = 'sample'
        chosen_trees = sample_trees[n]
        chosen_side = sample_sides[n]

    ensemble_trees[n] = chosen_trees
    ensemble_sides[n] = chosen_side
    source_counts[chosen_label] += 1

ensemble_score = calculate_score(ensemble_sides)
print(f"Ensemble score: {ensemble_score:.6f}")
print(f"Source distribution: {source_counts}")


Creating ensemble...


Ensemble score: 70.627582
Source distribution: {'sample': 0, 'saspav': 179, 'snapshot': 21}


In [9]:
# Save ensemble submission
print("\nSaving ensemble submission...")

# Score to beat; defined once so the printed target and the gap cannot drift apart.
TARGET_SCORE = 68.901319
SUBMISSION_PATHS = (
    '/home/code/experiments/001_baseline/submission.csv',
    '/home/submission/submission.csv',
)

rows = []
for n in range(1, 201):
    trees = ensemble_trees[n]
    for t_idx, tree in enumerate(trees):
        # Emit the stored Decimal values in plain fixed-point notation.
        # Going through float(...) with 12 decimals would round the
        # coordinates, so the saved file would no longer be the exact
        # configuration that was overlap-checked and scored above.
        rows.append({
            'id': f'{n:03d}_{t_idx}',
            'x': f's{tree.center_x:f}',
            'y': f's{tree.center_y:f}',
            'deg': f's{tree.angle:f}',
        })

submission_df = pd.DataFrame(rows)
for out_path in SUBMISSION_PATHS:
    # Make sure the target directory exists so to_csv does not fail on a
    # fresh environment.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    submission_df.to_csv(out_path, index=False)
print(f"Saved submission with {len(submission_df)} rows")
print(f"\nFinal ensemble score: {ensemble_score:.6f}")
print(f"Target score: {TARGET_SCORE:.6f}")
print(f"Gap to target: {ensemble_score - TARGET_SCORE:.6f}")


Saving ensemble submission...
Saved submission with 20100 rows

Final ensemble score: 70.627582
Target score: 68.901319
Gap to target: 1.726263


In [10]:
# Show how much each band of N values contributes to the ensemble score.
print("\nScore breakdown by N ranges:")
ranges = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]

for start, end in ranges:
    # Per-N contribution is side**2 / n, converted to float before summing.
    per_n_terms = (
        float(ensemble_sides[n] ** 2 / Decimal(str(n)))
        for n in range(start, end + 1)
    )
    range_score = sum(per_n_terms)
    print(f"N={start:3d}-{end:3d}: {range_score:.6f}")


Score breakdown by N ranges:
N=  1- 10: 4.329128
N= 11- 20: 3.724074
N= 21- 50: 10.980714
N= 51-100: 17.614300
N=101-150: 17.136399
N=151-200: 16.842967


In [11]:
# List the ten N values that add the most to the ensemble score.
print("\nTop 10 worst N values (highest score contribution):")
contributions = sorted(
    (
        (n, float(ensemble_sides[n] ** 2 / Decimal(str(n))))
        for n in range(1, 201)
    ),
    key=lambda pair: pair[1],
    reverse=True,
)

for n, contrib in contributions[:10]:
    print(f"N={n:3d}: contribution={contrib:.6f}, side_length={float(ensemble_sides[n]):.6f}")


Top 10 worst N values (highest score contribution):
N=  1: contribution=0.661250, side_length=0.813173
N=  2: contribution=0.450779, side_length=0.949504
N=  3: contribution=0.434745, side_length=1.142031
N=  5: contribution=0.416850, side_length=1.443692
N=  4: contribution=0.416545, side_length=1.290806
N=  7: contribution=0.399897, side_length=1.673104
N=  6: contribution=0.399610, side_length=1.548438
N=  9: contribution=0.387415, side_length=1.867280
N=  8: contribution=0.385407, side_length=1.755921
N= 15: contribution=0.376978, side_length=2.377955


In [12]:
# List the N values whose layout contains overlapping trees, per submission.
overlap_reports = [
    ("N values with overlaps in saspav_best:", saspav_trees, saspav_sides),
    ("\nN values with overlaps in best_snapshot:", snapshot_trees, snapshot_sides),
]

flagged_per_report = []
for header, trees_by_n, sides_by_n in overlap_reports:
    print(header)
    flagged = []
    for n in range(1, 201):
        if not has_overlap(trees_by_n[n]):
            continue
        flagged.append(n)
        print(f"  N={n}: side_length={float(sides_by_n[n]):.6f}")
    flagged_per_report.append(flagged)

saspav_overlaps, snapshot_overlaps = flagged_per_report

N values with overlaps in saspav_best:
  N=33: side_length=3.491250
  N=42: side_length=3.925203
  N=46: side_length=4.094486
  N=48: side_length=4.131033


  N=134: side_length=6.813755
  N=138: side_length=6.860167
  N=154: side_length=7.173582


  N=165: side_length=7.441026
  N=166: side_length=7.455196



N values with overlaps in best_snapshot:


  N=158: side_length=7.343104


  N=184: side_length=7.902634


In [14]:
# Score further pre-optimized submissions and register them for ensembling.
print("Loading additional submissions...")

# Submissions already loaded by the cells above.
all_submissions = {
    'sample': (sample_trees, sample_sides),
    'saspav': (saspav_trees, saspav_sides),
    'snapshot': (snapshot_trees, snapshot_sides),
}

# (name, csv path) pairs still to be loaded.
additional_files = [
    ('bucket_of_chump', '/home/code/preoptimized/bucket_of_chump.csv'),
    ('chistyakov', '/home/code/preoptimized/chistyakov_best.csv'),
    ('sub_70_926', '/home/code/preoptimized/submission_70_926.csv'),
    ('better_ensemble', '/home/code/preoptimized/better_ensemble.csv'),
]

for name, path in additional_files:
    # Best effort: a file that fails to load or evaluate is reported and
    # skipped so the remaining files are still processed.
    try:
        trees, sides = load_submission(path)
        score = calculate_score(sides)
        overlapping_ns = [n for n in range(1, 201) if has_overlap(trees[n])]
        overlap_count = len(overlapping_ns)
        print(f"{name}: score={score:.6f}, overlaps={overlap_count}")
        all_submissions[name] = (trees, sides)
    except Exception as e:
        print(f"{name}: Error - {e}")

Loading additional submissions...


bucket_of_chump: score=70.676501, overlaps=11


chistyakov: score=70.926150, overlaps=0


sub_70_926: score=70.926150, overlaps=0


better_ensemble: score=70.647306, overlaps=13
