# Baseline: Pre-optimized Santa 2025 Submission

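This notebook establishes the baseline: it loads the pre-optimized submission from the snapshots directory, computes the bounding-box side length and score contribution for every N, and copies the file unchanged to the submission folder.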

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import math
import os

# Tree geometry (15-vertex polygon), given in the tree's local frame before
# any rotation or translation is applied.
# The outline is symmetric about x = 0, spans y = -0.2 (trunk bottom) to
# y = 0.8 (tip) and x = -0.35 to x = 0.35, and is listed counter-clockwise:
# from the tip down the left side, across the trunk bottom, and back up the
# right side.
# NOTE(review): the tier 1 inner vertices here are at x = +/-0.05; the
# competition's reference tree may use +/-0.0625 (top width / 4) -- TODO
# confirm before relying on this polygon for overlap checks.
TREE_VERTICES = [
    (0, 0.8),       # tip
    (-0.125, 0.5),  # tier 1 left
    (-0.05, 0.5),   # tier 1 inner left
    (-0.2, 0.25),   # tier 2 left
    (-0.1, 0.25),   # tier 2 inner left
    (-0.35, 0),     # tier 3 left
    (-0.075, 0),    # trunk top left
    (-0.075, -0.2), # trunk bottom left
    (0.075, -0.2),  # trunk bottom right
    (0.075, 0),     # trunk top right
    (0.35, 0),      # tier 3 right
    (0.1, 0.25),    # tier 2 inner right
    (0.2, 0.25),    # tier 2 right
    (0.05, 0.5),    # tier 1 inner right
    (0.125, 0.5),   # tier 1 right
]

def create_tree_polygon(x, y, angle_deg):
    """Build the tree outline turned by angle_deg and placed at (x, y).

    The base outline from TREE_VERTICES is first rotated by ``angle_deg``
    about the local origin (0, 0) and only then shifted by (x, y), so
    (x, y) is where the tree's local origin ends up.

    Args:
        x: Horizontal offset applied after the rotation.
        y: Vertical offset applied after the rotation.
        angle_deg: Rotation angle handed to shapely's ``rotate``.

    Returns:
        The rotated and translated shapely ``Polygon``.
    """
    base_outline = Polygon(TREE_VERTICES)
    turned_outline = rotate(base_outline, angle_deg, origin=(0, 0))
    return translate(turned_outline, xoff=x, yoff=y)

def get_bounding_box_side(polygons):
    """Return the side of the smallest axis-aligned square enclosing all polygons.

    Every exterior coordinate of every polygon is considered; the result is
    the larger of the overall x-extent and y-extent.

    Args:
        polygons: Sequence of objects exposing ``exterior.coords`` as an
            iterable of (x, y, ...) tuples.

    Returns:
        ``0`` when ``polygons`` is empty, otherwise the larger of the width
        and height of the joint bounding box.
    """
    if not polygons:
        return 0

    xs, ys = [], []
    for shape in polygons:
        for point in shape.exterior.coords:
            xs.append(point[0])
            ys.append(point[1])

    x_extent = max(xs) - min(xs)
    y_extent = max(ys) - min(ys)
    return x_extent if x_extent >= y_extent else y_extent

# Sanity check: confirm the cell ran and report how many vertices the outline has.
print("Tree geometry loaded successfully")
print(f"Tree vertices: {len(TREE_VERTICES)} points")

Tree geometry loaded successfully
Tree vertices: 15 points


In [2]:
# Load the pre-optimized submission.
# `baseline_path` is reused later when the file is copied to the submission
# folder, and `df` is the frame that the following cells parse and score.
# The preview printed here shows that x, y and deg are stored as strings with
# a leading 's', so they have to be parsed before any numeric use.
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df = pd.read_csv(baseline_path)
print(f"Loaded {len(df)} rows from baseline submission")
print(df.head(10))

Loaded 20100 rows from baseline submission
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   
5  003_2      s0.641714640229075      s1.180458566613381   
6  004_0  s-0.324747789589372171   s0.132109978088185392   
7  004_1   s0.315354346242637695   s0.132109978063475492   
8  004_2   s0.324747789592379210  s-0.732109978069475531   
9  004_3  s-0.315354348134818330  s-0.732109978094185987   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  
5      s155.13405193710082  
6  s156.370622145636389178  
7  s156.370622269264089255  
8  s336.370622269264003990  
9  s33

In [3]:
# Parse the submission format (values prefixed with 's')
def parse_value(val):
    """Convert a submission field to ``float``, dropping a leading 's' marker.

    Strings that begin with 's' have that first character removed before
    conversion; every other input is handed to ``float`` unchanged.
    """
    is_prefixed = isinstance(val, str) and val.startswith('s')
    numeric_part = val[1:] if is_prefixed else val
    return float(numeric_part)

# Numeric copies of the 's'-prefixed coordinate and angle columns
df['x_val'] = df['x'].map(parse_value)
df['y_val'] = df['y'].map(parse_value)
df['deg_val'] = df['deg'].map(parse_value)

# Ids look like NNN_i: NNN is the configuration size N, i the tree's index in it
df['n'] = df['id'].map(lambda tree_id: int(tree_id.split('_')[0]))
df['tree_idx'] = df['id'].map(lambda tree_id: int(tree_id.split('_')[1]))

smallest_n = df['n'].min()
largest_n = df['n'].max()
print(f"N values range: {smallest_n} to {largest_n}")
print(f"\nSample parsed data:")
preview_columns = ['id', 'x_val', 'y_val', 'deg_val', 'n', 'tree_idx']
print(df[preview_columns].head(10))

N values range: 1 to 200

Sample parsed data:
      id      x_val      y_val     deg_val  n  tree_idx
0  001_0 -48.196086  58.770985   45.000000  1         0
1  002_0   0.154097  -0.038541  203.629378  2         0
2  002_1  -0.154097  -0.561459   23.629378  2         1
3  003_0   1.123656   0.781102  111.125132  3         0
4  003_1   1.234056   1.276000   66.370622  3         1
5  003_2   0.641715   1.180459  155.134052  3         2
6  004_0  -0.324748   0.132110  156.370622  4         0
7  004_1   0.315354   0.132110  156.370622  4         1
8  004_2   0.324748  -0.732110  336.370622  4         2
9  004_3  -0.315354  -0.732110  336.370622  4         3


In [4]:
# Calculate bounding box side length for each N
MAX_N = 200  # configurations N = 1..MAX_N are expected in the submission

side_lengths = {}
skipped_ns = []  # N values with a wrong row count; these are not scored

# Split the frame once instead of re-filtering every row for each N.
trees_by_n = {int(n): group for n, group in df.groupby('n')}

for n in range(1, MAX_N + 1):
    n_data = trees_by_n.get(n)
    tree_count = 0 if n_data is None else len(n_data)
    if tree_count != n:
        # A configuration for N must contain exactly N trees; anything else
        # cannot be scored, so report it and move on.
        print(f"Warning: N={n} has {tree_count} trees instead of {n}")
        skipped_ns.append(n)
        continue

    # zip over the three columns avoids the per-row Series that iterrows builds.
    polygons = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(n_data['x_val'], n_data['y_val'], n_data['deg_val'])
    ]
    side_lengths[n] = get_bounding_box_side(polygons)

print(f"Calculated side lengths for {len(side_lengths)} N values")
if skipped_ns:
    # Make the consequence explicit: a sum over side_lengths silently omits
    # these N values and therefore looks better than it really is.
    print(f"Warning: {len(skipped_ns)} N values were skipped ({skipped_ns}); "
          f"any score summed from side_lengths omits them and is understated")

Calculated side lengths for 200 N values


In [5]:
# Calculate the total score
def calculate_score(side_lengths_dict):
    """Sum side**2 / n over every (n, side) entry of the mapping.

    Args:
        side_lengths_dict: Mapping from the tree count ``n`` to the bounding
            square's side length for that configuration.

    Returns:
        The accumulated total; ``0`` for an empty mapping.
    """
    total = 0
    # Accumulate in the mapping's own iteration order, one term at a time.
    for tree_count in side_lengths_dict:
        edge = side_lengths_dict[tree_count]
        total = total + (edge ** 2) / tree_count
    return total

# Score the baseline is compared against. Kept in one place so the printed
# target and the computed gap cannot drift apart.
TARGET_SCORE = 68.894234

total_score = calculate_score(side_lengths)
print("\n=== BASELINE SCORE ===")
print(f"Total Score: {total_score:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap to target: {total_score - TARGET_SCORE:.6f}")

# Show per-N contribution
print("\n=== Per-N Score Contribution (top 20) ===")
# Each entry is (n, contribution, side); largest contribution first. The sort
# is stable, so equal contributions keep their original (dictionary) order.
contributions = sorted(
    ((n, side**2 / n, side) for n, side in side_lengths.items()),
    key=lambda entry: entry[1],
    reverse=True,
)
for n, contrib, side in contributions[:20]:
    print(f"N={n:3d}: side={side:.4f}, contribution={contrib:.4f}")


=== BASELINE SCORE ===
Total Score: 70.676102
Target: 68.894234
Gap to target: 1.781868

=== Per-N Score Contribution (top 20) ===
N=  1: side=0.8132, contribution=0.6612
N=  2: side=0.9495, contribution=0.4508
N=  3: side=1.1420, contribution=0.4347
N=  5: side=1.4437, contribution=0.4168
N=  4: side=1.2908, contribution=0.4165
N=  7: side=1.6731, contribution=0.3999
N=  6: side=1.5484, contribution=0.3996
N=  9: side=1.8673, contribution=0.3874
N=  8: side=1.7559, contribution=0.3854
N= 15: side=2.3850, contribution=0.3792
N= 10: side=1.9407, contribution=0.3766
N= 21: side=2.8117, contribution=0.3765
N= 20: side=2.7425, contribution=0.3761
N= 11: side=2.0330, contribution=0.3757
N= 22: side=2.8733, contribution=0.3753
N= 16: side=2.4466, contribution=0.3741
N= 26: side=3.1183, contribution=0.3740
N= 12: side=2.1149, contribution=0.3727
N= 13: side=2.2000, contribution=0.3723
N= 25: side=3.0502, contribution=0.3721


In [6]:
# Copy baseline to submission folder
import shutil

# Single source of truth for the destination, used for the copy, the message
# and the verification read-back.
submission_path = '/home/submission/submission.csv'

os.makedirs(os.path.dirname(submission_path), exist_ok=True)
shutil.copy(baseline_path, submission_path)
print(f"Copied baseline to {submission_path}")

# Verify the copy: read it back and make sure it has as many rows as the
# baseline frame loaded earlier, so a truncated or stale file is caught here.
df_check = pd.read_csv(submission_path)
assert len(df_check) == len(df), (
    f"Copied submission has {len(df_check)} rows, expected {len(df)}"
)
print(f"Verified: {len(df_check)} rows in submission")

Copied baseline to /home/submission/submission.csv
Verified: 20100 rows in submission


In [7]:
# Save metrics
import json

metrics_path = '/home/code/experiments/001_baseline/metrics.json'
target_score = 68.894234  # score the baseline is compared against

metrics = {
    'cv_score': total_score,
    'target': target_score,
    'gap': total_score - target_score,
    # Report how many N values were actually scored rather than assuming all
    # of them were; skipped configurations would otherwise go unnoticed.
    'n_values': len(side_lengths)
}

# The experiment directory may not exist on a fresh checkout.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"Saved metrics to experiments/001_baseline/metrics.json")
print(f"\nFinal baseline score: {total_score:.6f}")

Saved metrics to experiments/001_baseline/metrics.json

Final baseline score: 70.676102
