# Baseline Submission - Pre-optimized CSV

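This notebook recomputes the score of the best pre-optimized submission (santa-2025.csv), spot-checks a few group sizes for overlapping trees, and saves the resulting score to `metrics.json`.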

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import warnings
warnings.filterwarnings('ignore')

# Use 30 significant digits for all Decimal arithmetic in this notebook.
getcontext().prec = 30

# Load the submission to be verified and show its shape plus the first rows.
print("Loading submission...")
df = pd.read_csv('/home/submission/submission.csv')
print(f"Shape: {df.shape}")
print(df.head())

In [None]:
# Parse the 's' prefix from values
def parse_value(val):
    """Return ``val`` as a string, dropping a leading ``'s'`` marker if present.

    A string that begins with ``'s'`` loses that first character; any other
    input is converted with ``str`` and returned unchanged otherwise.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_marker else str(val)

# Tree geometry
class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    The outline is a 15-vertex silhouette (tip, three foliage tiers and a
    trunk) defined around the local origin and symmetric about the y-axis:
    the tip sits at y = 0.8, the foliage base at y = 0 and the trunk ends at
    y = -0.2. The outline is rotated by ``angle`` about the origin and then
    shifted by ``(center_x, center_y)``.

    Attributes:
        center_x: Horizontal offset of the tree, stored as ``Decimal``.
        center_y: Vertical offset of the tree, stored as ``Decimal``.
        angle: Rotation handed to ``affinity.rotate``, stored as ``Decimal``.
        polygon: The rotated and translated shapely ``Polygon``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal('2')
        four = Decimal('4')

        # Full widths of the trunk and of each foliage tier.
        trunk_width = Decimal('0.15')
        base_width = Decimal('0.7')
        mid_width = Decimal('0.4')
        top_width = Decimal('0.25')

        # Heights of the horizontal levels of the outline.
        y_tip = float(Decimal('0.8'))
        y_upper = float(Decimal('0.5'))
        y_lower = float(Decimal('0.25'))
        y_base = float(Decimal('0.0'))
        y_trunk_end = float(-Decimal('0.2'))

        # Right half of the outline, walking from just below the tip down to
        # the bottom-right corner of the trunk.
        right_side = [
            (float(top_width / two), y_upper),
            (float(top_width / four), y_upper),
            (float(mid_width / two), y_lower),
            (float(mid_width / four), y_lower),
            (float(base_width / two), y_base),
            (float(trunk_width / two), y_base),
            (float(trunk_width / two), y_trunk_end),
        ]
        # The left half is the mirror image, walked back up towards the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(0.0, y_tip), *right_side, *left_side]

        spun = affinity.rotate(Polygon(outline), float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            spun,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

print("Tree class defined.")

In [None]:
def score_submission(df):
    """Score every group size from 1 to 200 and return the summed total.

    For each ``n`` the rows whose ``id`` starts with the zero-padded prefix
    ``f'{n:03d}_'`` are turned into trees. The group's score is the square of
    the longer side of the bounding box of the trees' union, divided by ``n``.
    A group that does not contain exactly ``n`` rows triggers a printed
    warning and is left out of both the total and the per-group mapping.

    Returns:
        A ``(total, per_group)`` tuple: the total score as a float and a dict
        mapping each scored ``n`` to its float score.
    """
    per_group = {}
    running_total = Decimal('0')

    for n in range(1, 201):
        # Select the rows that belong to the n-tree configuration.
        in_group = df['id'].str.startswith(f'{n:03d}_')
        group = df[in_group]

        if len(group) != n:
            print(f"Warning: N={n} has {len(group)} trees instead of {n}")
            continue

        polygons = [
            ChristmasTree(
                parse_value(row['x']),
                parse_value(row['y']),
                parse_value(row['deg']),
            ).polygon
            for _, row in group.iterrows()
        ]

        # Side of the square needed to enclose the whole configuration.
        min_x, min_y, max_x, max_y = unary_union(polygons).bounds
        side = Decimal(str(max(max_x - min_x, max_y - min_y)))
        score_n = side ** 2 / Decimal(str(n))

        running_total += score_n
        per_group[n] = float(score_n)

        # Only echo the first ten groups and every twentieth one.
        if n <= 10 or n % 20 == 0:
            print(f"N={n}: side={float(side):.6f}, score={float(score_n):.6f}")

    return float(running_total), per_group

print("Scoring function defined.")

In [None]:
# Score all group sizes; keep both the aggregate and the per-N breakdown
# because later cells print and save the total.
print("Calculating score...")
total_score, scores_by_n = score_submission(df)
print(f"\n=== TOTAL SCORE: {total_score:.6f} ===")

In [None]:
# Verify no overlaps (quick check on a few N values)
from shapely.strtree import STRtree

def check_overlaps(df, n, area_tol=1e-10):
    """Report the first pair of trees in group ``n`` that overlap.

    Args:
        df: Submission frame with ``id``, ``x``, ``y`` and ``deg`` columns.
        n: Group size; rows are selected by the id prefix ``f'{n:03d}_'``.
        area_tol: Intersection areas at or below this value are treated as
            numerical noise rather than a real overlap. Defaults to ``1e-10``.

    Returns:
        ``(True, i, j, area)`` for the first offending pair, where ``i < j``
        are positions within the group in row order and ``area`` is the
        area of their intersection; ``(False, None, None, 0)`` when no pair
        exceeds the tolerance (including when the group has no rows).
    """
    from itertools import combinations

    group = df[df['id'].str.startswith(f'{n:03d}_')]
    polygons = [
        ChristmasTree(
            parse_value(row['x']),
            parse_value(row['y']),
            parse_value(row['deg']),
        ).polygon
        for _, row in group.iterrows()
    ]

    # combinations() yields pairs in the same (i, j) order as a nested
    # "j > i" loop, so the first reported pair is unchanged.
    for (i, first), (j, second) in combinations(enumerate(polygons), 2):
        # Cheap predicate first; only build the intersection when needed.
        if not first.intersects(second):
            continue
        overlap_area = first.intersection(second).area
        if overlap_area > area_tol:
            return True, i, j, overlap_area
    return False, None, None, 0

# Spot-check a handful of group sizes instead of all of them
print("Checking for overlaps...")
for n in (10, 50, 100, 150, 200):
    has_overlap, i, j, area = check_overlaps(df, n)
    if not has_overlap:
        print(f"N={n}: No overlaps")
        continue
    print(f"N={n}: OVERLAP between trees {i} and {j}, area={area}")

print(f"\nFinal score: {total_score:.6f}")

In [None]:
# Persist the verified score together with a short description of the run
import json

metrics = dict(
    cv_score=total_score,
    model_type='pre-optimized',
    notes='Baseline submission using best pre-optimized CSV (santa-2025.csv)',
)

with open('/home/code/experiments/001_baseline/metrics.json', 'w') as f:
    f.write(json.dumps(metrics, indent=2))

print(f"Metrics saved. CV Score: {total_score:.6f}")