# Baseline Analysis - Santa 2025

Load and verify the best pre-optimized submission from snapshots.

In [4]:
import pandas as pd
import numpy as np
from collections import defaultdict

# Tree geometry: a 15-vertex outline, mirror-symmetric about x = 0.
# Vertex i is (TX[i], TY[i]); the list starts at the tip (0, 0.8).
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
])

def get_tree_vertices(x, y, angle_deg):
    """Return the tree outline rotated by ``angle_deg`` and moved to ``(x, y)``.

    The template polygon (``TX``, ``TY``) is rotated about its own origin
    (counter-clockwise for positive angles in an x-right / y-up frame) and
    then shifted by ``(x, y)``.

    Returns:
        tuple: ``(xs, ys)`` arrays holding the transformed vertex coordinates,
        in the same vertex order as the template.
    """
    theta = np.radians(angle_deg)
    c = np.cos(theta)
    s = np.sin(theta)

    # Standard 2-D rotation followed by the translation.
    xs = TX * c - TY * s + x
    ys = TX * s + TY * c + y
    return xs, ys

def calculate_score_for_n(trees):
    """Score one configuration: squared bounding-square side per tree.

    Args:
        trees: Iterable of ``(x, y, angle_deg)`` triples, one per tree.

    Returns:
        ``side ** 2 / n``, where ``side`` is the larger of the width and the
        height of the axis-aligned bounding box around every vertex returned
        by ``get_tree_vertices`` and ``n`` is the number of trees.

    Raises:
        ValueError: If ``trees`` is empty.
    """
    # Materialise once so generators are accepted and the count is available.
    trees = list(trees)
    if not trees:
        raise ValueError("calculate_score_for_n requires at least one tree")

    outlines = [get_tree_vertices(x, y, angle) for x, y, angle in trees]
    all_xs = np.concatenate([xs for xs, _ in outlines])
    all_ys = np.concatenate([ys for _, ys in outlines])

    width = all_xs.max() - all_xs.min()
    height = all_ys.max() - all_ys.min()
    side = max(width, height)
    return (side ** 2) / len(trees)

print("Functions defined successfully")

Functions defined successfully


In [5]:
# Load the baseline submission
import os
# Change directory first: the relative 'submission.csv' below (and any later
# relative path such as 'metrics.json') is resolved against this folder.
# NOTE(review): the absolute path is machine-specific.
os.chdir('/home/code/experiments/000_baseline')
df = pd.read_csv('submission.csv')
# Quick look at the table: shape, column names and the first ten rows.
print(f"Submission shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(f"\nFirst 10 rows:")
print(df.head(10))

Submission shape: (20100, 4)
Columns: ['id', 'x', 'y', 'deg']

First 10 rows:
      id                      x                      y                  deg
0  001_0     s43.59119209210215    s-31.78326706874178   s44.99999999999998
1  002_0    s0.1540970696213559  s-0.03854074269479465  s144.27276086312358
2  002_1  s-0.15409706962137285   s-0.5614592573052241  s324.27276086312355
3  003_0     s0.254937643697833    s-0.233436061549416  s113.56326044172948
4  003_1     s0.357722754471247     s0.250360566787394     s66.370622269343
5  003_2    s-0.234618301141838     s0.154819632737017  s155.13405193710082
6  004_0  s-0.29385141092425193    s0.1318322746912058    s154.766362231348
7  004_1    s0.3093423021943079   s0.12387011696071852   s154.7663623549757
8  004_2    s0.3016776723386178   s-0.7246632361943603   s334.7663623549756
9  004_3  s-0.30140730580875774   s-0.7147976146068641  s334.76636223134807


In [6]:
# Parse the submission
def parse_submission(df):
    """Parse a submission table into ``{n: [(x, y, deg), ...]}``.

    Args:
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``. ``id`` has
            the form ``"<n>_<tree index>"``; the three value columns may carry
            the ``'s'`` marker seen in the submission file or already be
            numeric.

    Returns:
        dict: ``n`` (int) -> list of ``(x, y, deg)`` float tuples, in the row
        order of ``df``.
    """
    def to_float(value):
        # str() keeps this working for already-numeric cells; Python's float()
        # parses the remaining decimal text.
        return float(str(value).replace('s', ''))

    configs = defaultdict(list)

    # Walk the four columns in lockstep instead of iterrows(), which builds a
    # new Series for every row.
    for tree_id, x, y, deg in zip(df['id'], df['x'], df['y'], df['deg']):
        # Only the group size before the underscore is needed.
        n = int(tree_id.split('_')[0])
        configs[n].append((to_float(x), to_float(y), to_float(deg)))

    # Hand back a plain dict so missing keys raise instead of auto-creating.
    return dict(configs)

# Build the n -> [(x, y, deg), ...] mapping from the loaded DataFrame.
configs = parse_submission(df)
# Sanity checks: number of distinct N groups and the tree counts for N=1 and N=200.
print(f"Parsed {len(configs)} N values")
print(f"N=1 has {len(configs[1])} trees")
print(f"N=200 has {len(configs[200])} trees")
# Show the raw parsed tuples for the two smallest groups.
print(f"\nN=1 config: {configs[1]}")
print(f"N=2 config: {configs[2]}")

Parsed 200 N values
N=1 has 1 trees
N=200 has 200 trees

N=1 config: [(43.59119209210215, -31.78326706874178, 44.99999999999998)]
N=2 config: [(0.1540970696213559, -0.03854074269479465, 144.27276086312358), (-0.15409706962137285, -0.5614592573052241, 324.27276086312355)]


In [7]:
# Calculate total score
def calculate_total_score(configs, max_n=200):
    """Sum the per-N scores for N = 1..max_n.

    Args:
        configs: Mapping ``n -> list of (x, y, angle_deg)`` trees.
        max_n: Largest N to score. Defaults to 200, the bound that used to be
            hard-coded in the loop.

    Returns:
        tuple: ``(total, scores_by_n)`` where ``scores_by_n`` maps each scored
        ``n`` to its ``calculate_score_for_n`` value and ``total`` is their sum.

    Problems are reported with printed warnings rather than exceptions: a
    missing N is skipped, and a group with the wrong number of trees is still
    scored with the trees it contains.
    """
    total = 0.0
    scores_by_n = {}

    for n in range(1, max_n + 1):
        if n not in configs:
            print(f"WARNING: Missing N={n}")
            continue

        trees = configs[n]
        if len(trees) != n:
            print(f"WARNING: N={n} has {len(trees)} trees instead of {n}")

        score_n = calculate_score_for_n(trees)
        scores_by_n[n] = score_n
        total += score_n

    return total, scores_by_n

# Score the parsed submission; scores_by_n keeps the per-N terms for the cells below.
total_score, scores_by_n = calculate_total_score(configs)
print(f"\nTotal Score: {total_score:.6f}")
# 68.882921 is the hard-coded target; the gap is positive when the total exceeds it.
print(f"Target: 68.882921")
print(f"Gap: {total_score - 68.882921:.6f}")


Total Score: 70.523320
Target: 68.882921
Gap: 1.640399


In [8]:
# Analyze score contributions by N
print("\nTop 10 highest score contributions (worst N values):")
# (n, score) pairs ordered by score, largest first.
sorted_scores = sorted(scores_by_n.items(), key=lambda x: x[1], reverse=True)
for n, score in sorted_scores[:10]:
    print(f"  N={n}: {score:.6f}")

print("\nTop 10 lowest score contributions (best N values):")
# Tail of the descending list, so the smallest score is printed last.
for n, score in sorted_scores[-10:]:
    print(f"  N={n}: {score:.6f}")

# Spot-check the three smallest N; the 0.6612 reference figure is hard-coded in the message.
print(f"\nN=1 score: {scores_by_n[1]:.6f} (theoretical optimal: 0.6612)")
print(f"N=2 score: {scores_by_n[2]:.6f}")
print(f"N=3 score: {scores_by_n[3]:.6f}")


Top 10 highest score contributions (worst N values):
  N=1: 0.661250
  N=2: 0.437328
  N=3: 0.434745
  N=4: 0.411056
  N=7: 0.399897
  N=6: 0.399610
  N=5: 0.394109
  N=9: 0.387415
  N=8: 0.385407
  N=15: 0.376949

Top 10 lowest score contributions (best N values):
  N=194: 0.332999
  N=195: 0.332576
  N=179: 0.332571
  N=167: 0.332129
  N=155: 0.331928
  N=168: 0.331548
  N=180: 0.331000
  N=182: 0.329988
  N=181: 0.329945
  N=156: 0.329912

N=1 score: 0.661250 (theoretical optimal: 0.6612)
N=2 score: 0.437328
N=3 score: 0.434745


In [9]:
# Save metrics
import json

# Assemble the summary step by step, in the key order it is written out.
metrics = {'cv_score': total_score, 'target': 68.882921}
metrics['gap'] = metrics['cv_score'] - metrics['target']
# Per-N scores are stored under string keys.
metrics['scores_by_n'] = dict(zip(map(str, scores_by_n), scores_by_n.values()))

# Written relative to the current working directory.
with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"Metrics saved to metrics.json")
print(f"\nBaseline CV Score: {total_score:.6f}")

Metrics saved to metrics.json

Baseline CV Score: 70.523320
