# Baseline Experiment - Find Best Pre-Optimized Submission

This notebook:
1. Loads all pre-optimized submissions from snapshots
2. Calculates scores for each
3. Selects the best one as our baseline
4. Copies it to the submission folder and saves its score metrics

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os
import glob
import json
from tqdm import tqdm

# Tree polygon vertices
# TX[i] and TY[i] are the x and y coordinates of the i-th outline vertex
# (15 vertices); get_tree_polygon zips the two lists into (x, y) pairs.
# The outline is mirror-symmetric about x = 0, spans x in [-0.35, 0.35]
# and y in [-0.2, 0.8].
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def get_tree_polygon(x, y, angle):
    """Return the tree outline rotated by `angle` and then shifted to (x, y).

    The outline is built from the TX/TY vertex lists, rotated about the
    origin (0, 0) -- not about the polygon's centroid -- and only afterwards
    translated by (x, y). `angle` is passed straight to shapely's
    `affinity.rotate`, which interprets it in degrees by default.
    """
    outline = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(outline, angle, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def calculate_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    `trees` is an iterable of (x, y, angle) triples. Every tree is turned into
    a polygon with get_tree_polygon, and the result is the larger of the
    x-extent and the y-extent over all exterior vertices.
    """
    vertices = np.array([
        vertex
        for x, y, angle in trees
        for vertex in get_tree_polygon(x, y, angle).exterior.coords
    ])
    lower = vertices.min(axis=0)
    upper = vertices.max(axis=0)
    extent = upper - lower
    return max(extent[0], extent[1])

def parse_value(val):
    """Convert a raw CSV cell to float, dropping one leading 's' if present.

    Only string inputs are checked for the prefix; anything else is handed to
    float() unchanged.
    """
    has_prefix = isinstance(val, str) and val[:1] == 's'
    return float(val[1:] if has_prefix else val)

def load_submission(path):
    """Load a submission CSV and return trees by N.

    The CSV must provide `id`, `x`, `y` and `deg` columns. The part of `id`
    before the first underscore is parsed as the integer key N; the other
    three columns go through parse_value, so they may carry its optional
    leading 's'.

    Args:
        path: Location of the CSV, passed unchanged to pandas.read_csv.

    Returns:
        A plain dict mapping N (int) to a list of (x, y, deg) float tuples,
        in the order the rows appear in the file.
    """
    df = pd.read_csv(path)
    trees_by_n = {}

    # Walk the four columns in lockstep instead of DataFrame.iterrows():
    # iterrows builds a fresh Series for every row, which is needlessly slow
    # when many large submissions are scored back to back.
    for tree_id, raw_x, raw_y, raw_deg in zip(df['id'], df['x'], df['y'], df['deg']):
        n = int(tree_id.split('_')[0])
        tree = (parse_value(raw_x), parse_value(raw_y), parse_value(raw_deg))
        trees_by_n.setdefault(n, []).append(tree)

    return trees_by_n

def calculate_total_score(trees_by_n):
    """Sum side**2 / n over every n in 1..200 that is present in `trees_by_n`.

    Returns a (total, per_n_scores) pair, where per_n_scores maps each scored
    n to its individual contribution. Values of n that are absent from
    `trees_by_n` are skipped without error, so the total of an incomplete
    mapping is lower than that of a complete one.
    """
    per_n_scores = {}
    total = 0
    for n in range(1, 201):
        if n not in trees_by_n:
            continue
        side = calculate_bounding_box_side(trees_by_n[n])
        contribution = (side ** 2) / n
        per_n_scores[n] = contribution
        total += contribution
    return total, per_n_scores

# Reaching this line means every definition in this cell executed without error.
print("Functions defined successfully!")

Functions defined successfully!


In [2]:
# Find all submission files
snapshot_dir = '/home/nonroot/snapshots/santa-2025'
# glob.glob returns matches in arbitrary order. Sorting keeps the order stable
# between runs, so the "first 20" sample and the winner among equal scores are
# reproducible.
submission_files = sorted(glob.glob(f'{snapshot_dir}/*/submission/submission.csv'))
print(f"Found {len(submission_files)} submission files")

Found 87 submission files


In [None]:
# Calculate scores for a first sample of submissions.
# Scoring every file can take a while, so start with a few to get a good
# candidate quickly; the files after the first 20 are scored afterwards.

best_score = float('inf')
best_file = None
best_per_n = None

# Sample 20 submissions to find a good one quickly
sample_files = submission_files[:20]

for f in tqdm(sample_files, desc="Evaluating submissions"):
    try:
        trees_by_n = load_submission(f)
        score, per_n = calculate_total_score(trees_by_n)
    except Exception as e:
        print(f"Error with {f}: {e}")
        continue

    # calculate_total_score skips any n that is absent, so an incomplete file
    # would get an artificially low total and could be picked as "best".
    missing = [n for n in range(1, 201) if n not in per_n]
    if missing:
        print(f"Skipping {f}: missing {len(missing)} of 200 values of n")
        continue

    if score < best_score:
        best_score = score
        best_file = f
        best_per_n = per_n
        print(f"New best: {score:.6f} from {f}")

print(f"\nBest score from sample: {best_score:.6f}")
print(f"Best file: {best_file}")

In [None]:
# Check more submissions to find the absolute best.
# Continues from the best_score / best_file / best_per_n found for the first 20 files.
remaining_files = submission_files[20:]

for f in tqdm(remaining_files, desc="Checking remaining"):
    try:
        trees_by_n = load_submission(f)
        score, per_n = calculate_total_score(trees_by_n)
    except Exception as e:
        # Report the failure instead of hiding it, so a malformed file is
        # visible in the output rather than silently ignored.
        print(f"Error with {f}: {e}")
        continue

    # calculate_total_score skips any n that is absent, so an incomplete file
    # would get an artificially low total and could be picked as "best".
    missing = [n for n in range(1, 201) if n not in per_n]
    if missing:
        print(f"Skipping {f}: missing {len(missing)} of 200 values of n")
        continue

    if score < best_score:
        best_score = score
        best_file = f
        best_per_n = per_n
        print(f"New best: {score:.6f} from {f}")

print(f"\nFinal best score: {best_score:.6f}")
print(f"Best file: {best_file}")

In [None]:
# Analyze the best submission - per-N breakdown
print("\nPer-N Score Analysis (top 20 contributors):")
print("="*50)

# Sort by score contribution (highest first)
sorted_scores = sorted(best_per_n.items(), key=lambda x: x[1], reverse=True)

# Parse the winning CSV once; re-loading it for every printed row would
# re-read the whole file up to 20 times.
best_trees_by_n = load_submission(best_file)

for n, score in sorted_scores[:20]:
    side = calculate_bounding_box_side(best_trees_by_n[n])
    print(f"N={n:3d}: side={side:.6f}, score={score:.6f}")

print(f"\nTotal score: {best_score:.6f}")

In [None]:
# Copy best submission to our submission folder
import shutil

# Fail with a clear message if no submission could be scored; otherwise
# shutil.copy would raise an unrelated TypeError on a None source path.
if best_file is None:
    raise RuntimeError("No valid submission was found; nothing to copy.")

submission_dir = '/home/submission'
experiment_dir = '/home/code/experiments/000_baseline'

# Create both destination folders up front; shutil.copy does not create
# missing parent directories.
os.makedirs(submission_dir, exist_ok=True)
os.makedirs(experiment_dir, exist_ok=True)

shutil.copy(best_file, os.path.join(submission_dir, 'submission.csv'))

# Also save to experiment folder
shutil.copy(best_file, os.path.join(experiment_dir, 'submission.csv'))

print("Copied best submission to /home/submission/submission.csv")
print(f"Score: {best_score:.6f}")

In [None]:
# Save metrics
# JSON object keys must be strings, so the integer n keys are converted first.
per_n_as_text = {}
for n_key, n_score in best_per_n.items():
    per_n_as_text[str(n_key)] = n_score

metrics = dict(
    cv_score=best_score,
    best_file=best_file,
    per_n_scores=per_n_as_text,
)

with open('/home/code/experiments/000_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"Saved metrics to experiments/000_baseline/metrics.json")
print(f"\nBaseline CV Score: {best_score:.6f}")