# Baseline: Find Best Pre-optimized Solution

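This notebook scores a sample of the pre-optimized submission snapshots with a local implementation of the competition score (lower is better), then copies the best-scoring file into the experiment and submission folders as our baseline.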

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import os
import glob

# Tree polygon vertices: TX[i] / TY[i] are the x / y coordinates of the i-th
# vertex of the un-rotated tree outline (15 vertices each). The two lists are
# zipped together in create_tree_polygon, so they must stay the same length
# and in the same vertex order.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to (x, y).

    The base outline (the TX/TY vertices) is first rotated about its local
    origin (0, 0) and only then translated, so (x, y) is where that local
    origin ends up.

    Returns:
        A shapely Polygon for the placed tree.
    """
    outline = Polygon(list(zip(TX, TY)))
    # Rotate before translating: the rotation origin is the outline's own (0, 0).
    turned = affinity.rotate(outline, deg, origin=(0, 0))
    placed = affinity.translate(turned, xoff=x, yoff=y)
    return placed

def parse_value(s):
    """Convert a submission field to float, dropping a leading 's' if present.

    Strings that start with 's' (e.g. ``'s0.25'``) have that one character
    removed before conversion; anything else is passed to ``float`` as is.

    Raises:
        ValueError / TypeError: whatever ``float`` raises for unparsable input.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    payload = s[1:] if has_prefix else s
    return float(payload)

def load_submission(path):
    """Read a submission CSV and return it with numeric coordinates.

    The 'x', 'y' and 'deg' columns are converted to floats via parse_value
    (which strips the 's' prefix), and an integer column 'n' is added, taken
    from the part of 'id' before the first underscore.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The DataFrame with converted 'x', 'y', 'deg' and the extra 'n' column.
    """
    frame = pd.read_csv(path)
    for column in ('x', 'y', 'deg'):
        frame[column] = frame[column].apply(parse_value)
    # The group size is the leading token of the id, before the first '_'.
    frame['n'] = frame['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))
    return frame

def calculate_score(df, max_n=200):
    """Calculate the competition score.

    For every group size n in 1..max_n that is present in ``df``, all trees of
    that group are placed, the side of the axis-aligned bounding square around
    them is measured (the larger of the bounding box's width and height), and
    ``side ** 2 / n`` is added to the total.

    Args:
        df: DataFrame with columns 'n', 'x', 'y' and 'deg', as returned by
            load_submission.
        max_n: Largest group size to score (default 200). Groups whose n is
            outside 1..max_n are ignored.

    Returns:
        A tuple ``(total_score, per_n_scores)`` where ``per_n_scores`` maps each
        scored n to ``{'side': ..., 'score': ...}``. ``total_score`` is 0 when
        no group was scored.
    """
    total_score = 0
    per_n_scores = {}

    # Split the frame once instead of re-filtering every row for each n.
    groups = {key: group for key, group in df.groupby('n')}

    for n in range(1, max_n + 1):
        trees = groups.get(n)
        if trees is None or len(trees) == 0:
            continue

        # Only the overall bounding box is needed, and that is just the
        # envelope of the individual polygon bounds, so no polygon union is
        # required.
        boxes = [
            create_tree_polygon(x, y, deg).bounds  # (minx, miny, maxx, maxy)
            for x, y, deg in zip(trees['x'], trees['y'], trees['deg'])
        ]
        min_x = min(box[0] for box in boxes)
        min_y = min(box[1] for box in boxes)
        max_x = max(box[2] for box in boxes)
        max_y = max(box[3] for box in boxes)
        side = max(max_x - min_x, max_y - min_y)

        # Score contribution
        score_n = (side ** 2) / n
        per_n_scores[n] = {'side': side, 'score': score_n}
        total_score += score_n

    return total_score, per_n_scores

# Sanity check: confirm the cell ran and that TX holds the expected number of
# outline vertices.
print("Functions defined successfully")
print(f"Tree polygon has {len(TX)} vertices")

Functions defined successfully
Tree polygon has 15 vertices


In [3]:
# Find all submission files in snapshots
submission_files = glob.glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv')
print(f"Found {len(submission_files)} submission files in standard location")

# Also check other locations
all_submissions = glob.glob('/home/nonroot/snapshots/santa-2025/**/submission.csv', recursive=True)
print(f"Found {len(all_submissions)} total submission files")

# Sample a few to find the best one.
# NOTE: glob returns paths in arbitrary order, so "the first 50" is whatever
# the filesystem happened to list first, not a stable or representative sample.
sampled_files = all_submissions[:50]  # Check first 50
print(f"\nSampling {len(sampled_files)} files to find best baseline...")

best_score = float('inf')  # lower is better
best_file = None
sample_failures = []  # (path, exception) for files that could not be loaded or scored

for f in sampled_files:
    try:
        df = load_submission(f)
        if len(df) < 20000:  # Skip incomplete files (should have ~20100 rows)
            continue
        score, _ = calculate_score(df)
    except Exception as e:
        # Best-effort scan: one broken file must not stop the search, but it
        # should not vanish silently either.
        sample_failures.append((f, e))
        continue
    if score > 0 and score < best_score:  # Skip zero scores
        best_score = score
        best_file = f
        print(f"New best: {score:.6f} from {f}")

if sample_failures:
    print(f"\nSkipped {len(sample_failures)} files that failed to load or score:")
    for failed_path, failure in sample_failures[:5]:  # cap the listing to keep output short
        print(f"  {failed_path}: {type(failure).__name__}: {failure}")

print(f"\nBest score so far: {best_score:.6f}")
print(f"Best file: {best_file}")

Found 89 submission files in standard location
Found 610 total submission files

Sampling 50 files to find best baseline...


New best: 70.676102 from /home/nonroot/snapshots/santa-2025/21116303805/code/submission.csv


New best: 70.676102 from /home/nonroot/snapshots/santa-2025/21116303805/code/experiments/002_preoptimized/submission.csv


New best: 70.647327 from /home/nonroot/snapshots/santa-2025/21328309254/code/submission.csv


New best: 70.624381 from /home/nonroot/snapshots/santa-2025/21328309254/code/experiments/002_snapshot_ensemble/submission.csv


New best: 70.615745 from /home/nonroot/snapshots/santa-2025/21328309254/code/experiments/003_bbox3_optimization/submission.csv


New best: 70.523320 from /home/nonroot/snapshots/santa-2025/21328309254/code/experiments/003_valid_ensemble/submission.csv



Best score so far: 70.523320
Best file: /home/nonroot/snapshots/santa-2025/21328309254/code/experiments/003_valid_ensemble/submission.csv


In [None]:
# Check the standard submission folder files (these are likely the best).
# Continues from the best_score / best_file found by the sampling cell.
submission_folder_files = glob.glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv')
print(f"Checking {len(submission_folder_files)} files from submission folders...")

folder_failures = []  # (path, exception) for files that could not be loaded or scored

for f in submission_folder_files[:30]:  # Check first 30
    try:
        df = load_submission(f)
        if len(df) < 20000:  # Skip incomplete files
            continue
        score, _ = calculate_score(df)
    except Exception as e:
        # Best-effort scan: keep going, but remember what was skipped so
        # failures are visible instead of silently ignored.
        folder_failures.append((f, e))
        continue
    if score > 0 and score < best_score:  # lower is better; skip zero scores
        best_score = score
        best_file = f
        print(f"New best: {score:.6f} from {f}")

if folder_failures:
    print(f"\nSkipped {len(folder_failures)} files that failed to load or score:")
    for failed_path, failure in folder_failures[:5]:  # cap the listing to keep output short
        print(f"  {failed_path}: {type(failure).__name__}: {failure}")

print(f"\nFinal best score: {best_score:.6f}")
print(f"Final best file: {best_file}")

In [None]:
# Reload the winning file and report its size and range of group sizes
best_df = load_submission(best_file)
print(f"Best submission has {len(best_df)} rows")
print(f"N values: {best_df['n'].min()} to {best_df['n'].max()}")

# Re-score it, this time keeping the per-N breakdown
total_score, per_n_scores = calculate_score(best_df)
print(f"\nTotal score: {total_score:.6f}")

# Aggregate the per-N scores into coarse N buckets
print("\nScore contribution by N range:")
n_buckets = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
for start, end in n_buckets:
    range_score = 0
    for n in range(start, end + 1):
        if n in per_n_scores:
            range_score += per_n_scores[n]['score']
    print(f"  N={start}-{end}: {range_score:.4f}")

# List the ten N values that contribute most to the total
print("\nTop 10 N values by score contribution:")
ranked_n = sorted(per_n_scores, key=lambda k: per_n_scores[k]['score'], reverse=True)
for n in ranked_n[:10]:
    data = per_n_scores[n]
    print(f"  N={n}: side={data['side']:.4f}, score={data['score']:.4f}")

In [None]:
# Copy best submission to our experiment folder and submission folder
import shutil

experiment_dir = '/home/code/experiments/001_baseline'
submission_dir = '/home/submission'

# Create each destination before copying so a fresh checkout (where the
# experiment folder may not exist yet) does not fail with FileNotFoundError.
for target_dir in (experiment_dir, submission_dir):
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(best_file, os.path.join(target_dir, 'submission.csv'))

print("Copied best submission to experiment folder and /home/submission/")
print(f"Best score: {total_score:.6f}")

In [None]:
# Save metrics
import json

metrics = {
    'cv_score': total_score,
    'best_file': best_file,
    # JSON object keys must be strings, so the integer N keys are converted.
    'per_n_scores': {str(k): v for k, v in per_n_scores.items()}
}

metrics_path = '/home/code/experiments/001_baseline/metrics.json'
# Make sure the experiment folder exists; open(..., 'w') does not create
# missing parent directories.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)

with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print("Saved metrics to experiments/001_baseline/metrics.json")
print(f"CV Score: {total_score:.6f}")