# Baseline Experiment - Santa 2025

Validate and score the best pre-optimized submission from snapshots.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json

# Use 30 significant digits for decimal arithmetic in the current context.
# NOTE(review): the cells visible here parse coordinates as float and never
# construct a Decimal, so this setting has no visible effect - confirm it is needed.
getcontext().prec = 30

# Tree shape definition
# TX[i] and TY[i] are the x and y coordinates of the i-th outline vertex
# (15 vertices). get_tree_polygon pairs them by index to build the polygon
# before any rotation or translation is applied.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def get_tree_polygon(x, y, angle_deg):
    """Return the tree outline rotated by angle_deg about (0, 0), then shifted to (x, y).

    The outline vertices come from the module-level TX / TY lists, paired by
    index. Rotation is applied first (around the origin of the outline's own
    coordinates), translation second.
    """
    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    turned = affinity.rotate(outline, angle_deg, origin=(0, 0))
    return affinity.translate(turned, xoff=x, yoff=y)

def has_overlap(poly1, poly2, tolerance=1e-9):
    """Tell whether two polygons share interior area rather than merely touching.

    Returns False when the polygons do not intersect at all; otherwise returns
    whether the area of their intersection is strictly greater than tolerance.
    """
    if poly1.intersects(poly2):
        shared_area = poly1.intersection(poly2).area
        return shared_area > tolerance
    return False

# Marker output confirming that the definitions in this cell were executed.
print("Functions defined successfully")

Functions defined successfully


In [2]:
# Load the best baseline submission
baseline_path = '/home/code/best_baseline.csv'
df = pd.read_csv(baseline_path)

# Parse the submission.
# The x / y / deg columns hold strings carrying an 's' marker (e.g. "s0.154...");
# remove it and convert to float. regex=False pins literal matching, because the
# default of `regex` differs between pandas versions.
df = df.assign(**{
    f'{col}_val': df[col].astype(str).str.replace('s', '', regex=False).astype(float)
    for col in ('x', 'y', 'deg')
})
# The id has the form "<n>_<index>"; the integer before '_' is stored as n.
df['n'] = df['id'].apply(lambda x: int(x.split('_')[0]))

print(f"Loaded {len(df)} rows")
print(f"N values: {df['n'].min()} to {df['n'].max()}")
print(df.head())

Loaded 20100 rows
N values: 1 to 200
      id                      x                      y                  deg  \
0  001_0     s43.59119209210215    s-31.78326706874178   s44.99999999999998   
1  002_0    s0.1540970696213559  s-0.03854074269479465  s144.27276086312358   
2  002_1  s-0.15409706962137285   s-0.5614592573052241  s324.27276086312355   
3  003_0     s0.254937643697833    s-0.233436061549416  s113.56326044172948   
4  003_1     s0.357722754471247     s0.250360566787394     s66.370622269343   

       x_val      y_val     deg_val  n  
0  43.591192 -31.783267   45.000000  1  
1   0.154097  -0.038541  144.272761  2  
2  -0.154097  -0.561459  324.272761  2  
3   0.254938  -0.233436  113.563260  3  
4   0.357723   0.250361   66.370622  3  


In [3]:
# Validate and score the submission
def validate_and_score(df, check_overlaps=True, max_n=200):
    """Score a parsed submission and optionally collect overlapping tree pairs.

    For every tree count n from 1 to max_n the rows with ``df['n'] == n`` are
    turned into polygons. The group's contribution is ``side**2 / n`` where
    ``side`` is the longer edge of the axis-aligned bounding box around all of
    the group's polygons.

    Args:
        df: DataFrame with the columns 'n', 'x_val', 'y_val' and 'deg_val'.
        check_overlaps: When True, test every pair of polygons in a group with
            has_overlap (quadratic in the group size, so it can be slow).
        max_n: Largest group size to evaluate, inclusive. Defaults to 200.

    Returns:
        A tuple ``(total_score, scores_by_n, overlaps_found)``:
        total_score is the sum of all per-group contributions, scores_by_n maps
        n to its contribution, and overlaps_found is a list of ``(n, i, j)``
        tuples with i < j being positions inside group n.

    Groups whose row count differs from n are reported with a WARNING line and
    are left out of both scores_by_n and total_score.
    """
    total_score = 0.0
    scores_by_n = {}
    overlaps_found = []

    for n in range(1, max_n + 1):
        group = df[df['n'] == n]
        if len(group) != n:
            # An incomplete group cannot be scored; report it and move on.
            print(f"WARNING: N={n} has {len(group)} trees instead of {n}")
            continue

        # Zip the three columns instead of iterrows(): same row order, no
        # per-row Series construction.
        polys = [get_tree_polygon(x, y, deg)
                 for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])]

        # Check overlaps (expensive, can skip for speed)
        if check_overlaps:
            for i in range(len(polys)):
                for j in range(i + 1, len(polys)):
                    if has_overlap(polys[i], polys[j]):
                        overlaps_found.append((n, i, j))

        # Calculate bounding box side length.
        # The extent of a set of polygons is the min/max over the individual
        # bounds, so no geometric union has to be built just to read it.
        min_xs, min_ys, max_xs, max_ys = zip(*(p.bounds for p in polys))
        side = max(max(max_xs) - min(min_xs), max(max_ys) - min(min_ys))
        score_n = side**2 / n
        scores_by_n[n] = score_n
        total_score += score_n

        # Keep the log short: only the first ten groups and every 50th one.
        if n <= 10 or n % 50 == 0:
            print(f"N={n}: side={side:.6f}, score_contribution={score_n:.6f}")

    return total_score, scores_by_n, overlaps_found

# Run the validation on the parsed baseline with the pairwise overlap check enabled.
print("Starting validation (this may take a few minutes)...")
total_score, scores_by_n, overlaps = validate_and_score(df, check_overlaps=True)
print(f"\nTotal Score: {total_score:.6f}")
print(f"Overlaps found: {len(overlaps)}")
# Each entry is (n, i, j): group n and the positions i < j of the overlapping
# pair inside that group. Only the first ten are shown.
if overlaps:
    print(f"First 10 overlaps: {overlaps[:10]}")

Starting validation (this may take a few minutes)...
N=1: side=0.813173, score_contribution=0.661250
N=2: side=0.935230, score_contribution=0.437328
N=3: side=1.142031, score_contribution=0.434745
N=4: side=1.282273, score_contribution=0.411056
N=5: side=1.403761, score_contribution=0.394109
N=6: side=1.548438, score_contribution=0.399610
N=7: side=1.673104, score_contribution=0.399897
N=8: side=1.755921, score_contribution=0.385407
N=9: side=1.867280, score_contribution=0.387415
N=10: side=1.940696, score_contribution=0.376630


N=50: side=4.247076, score_contribution=0.360753


N=100: side=5.859990, score_contribution=0.343395


N=150: side=7.110487, score_contribution=0.337060


N=200: side=8.216433, score_contribution=0.337549

Total Score: 70.523320
Overlaps found: 9388
First 10 overlaps: [(2, 0, 1), (4, 0, 1), (4, 0, 3), (4, 1, 2), (4, 1, 3), (5, 0, 1), (5, 0, 2), (5, 0, 3), (5, 0, 4), (5, 1, 2)]


In [4]:
# Analyze score distribution
# Rank every N by its score contribution, largest (worst) first.
sorted_scores = sorted(scores_by_n.items(), key=lambda pair: pair[1], reverse=True)

# Print the head and the tail of the ranking with the same formatting.
for heading, ranked in (
    ("\nTop 10 worst N values (highest score contribution):", sorted_scores[:10]),
    ("\nTop 10 best N values (lowest score contribution):", sorted_scores[-10:]),
):
    print(heading)
    for n, score in ranked:
        print(f"  N={n}: {score:.6f}")

# Target comparison
target = 68.894234
print(f"\nTarget: {target}")
print(f"Current: {total_score:.6f}")
print(f"Gap: {total_score - target:.6f}")


Top 10 worst N values (highest score contribution):
  N=1: 0.661250
  N=2: 0.437328
  N=3: 0.434745
  N=4: 0.411056
  N=7: 0.399897
  N=6: 0.399610
  N=5: 0.394109
  N=9: 0.387415
  N=8: 0.385407
  N=15: 0.376949

Top 10 best N values (lowest score contribution):
  N=194: 0.332999
  N=195: 0.332576
  N=179: 0.332571
  N=167: 0.332129
  N=155: 0.331928
  N=168: 0.331548
  N=180: 0.331000
  N=182: 0.329988
  N=181: 0.329945
  N=156: 0.329912

Target: 68.894234
Current: 70.523320
Gap: 1.629086


In [5]:
# Save submission and metrics
import shutil
from pathlib import Path

submission_path = Path('/home/submission/submission.csv')
experiment_dir = Path('/home/code/experiments/001_baseline')
metrics_path = experiment_dir / 'metrics.json'

# Create the destination folders up front so neither the copies nor the
# metrics dump fail with FileNotFoundError when a folder is missing.
submission_path.parent.mkdir(parents=True, exist_ok=True)
experiment_dir.mkdir(parents=True, exist_ok=True)

# The file is still saved when the validation reported overlaps, but the fact
# is printed next to the save messages instead of only living in metrics.json.
if overlaps:
    print(f"WARNING: saving a submission with {len(overlaps)} overlapping tree pairs")

# Copy to submission folder
shutil.copy(baseline_path, submission_path)
shutil.copy(baseline_path, experiment_dir / 'submission.csv')

# Save metrics
metrics = {
    'cv_score': total_score,
    'overlaps': len(overlaps),
    'target': target,
    'gap_to_target': total_score - target,
    # JSON object keys must be strings, so the integer n is converted.
    'scores_by_n': {str(k): v for k, v in scores_by_n.items()}
}

with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

# Messages are derived from the path variables so they cannot drift from the
# locations actually written.
print(f"Saved submission to {submission_path}")
print(f"Saved metrics to {metrics_path}")
print(f"\nFinal CV Score: {total_score:.6f}")

Saved submission to /home/submission/submission.csv
Saved metrics to /home/code/experiments/001_baseline/metrics.json

Final CV Score: 70.523320


In [None]:
# The ensemble submission has overlaps! Let's use the known valid baseline instead
valid_baseline_path = '/home/code/valid_baseline.csv'
df_valid = pd.read_csv(valid_baseline_path)

# Parse the submission.
# The x / y / deg columns hold strings carrying an 's' marker; remove it and
# convert to float. regex=False pins literal matching, because the default of
# `regex` differs between pandas versions.
df_valid = df_valid.assign(**{
    f'{col}_val': df_valid[col].astype(str).str.replace('s', '', regex=False).astype(float)
    for col in ('x', 'y', 'deg')
})
# The id has the form "<n>_<index>"; the integer before '_' is stored as n.
df_valid['n'] = df_valid['id'].apply(lambda x: int(x.split('_')[0]))

print(f"Loaded {len(df_valid)} rows")
print(df_valid.head())

In [None]:
# Validate the known valid baseline (should have 0 overlaps)
# Results are bound to *_valid names so the values computed for the first
# baseline (total_score, scores_by_n, overlaps) are not overwritten.
print("Validating known valid baseline...")
total_score_valid, scores_by_n_valid, overlaps_valid = validate_and_score(df_valid, check_overlaps=True)
print(f"\nTotal Score: {total_score_valid:.6f}")
print(f"Overlaps found: {len(overlaps_valid)}")