# Baseline Experiment - Santa 2025

Goal: Establish baseline score using the best snapshot submission.

**Scoring Formula:** score = Σ(side_n² / n) for n=1 to 200 (lower is better)

**Target Score:** Beat 68.894234

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely import STRtree
import math
from decimal import Decimal, getcontext
getcontext().prec = 50

# Tree outline written as (x, y) vertex pairs (15 points) and unzipped into
# the parallel coordinate lists TX / TY consumed by get_tree_polygon.
TX, TY = (
    list(axis)
    for axis in zip(
        (0, 0.8),
        (0.125, 0.5),
        (0.0625, 0.5),
        (0.2, 0.25),
        (0.1, 0.25),
        (0.35, 0),
        (0.075, 0),
        (0.075, -0.2),
        (-0.075, -0.2),
        (-0.075, 0),
        (-0.35, 0),
        (-0.1, 0.25),
        (-0.2, 0.25),
        (-0.0625, 0.5),
        (-0.125, 0.5),
    )
)

print(f"Tree has {len(TX)} vertices")

Tree has 15 vertices


In [2]:
def get_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to (x, y).

    Every template vertex (TX[i], TY[i]) is rotated about the origin with the
    standard 2-D rotation matrix and then shifted by (x, y). The transformed
    points are returned as a shapely ``Polygon``.
    """
    theta = math.radians(deg)
    cos_a, sin_a = math.cos(theta), math.sin(theta)

    return Polygon([
        (tx * cos_a - ty * sin_a + x, tx * sin_a + ty * cos_a + y)
        for tx, ty in zip(TX, TY)
    ])

def get_bounding_box_side(polygons):
    """Return the side of the smallest axis-aligned square enclosing all polygons.

    Args:
        polygons: Any iterable (list, tuple, generator, array, ...) of objects
            exposing ``bounds`` as ``(minx, miny, maxx, maxy)``.

    Returns:
        The larger of the combined extent's width and height, or 0 when the
        iterable yields no polygons.
    """
    min_x = min_y = float('inf')
    max_x = max_y = float('-inf')
    found_any = False

    # Emptiness is detected during the single pass instead of truth-testing
    # the argument: an exhausted generator is truthy (which used to produce
    # -inf), and a numpy array raises when truth-tested.
    for poly in polygons:
        found_any = True
        lo_x, lo_y, hi_x, hi_y = poly.bounds  # (minx, miny, maxx, maxy)
        min_x = min(min_x, lo_x)
        min_y = min(min_y, lo_y)
        max_x = max(max_x, hi_x)
        max_y = max(max_y, hi_y)

    if not found_any:
        return 0

    # The enclosing square must cover the longer of the two extents.
    return max(max_x - min_x, max_y - min_y)

def check_overlaps(polygons):
    """Return True when at least one pair of polygons overlaps.

    Polygons that merely touch are not counted as overlapping, and fewer than
    two polygons can never overlap.
    """
    if len(polygons) < 2:
        return False

    spatial_index = STRtree(polygons)
    for i, current in enumerate(polygons):
        # The index query supplies candidate positions in `polygons`; each
        # unordered pair is tested exactly once by only looking at j > i.
        for j in spatial_index.query(current):
            if j > i:
                other = polygons[j]
                if current.intersects(other) and not current.touches(other):
                    return True
    return False

print("Functions defined")

Functions defined


In [3]:
# Read the snapshot submission into a DataFrame and preview its first rows
snapshot_path = '/home/code/experiments/001_baseline/snapshot_submission.csv'
df = pd.read_csv(snapshot_path)

print(f"Submission shape: {df.shape}")
print(df.head(10))

Submission shape: (20100, 4)
      id                          x                         y  \
0  001_0  s-48.19608619421424577922  s58.77098461521422478882   
1  002_0      s0.202513410337269301    s-0.028957664041420434   
2  002_1     s-0.105680728905459279    s-0.551876178651849569   
3  003_0       s1.12365581614030097      s0.78110181599256301   
4  003_1       s1.23405569584216002      s1.27599950066375900   
5  003_2       s0.64171464022907498      s1.18045856661338111   
6  004_0        s-0.324747789589372        s0.132109978088185   
7  004_1         s0.315354346242638        s0.132109978063475   
8  004_2         s0.324747789592379       s-0.732109978069476   
9  004_3        s-0.315354348134818       s-0.732109978094186   

                        deg  
0  s45.00000000000000000000  
1   s203.629377730656727863  
2    s23.629377730656813128  
3    s111.12513229289299943  
4     s66.37062226934300213  
5    s155.13405193710082131  
6      s156.370622145636389  
7      s156.370

In [4]:
# Parse the submission - values are prefixed with 's'
def parse_value(val):
    """Convert a submission field to ``float``, dropping a leading 's' if present.

    A string that starts with 's' has that first character removed before
    conversion; any other input is handed to ``float`` unchanged.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_prefix else val)

# Numeric copies of the 's'-prefixed position and rotation columns
for column in ('x', 'y', 'deg'):
    df[f'{column}_val'] = df[column].apply(parse_value)

# Ids have the form NNN_idx: split once, then keep both parts as integers
id_parts = df['id'].str.split('_')
df['n'] = id_parts.apply(lambda parts: int(parts[0]))
df['idx'] = id_parts.apply(lambda parts: int(parts[1]))

n_min, n_max = df['n'].min(), df['n'].max()
print(f"N values range: {n_min} to {n_max}")
print(f"Total trees: {len(df)}")
print(f"Expected total: {sum(range(1, 201))} = {200*201//2}")

N values range: 1 to 200
Total trees: 20100
Expected total: 20100 = 20100


In [5]:
# Score every configuration: n trees contribute side**2 / n to the total
scores = []
# Tracks only the tree-count check below; overlaps are not tested in this loop
all_valid = True

for n in range(1, 201):
    config = df[df['n'] == n]
    n_rows = len(config)
    if n_rows != n:
        # A configuration with the wrong number of trees is reported and skipped
        print(f"ERROR: n={n} has {n_rows} trees, expected {n}")
        all_valid = False
        continue

    # One polygon per tree, then the side of the square that encloses them all
    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
    ]
    side = get_bounding_box_side(polygons)

    score_contrib = (side ** 2) / n
    scores.append({'n': n, 'side': side, 'score_contrib': score_contrib})

    # Only a few representative sizes are echoed to keep the output short
    if n <= 5 or n % 50 == 0:
        print(f"n={n}: side={side:.6f}, score_contrib={score_contrib:.6f}")

print(f"\nAll configurations valid: {all_valid}")

n=1: side=0.813173, score_contrib=0.661250
n=2: side=0.949504, score_contrib=0.450779
n=3: side=1.142031, score_contrib=0.434745
n=4: side=1.290806, score_contrib=0.416545
n=5: side=1.443692, score_contrib=0.416850
n=50: side=4.247076, score_contrib=0.360753


n=100: side=5.859990, score_contrib=0.343395


n=150: side=7.110523, score_contrib=0.337064


n=200: side=8.216433, score_contrib=0.337549

All configurations valid: True


In [6]:
# Sum the per-configuration contributions into the overall baseline score
TARGET_SCORE = 68.894234  # score to beat; kept in one place so the report lines cannot drift apart

scores_df = pd.DataFrame(scores)
total_score = scores_df['score_contrib'].sum()

print("\n=== BASELINE SCORE ===")
print(f"Total Score: {total_score:.6f}")
print(f"Target Score: {TARGET_SCORE:.6f}")
# Positive gap means the baseline is still worse (higher) than the target
print(f"Gap to target: {total_score - TARGET_SCORE:.6f}")

# The ten configurations that add the most to the total
print("\n=== Top 10 Score Contributors ===")
top_contrib = scores_df.nlargest(10, 'score_contrib')
print(top_contrib.to_string(index=False))


=== BASELINE SCORE ===
Total Score: 70.615745
Target Score: 68.894234
Gap to target: 1.721511

=== Top 10 Score Contributors ===
 n     side  score_contrib
 1 0.813173       0.661250
 2 0.949504       0.450779
 3 1.142031       0.434745
 5 1.443692       0.416850
 4 1.290806       0.416545
 7 1.673104       0.399897
 6 1.548438       0.399610
 9 1.867280       0.387415
 8 1.755921       0.385407
15 2.377862       0.376949


In [8]:
# Spot-check a handful of configuration sizes for overlapping trees
print("Validating overlaps for sample configurations...")

sample_sizes = [1, 5, 10, 20, 50, 100, 150, 200]
for n in sample_sizes:
    config = df[df['n'] == n]
    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
    ]
    has_overlap = check_overlaps(polygons)
    print(f"n={n}: overlaps={has_overlap}")

print("\nValidation complete")

Validating overlaps for sample configurations...
n=1: overlaps=False
n=5: overlaps=False
n=10: overlaps=False
n=20: overlaps=False
n=50: overlaps=False
n=100: overlaps=False
n=150: overlaps=False
n=200: overlaps=False

Validation complete


In [None]:
# Save the baseline submission to the submission folder
# NOTE: this cell previously held an escaped, single-line copy (literal "\n"
# sequences) of the save cell that follows. As one long comment it executed
# nothing; the next cell performs the actual copy and writes metrics.json.

In [None]:
# Publish the baseline: copy the snapshot CSV into the submission folder
import json
import os
import shutil

os.makedirs('/home/submission', exist_ok=True)
shutil.copy(
    '/home/code/experiments/001_baseline/snapshot_submission.csv',
    '/home/submission/submission.csv',
)

print("Submission saved to /home/submission/submission.csv")

# Record the computed total score alongside the experiment files
metrics = {'cv_score': total_score}
with open('/home/code/experiments/001_baseline/metrics.json', 'w') as metrics_file:
    json.dump(metrics, metrics_file)

print(f"Metrics saved: {metrics}")