# Baseline Experiment - Pre-optimized Submission

Verify the score of the pre-optimized santa-2025.csv submission.

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
import warnings
warnings.filterwarnings('ignore')

# Tree geometry (15 vertices), listed as (x, y) pairs starting at the top
# vertex (0, 0.8): positive-x side first, then the negative-x side.
TREE_COORDS = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
# Per-axis coordinate lists derived from the vertex pairs.
TX = [vx for vx, _ in TREE_COORDS]
TY = [vy for _, vy in TREE_COORDS]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` about the origin, then moved to (x, y).

    Args:
        x: Offset applied along the x axis after rotating.
        y: Offset applied along the y axis after rotating.
        deg: Rotation angle handed to shapely's ``rotate``, which treats it
            as degrees by default.

    Returns:
        A shapely ``Polygon`` for the placed tree.
    """
    # Rotate about (0, 0) first so the offset itself is not rotated.
    rotated_outline = rotate(Polygon(TREE_COORDS), deg, origin=(0, 0))
    return translate(rotated_outline, x, y)

# Overall extent of the outline along each axis
tree_height = max(TY) - min(TY)
tree_width = max(TX) - min(TX)

print("Tree geometry loaded. Tree has 15 vertices.")
print(f"Tree height: {tree_height:.2f}")
print(f"Tree width: {tree_width:.2f}")

In [None]:
# Read the pre-optimized submission CSV into a DataFrame
submission_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df = pd.read_csv(submission_path)

# The printout compares the shape against 1 + 2 + ... + 200 rows.
expected_rows = sum(range(1, 201))
print(f"Submission shape: {df.shape}")
print(f"Expected rows: {expected_rows} (1+2+...+200)")
print("\nFirst few rows:")
print(df.head(10))

In [None]:
# Parse the submission - values are prefixed with 's'
def parse_value(val):
    """Convert a submission cell to ``float``, dropping a leading ``'s'`` if present.

    Args:
        val: A string such as ``'s1.25'`` or ``'1.25'``, or any other value
            accepted by ``float()``.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the text left after removing the prefix is not a
            valid float literal.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    numeric_part = val[1:] if has_prefix else val
    return float(numeric_part)

# Convert each 's'-prefixed column into a parallel float column
for raw_col, parsed_col in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
    df[parsed_col] = df[raw_col].apply(parse_value)

# Split each id (format: NNN_idx) once, then take the N prefix and the tree index
id_parts = df['id'].apply(lambda tree_id: tree_id.split('_'))
df['n'] = id_parts.apply(lambda parts: int(parts[0]))
df['tree_idx'] = id_parts.apply(lambda parts: int(parts[1]))

print("Parsed values:")
print(df[['id', 'x_val', 'y_val', 'deg_val', 'n', 'tree_idx']].head(10))

In [None]:
def calculate_score_for_n(group):
    """Compute bounding-square statistics for the trees that share one N.

    Args:
        group: DataFrame rows for a single N; must provide the ``n``,
            ``x_val``, ``y_val`` and ``deg_val`` columns.

    Returns:
        A ``pd.Series`` with ``n``, ``side`` (the longer edge of the
        axis-aligned bounding box over all tree vertices),
        ``score_contribution`` (``side ** 2 / n``) and ``num_trees``
        (the number of rows in ``group``).
    """
    tree_count = group['n'].iloc[0]

    # Collect the exterior vertices of every placed tree, one array per tree.
    vertex_blocks = [
        np.array(
            create_tree_polygon(
                row['x_val'], row['y_val'], row['deg_val']
            ).exterior.coords
        )
        for _, row in group.iterrows()
    ]
    vertices = np.vstack(vertex_blocks)

    # Axis-aligned extent of the whole configuration; the square side is
    # whichever of the two extents is larger.
    lower = vertices.min(axis=0)
    upper = vertices.max(axis=0)
    extent_x, extent_y = upper - lower
    side = max(extent_x, extent_y)

    return pd.Series({
        'n': tree_count,
        'side': side,
        'score_contribution': (side ** 2) / tree_count,
        'num_trees': len(group),
    })

print("Calculating scores for each N...")
# Iterate over the groups explicitly rather than calling groupby(...).apply(...).
# calculate_score_for_n reads group['n'], and apply() operating on the grouping
# column is deprecated since pandas 2.2 (the warning is silenced by this
# notebook's warnings filter). Iterating a groupby always yields the complete
# sub-frame, including 'n', in sorted key order, so the resulting table has
# one row per N with a fresh 0..k-1 index.
scores_by_n = pd.DataFrame(
    [calculate_score_for_n(n_group) for _, n_group in df.groupby('n')]
)
print(f"\nFirst 10 N values:")
print(scores_by_n.head(10))

In [None]:
# Sum the per-N contributions into the overall score
total_score = scores_by_n['score_contribution'].sum()

banner = '=' * 50
print(f"\n{banner}")
print(f"TOTAL SCORE: {total_score:.6f}")
print(banner)

# Compare against the reference value for this submission
print(f"\nExpected score: 70.676102")
score_gap = abs(total_score - 70.676102)
print(f"Difference: {score_gap:.6f}")

# List the N values contributing most to the total (largest first)
print(f"\nWorst 10 N values (highest score contribution):")
worst_n = scores_by_n.nlargest(n=10, columns='score_contribution')
print(worst_n)

In [None]:
# Validate: check for overlaps in a few configurations
def has_overlap(polygons):
    """Report whether any two polygons overlap by more than a sliver.

    Candidate pairs come from an ``STRtree`` built over ``polygons``; the
    values returned by ``STRtree.query`` are used as positions into
    ``polygons``. Pairs that merely touch are ignored, and an intersection
    only counts when its area exceeds ``1e-10``, so numerical-precision
    noise is not reported.

    Args:
        polygons: Sequence of shapely geometries to test against each other.

    Returns:
        ``True`` as soon as one overlapping pair is found, else ``False``.
    """
    spatial_index = STRtree(polygons)
    for this_pos, this_poly in enumerate(polygons):
        for other_pos in spatial_index.query(this_poly):
            # A polygon always matches itself in the index; skip it.
            if other_pos == this_pos:
                continue
            other_poly = polygons[other_pos]
            if not this_poly.intersects(other_poly):
                continue
            # Shared boundary only: not an overlap.
            if this_poly.touches(other_poly):
                continue
            # Require a real shared area, not a precision artefact.
            if this_poly.intersection(other_poly).area > 1e-10:
                return True
    return False

# Spot-check a handful of N values for overlapping trees
print("Checking for overlaps in sample configurations...")
for n in (1, 10, 50, 100, 200):
    group = df.loc[df['n'] == n]
    # One polygon per row, built from that row's position and rotation.
    placements = zip(group['x_val'], group['y_val'], group['deg_val'])
    polygons = [create_tree_polygon(*placement) for placement in placements]
    overlap = has_overlap(polygons)
    if overlap:
        verdict = 'OVERLAP DETECTED!'
    else:
        verdict = 'No overlaps'
    print(f"N={n}: {verdict}")

print("\nValidation complete.")

In [None]:
# Save metrics
import json

# First row of worst_n: the N with the highest score contribution
# (worst_n is built with nlargest on 'score_contribution').
top_row = worst_n.iloc[0]

metrics = {
    'cv_score': total_score,
    'total_n_values': 200,
    'total_rows': len(df),
    'worst_n': int(top_row['n']),
    'worst_n_contribution': float(top_row['score_contribution']),
}

# Serialise once; the same text is written to disk and echoed below.
metrics_json = json.dumps(metrics, indent=2)
with open('/home/code/experiments/001_baseline/metrics.json', 'w') as f:
    f.write(metrics_json)

print("Metrics saved to experiments/001_baseline/metrics.json")
print(metrics_json)