# Experiment 001: Baseline Verification

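Load the best pre-optimized submission, verify its total score against the target, break the score down per N, spot-check a few configurations for overlapping trees, and copy the file to the submission and experiment folders.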

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import shutil
import os

# Tree geometry: the outline is listed once as (x, y) vertex pairs;
# TX and TY are the per-axis views of the same 15 vertices.
TREE_VERTICES = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = [vx for vx, _ in TREE_VERTICES]
TY = [vy for _, vy in TREE_VERTICES]

print("Tree geometry loaded")
print(f"Number of vertices: {len(TREE_VERTICES)}")

Tree geometry loaded
Number of vertices: 15


In [2]:
class ChristmasTree:
    """One placed tree: the TREE_VERTICES outline rotated about the origin, then shifted.

    Attributes:
        center_x, center_y: translation applied after rotation (stored as float).
        angle: rotation handed to ``affinity.rotate`` (shapely interprets it as
            degrees unless told otherwise), stored as float.
        polygon: the resulting shapely polygon in its final position.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the placed polygon; each argument may be anything ``float()`` accepts."""
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)

        # Rotate first (around the outline's own origin), translate second.
        self.polygon = affinity.translate(
            affinity.rotate(Polygon(TREE_VERTICES), self.angle, origin=(0, 0)),
            xoff=self.center_x,
            yoff=self.center_y,
        )

def parse_value(val):
    """Return ``val`` as a string, dropping a single leading 's' if it is a string that has one.

    Non-string inputs, and strings without the prefix, are passed through ``str()``.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_prefix else str(val)

def load_trees_for_n(df, n):
    """Build a ChristmasTree for every row whose id starts with the zero-padded ``NNN_`` prefix for ``n``.

    The 'x', 'y' and 'deg' cells are passed through ``parse_value`` before being
    handed to ``ChristmasTree``. Trees are returned in row order; the list is
    empty when no id matches.
    """
    id_prefix = f"{n:03d}_"
    selected = df[df['id'].str.startswith(id_prefix)]
    return [
        ChristmasTree(
            parse_value(record['x']),
            parse_value(record['y']),
            parse_value(record['deg']),
        )
        for _, record in selected.iterrows()
    ]

def compute_bounding_side(trees):
    """Return the longer side of the axis-aligned box enclosing every tree's exterior vertices.

    All exterior coordinates are stacked into one array; the result is the
    larger of the per-axis (max - min) extents.
    """
    stacked = np.vstack([np.array(tree.polygon.exterior.coords) for tree in trees])
    extents = stacked.max(axis=0) - stacked.min(axis=0)
    return max(extents)

def compute_total_score(df, max_n=200):
    """Compute the total score of a submission: the sum over N of side(N)**2 / N.

    Args:
        df: submission frame understood by ``load_trees_for_n``.
        max_n: largest configuration size to score; configurations
            1..max_n are included. Defaults to 200.

    Returns:
        The accumulated score over all scored configurations.

    Raises:
        ValueError: if configuration N does not contain exactly N trees.
            Scoring such a configuration would silently produce a number
            that looks valid but is not.
    """
    total = 0
    for n in range(1, max_n + 1):
        trees = load_trees_for_n(df, n)
        # An incomplete or over-full configuration would still yield a bounding
        # side, so check the count explicitly instead of trusting the file.
        if len(trees) != n:
            raise ValueError(
                f"Configuration N={n} has {len(trees)} trees; expected {n}"
            )
        side = compute_bounding_side(trees)
        total += side**2 / n
    return total

# Marker output: reaching this line means every definition in this cell executed.
print("Functions defined")

Functions defined


In [3]:
# Load the best pre-optimized submission
preopt_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv/santa-2025.csv'
# Fallback location tried only when the primary path is missing
alt_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'

# Check if file exists
if os.path.exists(preopt_path):
    df = pd.read_csv(preopt_path)
    print(f"Loaded {len(df)} rows from santa-2025.csv")
    print(f"Columns: {df.columns.tolist()}")
    print(f"\nFirst 10 rows:")
    print(df.head(10))
else:
    print(f"File not found: {preopt_path}")
    # Fail here, with both paths named, rather than letting a later cell
    # hit a NameError on an undefined `df`.
    if not os.path.exists(alt_path):
        raise FileNotFoundError(
            f"Submission not found at {preopt_path} or {alt_path}"
        )
    df = pd.read_csv(alt_path)
    print(f"Loaded from alternative path: {len(df)} rows")
    # Later cells copy the file from `preopt_path`; point it at the file
    # that was actually loaded so those copies do not target the missing one.
    preopt_path = alt_path

Loaded 20100 rows from santa-2025.csv
Columns: ['id', 'x', 'y', 'deg']

First 10 rows:
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   
5  003_2      s0.641714640229075      s1.180458566613381   
6  004_0  s-0.324747789589372171   s0.132109978088185392   
7  004_1   s0.315354346242637695   s0.132109978063475492   
8  004_2   s0.324747789592379210  s-0.732109978069475531   
9  004_3  s-0.315354348134818330  s-0.732109978094185987   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  
5      s155.13405193710082  
6  s156.370622145636389178  
7  s156.370622269264

In [4]:
# Compute total score
# Reference score the baseline is measured against (named once instead of
# being repeated as a bare literal in every print).
TARGET_SCORE = 68.919154

print("Computing total score...")
score = compute_total_score(df)
gap = score - TARGET_SCORE
print(f"\nTotal Score: {score:.6f}")
print(f"Target Score: {TARGET_SCORE:.6f}")
print(f"Gap: {gap:.6f} ({gap / TARGET_SCORE * 100:.2f}%)")

Computing total score...



Total Score: 70.676102
Target Score: 68.919154
Gap: 1.756948 (2.55%)


In [5]:
# Per-N breakdown: each configuration's bounding side, its share of the
# total score (side**2 / n) and the inverse of that share (n / side**2).
print("\nPer-N Analysis (top 20 contributors to score):")
per_n_scores = []
for n in range(1, 201):
    trees = load_trees_for_n(df, n)
    side = compute_bounding_side(trees)
    contribution = side**2 / n
    per_n_scores.append(
        dict(n=n, side=side, contribution=contribution, efficiency=n / (side**2))
    )

# Largest contributors first; only the top 20 rows are printed.
per_n_df = pd.DataFrame(per_n_scores).sort_values('contribution', ascending=False)
print(per_n_df.head(20).to_string())


Per-N Analysis (top 20 contributors to score):


     n      side  contribution  efficiency
0    1  0.813173      0.661250    1.512287
1    2  0.949504      0.450779    2.218381
2    3  1.142031      0.434745    2.300198
4    5  1.443692      0.416850    2.398947
3    4  1.290806      0.416545    2.400702
6    7  1.673104      0.399897    2.500647
5    6  1.548438      0.399610    2.502438
8    9  1.867280      0.387415    2.581211
7    8  1.755921      0.385407    2.594658
14  15  2.384962      0.379203    2.637111
9   10  1.940696      0.376630    2.655125
20  21  2.811667      0.376451    2.656389
19  20  2.742469      0.376057    2.659172
10  11  2.033002      0.375736    2.661443
21  22  2.873270      0.375258    2.664832
15  16  2.446640      0.374128    2.672882
25  26  3.118320      0.373997    2.673819
11  12  2.114873      0.372724    2.682950
12  13  2.200046      0.372323    2.685839
24  25  3.050182      0.372144    2.687129


In [6]:
# Check for overlaps in a few configurations
def has_overlap(trees):
    """Return True if any two trees' polygons intersect without merely touching.

    An STRtree built over the polygons supplies candidate neighbours for each
    polygon; the values it returns are used as positions into the polygon list.
    A pair counts as overlapping when ``intersects`` is true and ``touches`` is false.
    """
    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    for position, shape in enumerate(shapes):
        for candidate in spatial_index.query(shape):
            # The query also reports the polygon itself; skip it.
            if candidate == position:
                continue
            neighbour = shapes[candidate]
            if shape.intersects(neighbour) and not shape.touches(neighbour):
                return True
    return False

print("Checking for overlaps in sample configurations...")
# Spot-check a handful of configuration sizes rather than all of them.
for n in (1, 10, 50, 100, 200):
    trees = load_trees_for_n(df, n)
    overlap = has_overlap(trees)
    verdict = 'OVERLAP DETECTED!' if overlap else 'No overlaps'
    side_text = f"{compute_bounding_side(trees):.4f}"
    print(f"N={n}: {verdict} (side={side_text})")

Checking for overlaps in sample configurations...
N=1: No overlaps (side=0.8132)
N=10: No overlaps (side=1.9407)
N=50: No overlaps (side=4.2471)
N=100: No overlaps (side=5.8782)
N=200: No overlaps (side=8.2187)


In [7]:
# Copy to submission directory
submission_path = '/home/submission/submission.csv'
# shutil.copy does not create missing parent directories, so make sure it exists.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
shutil.copy(preopt_path, submission_path)
print("Copied to /home/submission/submission.csv")

# Verify the copy: re-read it and compare its shape against the frame scored above,
# so "Verified" is only printed when the check passed.
df_verify = pd.read_csv(submission_path)
assert list(df_verify.columns) == list(df.columns), "copied submission has different columns"
assert len(df_verify) == len(df), (
    f"copied submission has {len(df_verify)} rows; expected {len(df)}"
)
print(f"Verified: {len(df_verify)} rows")

Copied to /home/submission/submission.csv
Verified: 20100 rows


In [8]:
# Also save to experiment folder
experiment_path = '/home/code/experiments/001_baseline/submission.csv'
# shutil.copy does not create missing parent directories, so make sure it exists.
os.makedirs(os.path.dirname(experiment_path), exist_ok=True)
shutil.copy(preopt_path, experiment_path)
print("Saved to experiment folder")

# Summary
# Reference score the baseline is compared against.
TARGET_SCORE = 68.919154
print("\n" + "="*50)
print("BASELINE SUMMARY")
print("="*50)
print(f"Score: {score:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap: {score - TARGET_SCORE:.6f}")
print(f"Rows: {len(df)}")
print("Submission saved to: /home/submission/submission.csv")

Saved to experiment folder

BASELINE SUMMARY
Score: 70.676102
Target: 68.919154
Gap: 1.756948
Rows: 20100
Submission saved to: /home/submission/submission.csv
