# Experiment 001: Establish Baseline

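Copy the best pre-optimized submission into the submission folder and recompute its score locally (the sum over N = 1…200 of side²/N, where side is the larger dimension of the axis-aligned bounding box of the N-tree layout) to establish the baseline.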

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os
import json

# Tree geometry
# Outline of a single tree as parallel x / y vertex lists (15 vertices).
# The outline is symmetric about x = 0: tip at (0, 0.8), three tiers that
# widen downward, and a trunk that reaches down to y = -0.2.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
BASE_TREE = Polygon(list(zip(TX, TY)))

# Sanity-check the geometry before it is used for scoring.
tree_min_x, tree_min_y, tree_max_x, tree_max_y = BASE_TREE.bounds
print(f"Tree area: {BASE_TREE.area:.4f}")
print(f"Tree bounds: {BASE_TREE.bounds}")
print(f"Tree width: {tree_max_x - tree_min_x:.4f}")
print(f"Tree height: {tree_max_y - tree_min_y:.4f}")

Tree area: 0.2456
Tree bounds: (-0.35, -0.2, 0.35, 0.8)
Tree width: 0.7000
Tree height: 1.0000


In [2]:
def parse_value(s):
    """Convert a submission field to float, dropping a leading 's' if present.

    Strings that start with 's' have that single character removed before
    conversion; anything else is handed to float() unchanged.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    numeric_part = s[1:] if has_prefix else s
    return float(numeric_part)

def create_tree_polygon(x, y, deg):
    """Return BASE_TREE rotated by `deg` about (0, 0) and then shifted by (x, y).

    `deg` is passed straight to shapely's affinity.rotate, which interprets
    it as degrees by default.
    """
    # Rotate first, around the tree's own origin, so that (x, y) is where
    # that origin ends up after the shift.
    rotated = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def calculate_score(df):
    """Score a submission as the sum over N = 1..200 of side**2 / N.

    For each N, the rows whose 'id' starts with the zero-padded prefix
    "NNN_" are turned into tree polygons; `side` is the larger of the x and
    y extents of all their exterior vertices taken together.

    A group that does not contain exactly N rows is reported with a printed
    warning and skipped, so it contributes nothing to the total. No overlap
    check between trees is performed here.

    Returns:
        (total_score, scores_by_n) where scores_by_n maps each scored N to
        {'side': ..., 'score': ...}.
    """
    scores_by_n = {}
    total_score = 0

    for n in range(1, 201):
        # Select the rows that belong to the N-tree layout.
        group = df[df['id'].str.startswith(f"{n:03d}_")]
        found = len(group)

        if found != n:
            print(f"Warning: N={n} has {found} trees, expected {n}")
            continue

        # Stack the exterior vertices of every placed tree into one (k, 2) array.
        vertices = np.vstack([
            np.array(
                create_tree_polygon(
                    parse_value(row['x']),
                    parse_value(row['y']),
                    parse_value(row['deg']),
                ).exterior.coords
            )
            for _, row in group.iterrows()
        ])

        xs = vertices[:, 0]
        ys = vertices[:, 1]
        # Side of the axis-aligned square that encloses every vertex.
        side = max(xs.max() - xs.min(), ys.max() - ys.min())

        score_n = side**2 / n
        scores_by_n[n] = {'side': side, 'score': score_n}
        total_score += score_n

    return total_score, scores_by_n

# Confirmation message that the helper definitions in this cell were executed.
print("Score calculation functions defined")

Score calculation functions defined


In [3]:
# Load the pre-optimized submission
# NOTE(review): absolute path pointing at one specific snapshot directory;
# it has to be adjusted when the notebook is run elsewhere.
preopt_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
# The x / y / deg fields carry an 's' prefix (visible in the printed head);
# parse_value() strips it when the score is computed.
df = pd.read_csv(preopt_path)
print(f"Loaded {len(df)} rows")
# Peek at the first rows to confirm the id / x / y / deg column layout.
print(df.head(10))

Loaded 20100 rows
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   
5  003_2      s0.641714640229075      s1.180458566613381   
6  004_0  s-0.324747789589372171   s0.132109978088185392   
7  004_1   s0.315354346242637695   s0.132109978063475492   
8  004_2   s0.324747789592379210  s-0.732109978069475531   
9  004_3  s-0.315354348134818330  s-0.732109978094185987   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  
5      s155.13405193710082  
6  s156.370622145636389178  
7  s156.370622269264089255  
8  s336.370622269264003990  
9  s336.370622145636446021  


In [4]:
# Score the pre-optimized submission with the local scorer
print("Calculating score for pre-optimized submission...")
total_score, scores_by_n = calculate_score(df)
print(f"\nTotal Score: {total_score:.6f}")

# Rank the layouts by their individual contribution (the 'score' entry,
# i.e. side**2 / n, largest first) and list the 15 biggest.
print("\nTop 15 score contributors (by score/n):")
ranked = sorted(scores_by_n.items(), key=lambda item: item[1]['score'], reverse=True)
sorted_scores = ranked[:15]
for n, data in sorted_scores:
    print(f"N={n:3d}: side={data['side']:.4f}, score={data['score']:.4f}")

Calculating score for pre-optimized submission...



Total Score: 70.676102

Top 15 score contributors (by score/n):
N=  1: side=0.8132, score=0.6612
N=  2: side=0.9495, score=0.4508
N=  3: side=1.1420, score=0.4347
N=  5: side=1.4437, score=0.4168
N=  4: side=1.2908, score=0.4165
N=  7: side=1.6731, score=0.3999
N=  6: side=1.5484, score=0.3996
N=  9: side=1.8673, score=0.3874
N=  8: side=1.7559, score=0.3854
N= 15: side=2.3850, score=0.3792
N= 10: side=1.9407, score=0.3766
N= 21: side=2.8117, score=0.3765
N= 20: side=2.7425, score=0.3761
N= 11: side=2.0330, score=0.3757
N= 22: side=2.8733, score=0.3753


In [5]:
# Copy to submission folder
import filecmp
import shutil

# Single source of truth for the destination, instead of repeating the literal.
submission_path = '/home/submission/submission.csv'
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
shutil.copy(preopt_path, submission_path)
print("Copied pre-optimized submission to /home/submission/submission.csv")

# Verify copy: compare the two files byte-for-byte (shallow=False forces a
# content comparison rather than trusting os.stat metadata), so a truncated
# or corrupted copy is caught instead of merely printing a plausible head.
if not filecmp.cmp(preopt_path, submission_path, shallow=False):
    raise RuntimeError(f"Copy at {submission_path} differs from {preopt_path}")

# Read the copy back as a final check that it parses as a CSV.
df_verify = pd.read_csv(submission_path)
print(f"Verified: {len(df_verify)} rows")
print(df_verify.head())

Copied pre-optimized submission to /home/submission/submission.csv
Verified: 20100 rows
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  


In [6]:
# Save metrics
# The locally computed total is stored under the 'cv_score' key.
metrics = {'cv_score': total_score}
os.makedirs('/home/code/experiments/001_baseline', exist_ok=True)
# Serialise first, then write the finished JSON text in one go.
metrics_json = json.dumps(metrics)
with open('/home/code/experiments/001_baseline/metrics.json', 'w') as f:
    f.write(metrics_json)
print(f"Saved metrics: {metrics}")

Saved metrics: {'cv_score': 70.67610239809186}
