# Baseline: Pre-optimized santa-2025.csv

This notebook establishes the baseline using the best pre-optimized solution from snapshots.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
import json

# Set high precision for Decimal calculations
getcontext().prec = 50

# Tree geometry (15 vertices)
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

print(f"Tree has {len(TX)} vertices")
print(f"Tree height: {TY.max() - TY.min()}")
print(f"Tree width: {TX.max() - TX.min()}")

Tree has 15 vertices
Tree height: 1.0
Tree width: 0.7


In [2]:
def get_tree_vertices(x, y, deg):
    """Get the vertices of a tree at position (x, y) with rotation deg."""
    angle_rad = np.radians(deg)
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    
    # Rotate and translate
    rotated_x = TX * cos_a - TY * sin_a + x
    rotated_y = TX * sin_a + TY * cos_a + y
    
    return rotated_x, rotated_y

def get_bounding_box(trees_df):
    """Calculate the bounding box side length for a set of trees."""
    all_x = []
    all_y = []
    
    for _, row in trees_df.iterrows():
        # Parse values (remove 's' prefix)
        x = float(str(row['x'])[1:]) if str(row['x']).startswith('s') else float(row['x'])
        y = float(str(row['y'])[1:]) if str(row['y']).startswith('s') else float(row['y'])
        deg = float(str(row['deg'])[1:]) if str(row['deg']).startswith('s') else float(row['deg'])
        
        vx, vy = get_tree_vertices(x, y, deg)
        all_x.extend(vx)
        all_y.extend(vy)
    
    min_x, max_x = min(all_x), max(all_x)
    min_y, max_y = min(all_y), max(all_y)
    
    # Square bounding box - take the max of width and height
    side = max(max_x - min_x, max_y - min_y)
    return side

print("Functions defined.")

Functions defined.


In [3]:
# Load the pre-optimized solution
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df = pd.read_csv(baseline_path)

print(f"Loaded {len(df)} rows")
print(df.head())

Loaded 20100 rows
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  


In [4]:
# Calculate score for each N
def calculate_score(df):
    """Calculate the total score for a submission."""
    scores = []
    
    for n in range(1, 201):
        # Get trees for this N
        prefix = f"{n:03d}_"
        trees = df[df['id'].str.startswith(prefix)]
        
        if len(trees) != n:
            print(f"Warning: N={n} has {len(trees)} trees instead of {n}")
            continue
        
        # Calculate bounding box
        side = get_bounding_box(trees)
        
        # Score contribution: s^2 / n
        contribution = (side ** 2) / n
        scores.append({
            'n': n,
            'side': side,
            'contribution': contribution
        })
    
    scores_df = pd.DataFrame(scores)
    total_score = scores_df['contribution'].sum()
    
    return total_score, scores_df

total_score, scores_df = calculate_score(df)
print(f"\nTotal Score: {total_score:.6f}")
print(f"\nScore breakdown by N range:")
for start in [1, 11, 51, 101, 151]:
    end = min(start + 49, 200)
    range_score = scores_df[(scores_df['n'] >= start) & (scores_df['n'] <= end)]['contribution'].sum()
    print(f"  N={start}-{end}: {range_score:.4f}")


Total Score: 70.676102

Score breakdown by N range:
  N=1-50: 19.0422
  N=11-60: 18.3101
  N=51-100: 17.6411
  N=101-150: 17.1441
  N=151-200: 16.8487


In [5]:
# Show the top 10 N values by contribution (highest impact)
print("\nTop 10 N values by score contribution (highest impact):")
top_n = scores_df.nlargest(10, 'contribution')
print(top_n.to_string(index=False))


Top 10 N values by score contribution (highest impact):
 n     side  contribution
 1 0.813173      0.661250
 2 0.949504      0.450779
 3 1.142031      0.434745
 5 1.443692      0.416850
 4 1.290806      0.416545
 7 1.673104      0.399897
 6 1.548438      0.399610
 9 1.867280      0.387415
 8 1.755921      0.385407
15 2.384962      0.379203


In [6]:
# Show the smallest N values (1-10) which have highest per-tree impact
print("\nSmall N values (1-10):")
small_n = scores_df[scores_df['n'] <= 10]
print(small_n.to_string(index=False))
print(f"\nTotal contribution from N=1-10: {small_n['contribution'].sum():.4f}")


Small N values (1-10):
 n     side  contribution
 1 0.813173      0.661250
 2 0.949504      0.450779
 3 1.142031      0.434745
 4 1.290806      0.416545
 5 1.443692      0.416850
 6 1.548438      0.399610
 7 1.673104      0.399897
 8 1.755921      0.385407
 9 1.867280      0.387415
10 1.940696      0.376630

Total contribution from N=1-10: 4.3291


In [7]:
# Copy the baseline to submission folder
import shutil
import os

os.makedirs('/home/submission', exist_ok=True)
shutil.copy(baseline_path, '/home/submission/submission.csv')
print("Copied baseline to /home/submission/submission.csv")

# Verify the copy
df_verify = pd.read_csv('/home/submission/submission.csv')
print(f"Verified: {len(df_verify)} rows")

Copied baseline to /home/submission/submission.csv
Verified: 20100 rows


In [8]:
# Save metrics
metrics = {
    'cv_score': total_score,
    'n_trees_total': len(df),
    'score_breakdown': {
        'n_1_10': scores_df[scores_df['n'] <= 10]['contribution'].sum(),
        'n_11_50': scores_df[(scores_df['n'] > 10) & (scores_df['n'] <= 50)]['contribution'].sum(),
        'n_51_100': scores_df[(scores_df['n'] > 50) & (scores_df['n'] <= 100)]['contribution'].sum(),
        'n_101_150': scores_df[(scores_df['n'] > 100) & (scores_df['n'] <= 150)]['contribution'].sum(),
        'n_151_200': scores_df[(scores_df['n'] > 150) & (scores_df['n'] <= 200)]['contribution'].sum()
    }
}

with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Saved metrics.json")
print(f"\nFinal CV Score: {total_score:.6f}")
print(f"Target Score: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f} ({(total_score - 68.919154) / 68.919154 * 100:.2f}%)")

Saved metrics.json

Final CV Score: 70.676102
Target Score: 68.919154
Gap: 1.756948 (2.55%)
