# Experiment 001: Baseline Verification

Load the best pre-optimized CSV and verify its score.

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import shutil
import os

# Tree geometry
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    """Parse value with 's' prefix"""
    if isinstance(s, str) and s.startswith('s'):
        return float(s[1:])
    return float(s)

def create_tree_polygon(x, y, deg):
    """Create a tree polygon at position (x, y) with rotation deg"""
    angle_rad = np.radians(deg)
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    
    vertices = []
    for tx, ty in zip(TX, TY):
        # Rotate and translate
        rx = tx * cos_a - ty * sin_a + x
        ry = tx * sin_a + ty * cos_a + y
        vertices.append((rx, ry))
    
    return Polygon(vertices)

def compute_bounding_side(polygons):
    """Compute the side length of the square bounding box"""
    if not polygons:
        return 0
    
    all_points = []
    for poly in polygons:
        all_points.extend(list(poly.exterior.coords))
    
    all_points = np.array(all_points)
    min_x, min_y = all_points.min(axis=0)
    max_x, max_y = all_points.max(axis=0)
    
    return max(max_x - min_x, max_y - min_y)

def compute_score_for_n(df, n):
    """Compute score contribution for N trees"""
    prefix = f"{n:03d}_"
    trees = df[df['id'].str.startswith(prefix)]
    
    if len(trees) != n:
        print(f"Warning: Expected {n} trees for N={n}, got {len(trees)}")
        return float('inf')
    
    polygons = []
    for _, row in trees.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        polygons.append(create_tree_polygon(x, y, deg))
    
    side = compute_bounding_side(polygons)
    return side**2 / n

def compute_total_score(df):
    """Compute total score for all N from 1 to 200"""
    total = 0
    for n in range(1, 201):
        score_n = compute_score_for_n(df, n)
        total += score_n
    return total

print("Functions defined successfully")

Functions defined successfully


In [2]:
# Load the best pre-optimized CSV
src_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv/santa-2025.csv'
df = pd.read_csv(src_path)
print(f"Loaded {len(df)} rows")
print(df.head())

Loaded 20100 rows
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  


In [3]:
# Compute total score
print("Computing total score...")
total_score = compute_total_score(df)
print(f"\nTotal Score: {total_score:.6f}")

Computing total score...



Total Score: 70.676102


In [4]:
# Also check other pre-optimized sources
other_sources = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv'
]

scores = {}
for src in other_sources:
    if os.path.exists(src):
        try:
            df_other = pd.read_csv(src)
            score = compute_total_score(df_other)
            scores[src.split('/')[-1]] = score
            print(f"{src.split('/')[-1]}: {score:.6f}")
        except Exception as e:
            print(f"{src.split('/')[-1]}: Error - {e}")

best_ensemble.csv: 70.676102


submission.csv: 70.676501


ensemble.csv: 70.676102


submission.csv: 70.676501


In [5]:
# Find the best source and copy to submission
best_source = src_path
best_score = total_score

for src in other_sources:
    if os.path.exists(src):
        try:
            df_other = pd.read_csv(src)
            score = compute_total_score(df_other)
            if score < best_score:
                best_score = score
                best_source = src
        except:
            pass

print(f"\nBest source: {best_source}")
print(f"Best score: {best_score:.6f}")

# Copy to submission
shutil.copy(best_source, '/home/submission/submission.csv')
print(f"\nCopied to /home/submission/submission.csv")


Best source: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv/santa-2025.csv
Best score: 70.676102

Copied to /home/submission/submission.csv


In [6]:
# Verify the copied submission
df_sub = pd.read_csv('/home/submission/submission.csv')
final_score = compute_total_score(df_sub)
print(f"Final submission score: {final_score:.6f}")

Final submission score: 70.676102


In [8]:
# Check all available pre-optimized CSVs and find the best one
import glob

all_csvs = glob.glob('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/**/*.csv', recursive=True)
print(f"Found {len(all_csvs)} CSV files")

best_score = float('inf')
best_file = None

for csv_path in all_csvs:
    try:
        df_test = pd.read_csv(csv_path)
        if 'id' in df_test.columns and 'x' in df_test.columns:
            score = compute_total_score(df_test)
            if score < best_score:
                best_score = score
                best_file = csv_path
            print(f"{csv_path.split('preoptimized/')[-1]}: {score:.6f}")
    except Exception as e:
        print(f"{csv_path.split('preoptimized/')[-1]}: Error - {str(e)[:50]}")

print(f"\nBest file: {best_file}")
print(f"Best score: {best_score:.6f}")

Found 30 CSV files


ensemble.csv: 70.676102


submission.csv: 70.676501


santa-2025.csv: 70.676102


best_ensemble.csv: 70.676102


telegram/72.49.csv: 72.495739


telegram/71.97.csv: 71.972027


telegram/telegram_extracted/72.49.csv: 72.495739


telegram/telegram_extracted/71.97.csv: 71.972027


santa25-public/submission_JKoT4.csv: 72.489504


santa25-public/New_Tree_144_196.csv: 72.927920


santa25-public/submission_JKoT3.csv: 72.489488


santa25-public/santa2025_ver2_v61.csv: 72.951925


santa25-public/submission_JKoT2.csv: 72.489348


santa25-public/santa2025_ver2_v67.csv: 72.938567


santa25-public/santa2025_ver2_v76.csv: 72.826444


santa25-public/submission_70_936673758122.csv: 70.936674


santa25-public/santa2025_ver2_v65.csv: 72.935294


santa25-public/submission_70_926149550346.csv: 70.926150


santa25-public/santa2025_ver2_v66.csv: 72.938599


santa25-public/santa2025_ver2_v63.csv: 72.947427


santa25-public/santa2025_ver2_v69.csv: 72.850110


santa25-public/submission_JKoT1.csv: 72.489483


santa25-public/submission_opt1.csv: 70.990692


santa25-public/santa2025_ver2_v68.csv: 72.939233


santa-2025-csv/santa-2025.csv: 70.676102


bucket-of-chump/submission.csv: 70.676501


blended/submission (77).csv: 72.135010


santa-2025-try3/submission.csv: 72.935294


santa-2025-try3/submission_sa.csv: 72.935294


chistyakov/submission_best.csv: 70.926150

Best file: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
Best score: 70.676102


In [9]:
# Check the chistyakov external dataset
df_chistyakov = pd.read_csv('/home/code/external_data/submission_best.csv')
score_chistyakov = compute_total_score(df_chistyakov)
print(f"Chistyakov external dataset score: {score_chistyakov:.6f}")

Chistyakov external dataset score: 70.926150


In [11]:
# Check the saspav external dataset (latest version)
df_saspav = pd.read_csv('/home/code/external_data/saspav/santa-2025.csv')
score_saspav = compute_total_score(df_saspav)
print(f"Saspav external dataset score: {score_saspav:.6f}")

# Compare with our current best
print(f"\nCurrent best: 70.676102")
print(f"Saspav: {score_saspav:.6f}")
print(f"Difference: {score_saspav - 70.676102:.6f}")

Saspav external dataset score: 70.659959

Current best: 70.676102
Saspav: 70.659959
Difference: -0.016143


In [12]:
# Update submission with the better saspav solution
shutil.copy('/home/code/external_data/saspav/santa-2025.csv', '/home/submission/submission.csv')
print("Updated submission with saspav solution")

# Verify
df_final = pd.read_csv('/home/submission/submission.csv')
final_score = compute_total_score(df_final)
print(f"Final submission score: {final_score:.6f}")

Updated submission with saspav solution


Final submission score: 70.659959


In [None]:
# Check the bucket-of-chump external dataset (latest version)
df_bucket = pd.read_csv('/home/code/external_data/bucket/submission.csv')
score_bucket = compute_total_score(df_bucket)
print(f"Bucket-of-chump external dataset score: {score_bucket:.6f}")

# Compare with our current best
print(f"\nCurrent best: 70.659959")
print(f"Bucket: {score_bucket:.6f}")
print(f"Difference: {score_bucket - 70.659959:.6f}")