# Snapshot Ensemble Experiment

Verify the scores of best_snapshot.csv and saspav_best.csv, then build a per-N ensemble (keeping the lowest-scoring configuration for each N) with a target total score of 70.626088.

In [1]:
import pandas as pd
import numpy as np
import math
from numba import njit
import json
import os

# Tree vertices: TX holds the x-coordinates and TY the y-coordinates of the
# 15-point tree outline, paired by index. They are expressed in the tree's own
# local frame; score_group rotates and translates them for every placed tree.
TX = np.array([0,0.125,0.0625,0.2,0.1,0.35,0.075,0.075,-0.075,-0.075,-0.35,-0.1,-0.2,-0.0625,-0.125])
TY = np.array([0.8,0.5,0.5,0.25,0.25,0,0,-0.2,-0.2,0,0,0.25,0.25,0.5,0.5])

In [2]:
@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one N-tree layout.

    Each tree i is the template outline (tx, ty) rotated by degs[i] degrees and
    shifted by (xs[i], ys[i]). The axis-aligned bounding box of all transformed
    vertices is tracked, and the result is the square of its longer edge
    divided by the number of trees.
    """
    count = xs.size
    n_vertices = tx.size
    # Sentinel bounds: any real vertex coordinate replaces them on first use.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300
    for tree in range(count):
        angle = degs[tree] * math.pi / 180.0
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        for v in range(n_vertices):
            # Rotate the template vertex, then translate to the tree position.
            px = cos_a * tx[v] - sin_a * ty[v] + xs[tree]
            py = sin_a * tx[v] + cos_a * ty[v] + ys[tree]
            lo_x = min(lo_x, px)
            hi_x = max(hi_x, px)
            lo_y = min(lo_y, py)
            hi_y = max(hi_y, py)
    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / count

In [3]:
def parse_submission(filepath):
    """Parse a submission CSV into per-N coordinate arrays.

    The CSV must have the columns ``id``, ``x``, ``y`` and ``deg``. ``id`` is
    formatted ``NNN_idx``; the part before the underscore is the group size N.
    The numeric columns may carry a leading ``s`` prefix (e.g. ``s0.25``) or be
    plain numbers; both forms are accepted.

    Parameters
    ----------
    filepath : str or path-like
        Location of the submission CSV.

    Returns
    -------
    configs : dict
        Maps N to a tuple ``(xs, ys, degs)`` of float arrays, rows in file order.
    df : pandas.DataFrame
        The loaded frame with the added columns ``x_val``, ``y_val``,
        ``deg_val`` (floats) and ``N`` (int).
    """
    df = pd.read_csv(filepath)

    # Strip only a leading 's' prefix. Casting to str first keeps this working
    # when a file has no prefix and pandas has already parsed the column as
    # numeric (where the .str accessor would otherwise raise AttributeError).
    for source_col, value_col in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
        df[value_col] = (
            df[source_col]
            .astype(str)
            .str.strip()
            .str.replace(r'^s', '', regex=True)
            .astype(float)
        )

    # Extract N from id (format: NNN_idx)
    df['N'] = df['id'].str.split('_').str[0].astype(int)

    configs = {}
    for n, group in df.groupby('N'):
        configs[n] = (
            group['x_val'].values,
            group['y_val'].values,
            group['deg_val'].values,
        )

    return configs, df

In [4]:
def calculate_total_score(configs, tx, ty):
    """Sum the per-N scores for N = 1..200.

    Any N missing from ``configs`` is skipped. Returns ``(total, scores_by_n)``
    where ``scores_by_n`` maps each scored N to its ``score_group`` value.
    """
    scores_by_n = {}
    total = 0.0
    for n in range(1, 201):
        if n not in configs:
            continue
        xs, ys, degs = configs[n]
        scores_by_n[n] = score_group(xs, ys, degs, tx, ty)
        # Accumulate in ascending-N order, one term at a time.
        total += scores_by_n[n]
    return total, scores_by_n

In [5]:
# Candidate submissions to score, keyed by a short label
files = {
    'baseline': '/home/code/preoptimized/submission.csv',
    'best_snapshot': '/home/code/preoptimized/best_snapshot.csv',
    'saspav_best': '/home/code/preoptimized/saspav_best.csv',
    'smartmanoj': '/home/code/preoptimized/smartmanoj_submission.csv',
    'ensemble': '/home/code/preoptimized/ensemble.csv',
}

# Per-label results: parsed configs, total score, and per-N scores
all_configs, all_scores, all_scores_by_n = {}, {}, {}

for name, path in files.items():
    # Report and skip files that are not present
    if not os.path.exists(path):
        print(f"{name}: FILE NOT FOUND")
        continue

    configs, df = parse_submission(path)
    total, scores_by_n = calculate_total_score(configs, TX, TY)

    all_configs[name] = configs
    all_scores[name] = total
    all_scores_by_n[name] = scores_by_n
    print(f"{name}: {total:.6f}")

baseline: 70.647327
best_snapshot: 70.624381
saspav_best: 70.630478
smartmanoj: 70.743774


ensemble: 70.647327


In [6]:
# Score the snapshot's own submission as an extra candidate when it exists
snapshot_path = '/home/nonroot/snapshots/santa-2025/21198927060/submission/submission.csv'
if os.path.exists(snapshot_path):
    configs, df = parse_submission(snapshot_path)
    total, scores_by_n = calculate_total_score(configs, TX, TY)
    # Register it under the same three result dicts as the other candidates
    for registry, value in (
        (all_configs, configs),
        (all_scores, total),
        (all_scores_by_n, scores_by_n),
    ):
        registry['snapshot_submission'] = value
    print(f"snapshot_submission: {total:.6f}")

snapshot_submission: 70.624381


In [7]:
# For every non-baseline submission, list the N values where it scores lower
baseline_scores = all_scores_by_n['baseline']

print("\nN values where other submissions beat baseline:")
for name, candidate_scores in all_scores_by_n.items():
    if name == 'baseline':
        continue

    # (N, improvement) pairs; a margin of 1e-9 filters out float noise
    better_n = [
        (n, baseline_scores[n] - candidate_scores[n])
        for n in range(1, 201)
        if n in candidate_scores
        and n in baseline_scores
        and candidate_scores[n] < baseline_scores[n] - 1e-9
    ]
    if not better_n:
        continue

    print(f"\n{name}: {len(better_n)} N values better than baseline")
    # Show the ten largest improvements first
    largest_first = sorted(better_n, key=lambda pair: pair[1], reverse=True)
    for n, imp in largest_first[:10]:
        print(f"  N={n}: improvement {imp:.6f}")


N values where other submissions beat baseline:

best_snapshot: 109 N values better than baseline
  N=43: improvement 0.002975
  N=54: improvement 0.002705
  N=88: improvement 0.002471
  N=15: improvement 0.002253
  N=100: improvement 0.002134
  N=64: improvement 0.001728
  N=87: improvement 0.001537
  N=76: improvement 0.001494
  N=95: improvement 0.001338
  N=91: improvement 0.000935

saspav_best: 104 N values better than baseline
  N=43: improvement 0.002843
  N=54: improvement 0.002696
  N=15: improvement 0.002225
  N=100: improvement 0.002103
  N=64: improvement 0.001684
  N=76: improvement 0.001415
  N=95: improvement 0.001293
  N=91: improvement 0.000882
  N=52: improvement 0.000537
  N=36: improvement 0.000429

snapshot_submission: 109 N values better than baseline
  N=43: improvement 0.002975
  N=54: improvement 0.002705
  N=88: improvement 0.002471
  N=15: improvement 0.002253
  N=100: improvement 0.002134
  N=64: improvement 0.001728
  N=87: improvement 0.001537
  N=76: imp

In [8]:
# Build the ensemble: for each N keep the configuration with the lowest score
print("\nCreating optimal ensemble...")
best_configs = {}
best_scores_by_n = {}

for n in range(1, 201):
    winner_score = float('inf')
    winner_config = None

    for source, per_n_scores in all_scores_by_n.items():
        if n not in per_n_scores:
            continue
        candidate_score = per_n_scores[n]
        # Strict '<' means the first-registered source wins a tie
        if candidate_score < winner_score:
            winner_score = candidate_score
            winner_config = all_configs[source][n]

    best_configs[n] = winner_config
    best_scores_by_n[n] = winner_score

ensemble_total = sum(best_scores_by_n.values())
print(f"\nOptimal ensemble score: {ensemble_total:.6f}")
print(f"Baseline score: {all_scores['baseline']:.6f}")
print(f"Improvement: {all_scores['baseline'] - ensemble_total:.6f}")


Creating optimal ensemble...

Optimal ensemble score: 70.624381
Baseline score: 70.647327
Improvement: 0.022946


In [9]:
# Count how many N values each source contributes to the optimal ensemble
print("\nSource breakdown for optimal ensemble:")
source_counts = {}
for n in range(1, 201):
    winner, winner_score = None, float('inf')
    for name, per_n_scores in all_scores_by_n.items():
        if n in per_n_scores and per_n_scores[n] < winner_score:
            winner, winner_score = name, per_n_scores[n]
    source_counts.setdefault(winner, 0)
    source_counts[winner] += 1

# Largest contributors first
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)
for source, count in ranked_sources:
    print(f"  {source}: {count} N values")


Source breakdown for optimal ensemble:
  best_snapshot: 197 N values
  baseline: 3 N values


In [10]:
# Generate submission CSV from optimal ensemble
def generate_submission(configs, output_path):
    """Write the configurations for N = 1..200 to a submission CSV.

    ``configs[n]`` must be a tuple ``(xs, ys, degs)``. One row is emitted per
    tree with id ``NNN_idx`` and the x, y and deg values prefixed with ``s``.
    The resulting DataFrame is written to ``output_path`` without an index
    column and is also returned.
    """
    def _rows():
        # Yield rows in ascending N, then ascending tree index
        for n in range(1, 201):
            xs, ys, degs = configs[n]
            for idx in range(len(xs)):
                yield {
                    'id': f'{n:03d}_{idx}',
                    'x': f's{xs[idx]}',
                    'y': f's{ys[idx]}',
                    'deg': f's{degs[idx]}',
                }

    df = pd.DataFrame(list(_rows()))
    df.to_csv(output_path, index=False)
    return df

# Write the ensemble to the submission directory, creating it if needed
os.makedirs('/home/submission', exist_ok=True)
submission_df = generate_submission(
    best_configs,
    '/home/submission/submission.csv',
)
print(f"Saved ensemble submission to /home/submission/submission.csv")
# One row per tree across all N
print(f"Total rows: {len(submission_df)}")

Saved ensemble submission to /home/submission/submission.csv
Total rows: 20100


In [11]:
# Re-read the file just written and confirm it scores the same as the in-memory ensemble
verify_configs = parse_submission('/home/submission/submission.csv')[0]
verify_total = calculate_total_score(verify_configs, TX, TY)[0]

print(f"\nVerification of saved submission: {verify_total:.6f}")
print(f"Expected: {ensemble_total:.6f}")
# Totals are considered matching when they differ by less than 1e-6
print(f"Match: {abs(verify_total - ensemble_total) < 1e-6}")


Verification of saved submission: 70.624381
Expected: 70.624381
Match: True


In [None]:
# Save metrics: the verified ensemble score, the baseline score, their
# difference, and how many N values each source contributed.
metrics = {
    'cv_score': verify_total,
    'baseline_score': all_scores['baseline'],
    'improvement': all_scores['baseline'] - verify_total,
    'source_counts': source_counts
}

metrics_path = '/home/code/experiments/002_snapshot_ensemble/metrics.json'
# Create the experiment directory first, mirroring the submission cell, so the
# write does not fail with FileNotFoundError when the directory is missing.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nSaved metrics to experiments/002_snapshot_ensemble/metrics.json")
print(f"CV Score: {verify_total:.6f}")