# Experiment 001: Ensemble from All Snapshots

Goal: Improve from 70.676 to ~70.55 by using better sources and an ensemble approach.

Strategy:
1. Load best available baseline (70.627569)
2. Scan ALL snapshots for best per-N solutions
3. For each N, select the configuration with the lowest score that has no overlaps
4. Create ensemble submission

In [1]:
import json
import os
import shutil
import sys
import warnings
from collections import defaultdict

import numpy as np
import pandas as pd

# The project-local `utils` module is imported from /home/code, so that
# directory has to be on the path before the import below.
sys.path.insert(0, '/home/code')
from utils import (
    ChristmasTree, load_submission, load_trees_for_n, get_trees_data_for_n,
    has_overlap, get_bounding_box_side, calculate_score_for_n, score_submission,
    find_all_submission_csvs, is_valid_submission, save_submission
)

# Reaching this line means every import above (including the project-local
# `utils` helpers) resolved without raising.
print("Utilities loaded successfully!")

Utilities loaded successfully!


In [2]:
# Baseline: the previously saved submission that every per-N candidate found
# later in the notebook has to beat.
best_baseline_path = '/home/nonroot/snapshots/santa-2025/21329069570/code/code/solutions/submission_70.627569.csv'
baseline_df = load_submission(best_baseline_path)
print(f"Baseline loaded: {baseline_df.shape}")

# Score it once with overlap checking switched on, then unpack the three
# returned values (total score, per-N scores, overlap report -- going by the
# names they are bound to).
baseline_result = score_submission(baseline_df, check_overlaps=True)
baseline_score, baseline_scores_by_n, baseline_overlaps = baseline_result
print(f"Baseline score: {baseline_score:.6f}")
print(f"Baseline overlaps: {baseline_overlaps}")

Baseline loaded: (20100, 4)


Baseline score: 70.627569
Baseline overlaps: []


In [3]:
# Discover every CSV under the snapshot tree, then keep only the ones that
# load with pandas and pass `is_valid_submission`.
snapshot_base = '/home/nonroot/snapshots/santa-2025/'
all_csvs = find_all_submission_csvs(snapshot_base)
print(f"Found {len(all_csvs)} CSV files in snapshots")

valid_submissions = []   # paths that parsed and validated
unreadable_csvs = []     # (path, "ErrorType: message") for files that raised

for csv_path in all_csvs:
    try:
        df = pd.read_csv(csv_path)
        if is_valid_submission(df):
            valid_submissions.append(csv_path)
    except Exception as e:
        # Best-effort scan: one broken file must not stop discovery, but it is
        # recorded so the drop is visible instead of silently swallowed.
        unreadable_csvs.append((csv_path, f"{type(e).__name__}: {e}"))

print(f"Found {len(valid_submissions)} valid submission files")
if unreadable_csvs:
    print(f"Skipped {len(unreadable_csvs)} files that raised while loading/validating (see unreadable_csvs)")

Found 3237 CSV files in snapshots


Found 3199 valid submission files


In [4]:
# Seed the per-N table from the baseline submission.
# Layout: best_per_n[n] = {'score': ..., 'source': ..., 'data': ..., 'has_overlap': ...}
# Every N from 1 to 200 starts out pointing at the baseline; 'has_overlap' is
# set to False here without re-checking.
best_per_n = {
    n: {
        'score': calculate_score_for_n(load_trees_for_n(baseline_df, n), n),
        'source': best_baseline_path,
        'data': get_trees_data_for_n(baseline_df, n),
        'has_overlap': False,
    }
    for n in range(1, 201)
}

# Spot-check two entries so the seeded scores can be eyeballed.
print(f"Initialized best_per_n with baseline scores")
print(f"Sample - N=1: score={best_per_n[1]['score']:.6f}")
print(f"Sample - N=100: score={best_per_n[100]['score']:.6f}")

Initialized best_per_n with baseline scores
Sample - N=1: score=0.661250
Sample - N=100: score=0.343427


In [5]:
# Scan all valid submissions for better per-N scores.
#
# For each N, a candidate replaces the current entry in `best_per_n` only when
# it scores strictly lower AND `has_overlap` reports no overlap. Each recorded
# 'improvement' is measured against the best known at that moment (the running
# best), not against the original baseline.

# Silence library warnings so the progress output of the long scan stays
# readable. Note this filter is process-wide and stays active afterwards.
warnings.filterwarnings('ignore')

improvements = defaultdict(list)   # n -> improvement records, in discovery order
processed = 0                      # submissions scanned end-to-end without error
failed_submissions = []            # (path, "ErrorType: message") for submissions that raised

for csv_path in valid_submissions:
    try:
        df = pd.read_csv(csv_path)

        for n in range(1, 201):
            trees = load_trees_for_n(df, n)
            if len(trees) != n:
                # This file does not hold a complete configuration for N.
                continue

            score_n = calculate_score_for_n(trees, n)
            current_best = best_per_n[n]['score']

            # Spelled `not (a < b)` rather than `a >= b` so that a NaN score
            # is rejected, exactly like the strict `<` test it replaces.
            if not (score_n < current_best):
                continue

            # The overlap check runs only for candidates that would win.
            has_ovlp, _ = has_overlap(trees, tolerance=1e-12)
            if has_ovlp:
                continue

            improvements[n].append({
                'improvement': current_best - score_n,
                'new_score': score_n,
                'old_score': current_best,
                'source': csv_path
            })

            # Promote this configuration to the new best for N.
            best_per_n[n] = {
                'score': score_n,
                'source': csv_path,
                'data': get_trees_data_for_n(df, n),
                'has_overlap': False
            }
    except Exception as e:
        # Best-effort: a single bad file must not abort the scan, but it is
        # recorded instead of silently dropped. Updates already applied for
        # smaller N from this same file are kept.
        failed_submissions.append((csv_path, f"{type(e).__name__}: {e}"))
    else:
        processed += 1
        if processed % 50 == 0:
            print(f"Processed {processed}/{len(valid_submissions)} submissions...")

print(f"\nProcessed {processed} submissions")
print(f"Found improvements for {len(improvements)} N values")
if failed_submissions:
    print(f"Skipped {len(failed_submissions)} submissions that raised errors (first 10 shown):")
    for failed_path, failure in failed_submissions[:10]:
        print(f"  {failed_path}: {failure}")

Processed 50/3199 submissions...


Processed 100/3199 submissions...


Processed 150/3199 submissions...


Processed 200/3199 submissions...


Processed 250/3199 submissions...


Processed 300/3199 submissions...


Processed 350/3199 submissions...


Processed 400/3199 submissions...


Processed 450/3199 submissions...


Processed 500/3199 submissions...


Processed 550/3199 submissions...


Processed 600/3199 submissions...


Processed 650/3199 submissions...


Processed 700/3199 submissions...


Processed 750/3199 submissions...


Processed 800/3199 submissions...


Processed 850/3199 submissions...


Processed 900/3199 submissions...


Processed 950/3199 submissions...


Processed 1000/3199 submissions...


Processed 1050/3199 submissions...


Processed 1100/3199 submissions...


Processed 1150/3199 submissions...


Processed 1200/3199 submissions...


Processed 1250/3199 submissions...


Processed 1300/3199 submissions...


Processed 1350/3199 submissions...


Processed 1400/3199 submissions...


Processed 1450/3199 submissions...


Processed 1500/3199 submissions...


Processed 1550/3199 submissions...


Processed 1600/3199 submissions...


Processed 1650/3199 submissions...


Processed 1700/3199 submissions...


Processed 1750/3199 submissions...


Processed 1800/3199 submissions...


Processed 1850/3199 submissions...


Processed 1900/3199 submissions...


Processed 1950/3199 submissions...


Processed 2000/3199 submissions...


Processed 2050/3199 submissions...


Processed 2100/3199 submissions...


Processed 2150/3199 submissions...


Processed 2200/3199 submissions...


Processed 2250/3199 submissions...


Processed 2300/3199 submissions...


Processed 2350/3199 submissions...


Processed 2400/3199 submissions...


Processed 2450/3199 submissions...


Processed 2500/3199 submissions...


Processed 2550/3199 submissions...


Processed 2600/3199 submissions...


Processed 2650/3199 submissions...


Processed 2700/3199 submissions...


Processed 2750/3199 submissions...


Processed 2800/3199 submissions...


Processed 2850/3199 submissions...


Processed 2900/3199 submissions...


Processed 2950/3199 submissions...


Processed 3000/3199 submissions...


Processed 3050/3199 submissions...


Processed 3100/3199 submissions...


Processed 3150/3199 submissions...



Processed 3192 submissions
Found improvements for 169 N values


In [6]:
# Flatten the per-N improvement log into (n, improvement, new_score, source)
# tuples and rank them with the largest improvement first. The sort is stable,
# so equal improvements keep their discovery order.
all_improvements = sorted(
    (
        (size, record['improvement'], record['new_score'], record['source'])
        for size, records in improvements.items()
        for record in records
    ),
    key=lambda row: row[1],
    reverse=True,
)

print("Top 20 improvements found:")
print("-" * 80)
for n, imp, new_score, source in all_improvements[:20]:
    # Show only the file name; the full snapshot path is too long for one line.
    short_source = source.rsplit('/', 1)[-1]
    print(f"N={n:3d}: improvement={imp:.6f}, new_score={new_score:.6f}, source={short_source}")

Top 20 improvements found:
--------------------------------------------------------------------------------
N= 87: improvement=0.003732, new_score=0.349960, source=submission2.csv
N= 65: improvement=0.002182, new_score=0.361611, source=submission2.csv
N=136: improvement=0.001008, new_score=0.344159, source=submission2.csv
N= 88: improvement=0.000696, new_score=0.347501, source=submission2.csv
N= 35: improvement=0.000570, new_score=0.366057, source=submission2.csv
N= 36: improvement=0.000342, new_score=0.358049, source=submission2.csv
N=169: improvement=0.000268, new_score=0.342231, source=submission2.csv
N=173: improvement=0.000265, new_score=0.339220, source=submission2.csv
N= 63: improvement=0.000241, new_score=0.352794, source=submission2.csv
N=128: improvement=0.000219, new_score=0.340751, source=submission2.csv
N= 54: improvement=0.000184, new_score=0.356260, source=submission2.csv
N= 76: improvement=0.000173, new_score=0.349595, source=submission2.csv
N=184: improvement=0.000151,

In [7]:
# Ensemble score = sum of the best per-N score for N = 1..200, reported next to
# the baseline total and the hard-coded target value.
per_n_best_scores = [best_per_n[n]['score'] for n in range(1, 201)]
ensemble_score = sum(per_n_best_scores)

print(f"\nEnsemble score: {ensemble_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Improvement: {baseline_score - ensemble_score:.6f}")
print(f"\nTarget: 68.890873")
print(f"Gap to target: {ensemble_score - 68.890873:.6f}")


Ensemble score: 70.615744
Baseline score: 70.627569
Improvement: 0.011826

Target: 68.890873
Gap to target: 1.724871


In [8]:
# Assemble the ensemble: take the stored rows of the best configuration for
# each N (in N order) and stack them into a single frame.
ensemble_rows = [best_per_n[n]['data'] for n in range(1, 201)]
ensemble_df = pd.concat(ensemble_rows, ignore_index=True)
print(f"Ensemble submission shape: {ensemble_df.shape}")

# Re-score the assembled frame from scratch (overlap checking on) instead of
# trusting the per-N scores collected during the scan.
ensemble_verify_score, _, ensemble_overlaps = score_submission(
    ensemble_df,
    check_overlaps=True,
)
print(f"Verified ensemble score: {ensemble_verify_score:.6f}")
print(f"Ensemble overlaps: {ensemble_overlaps}")

Ensemble submission shape: (20100, 4)


Verified ensemble score: 70.615744
Ensemble overlaps: []


In [9]:
# Persist the ensemble: CSV in the experiment folder, a copy in the submission
# folder, and a small metrics JSON next to the experiment CSV.
ensemble_path = '/home/code/experiments/001_ensemble/submission.csv'
submission_path = '/home/submission/submission.csv'
metrics_path = '/home/code/experiments/001_ensemble/metrics.json'

# None of the writers below create missing directories, so make sure the
# target folders exist before writing (no-op when they already do).
for target_path in (ensemble_path, submission_path, metrics_path):
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

ensemble_df.to_csv(ensemble_path, index=False)
print(f"Saved ensemble to {ensemble_path}")

# Copy to submission folder
shutil.copy(ensemble_path, submission_path)
print("Copied to /home/submission/submission.csv")

# Save metrics
metrics = {
    'cv_score': ensemble_verify_score,
    'baseline_score': baseline_score,
    'improvement': baseline_score - ensemble_verify_score,
    'n_improved': len(improvements),   # number of N values with at least one improvement
    'overlaps': ensemble_overlaps
}
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\nMetrics saved: {metrics}")

Saved ensemble to /home/code/experiments/001_ensemble/submission.csv
Copied to /home/submission/submission.csv

Metrics saved: {'cv_score': 70.61574363614818, 'baseline_score': 70.62756944140376, 'improvement': 0.011825805255583077, 'n_improved': 169, 'overlaps': []}


In [10]:
# Summary: build the report as a list of lines and emit it in one go; the text
# is the same as printing each line separately.
summary_lines = [
    "=" * 60,
    "EXPERIMENT 001: ENSEMBLE SUMMARY",
    "=" * 60,
    f"Baseline score: {baseline_score:.6f}",
    f"Ensemble score: {ensemble_verify_score:.6f}",
    f"Improvement: {baseline_score - ensemble_verify_score:.6f}",
    f"N values improved: {len(improvements)}",
    f"Overlaps: {len(ensemble_overlaps)}",
    f"\nTarget: 68.890873",
    f"Gap to target: {ensemble_verify_score - 68.890873:.6f} ({(ensemble_verify_score - 68.890873) / 68.890873 * 100:.2f}%)",
    "=" * 60,
]
print("\n".join(summary_lines))

EXPERIMENT 001: ENSEMBLE SUMMARY
Baseline score: 70.627569
Ensemble score: 70.615744
Improvement: 0.011826
N values improved: 169
Overlaps: 0

Target: 68.890873
Gap to target: 1.724871 (2.50%)
