# Experiment 005: Per-N Ensemble from 116 Snapshots

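Systematically scan ALL 116 snapshots and select the BEST-scoring solution for EACH N value.
This is the highest-leverage, lowest-risk improvement available, provided the selected configurations are valid: the score used for selection only measures the bounding square, so the overlap validation at the end of this notebook must pass before submitting.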

In [1]:
import os
import pandas as pd
import math
from collections import defaultdict
import json
import glob

# Tree outline in its own frame: vertex i is the point (TX[i], TY[i]).
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]

def get_polygon_bounds(cx, cy, deg):
    """Return the axis-aligned bounding box of one placed tree.

    The outline (TX, TY) is rotated by ``deg`` degrees about its local origin
    and then shifted by ``(cx, cy)``.

    Returns a tuple ``(x_min, x_max, y_min, y_max)``.
    """
    angle = deg * math.pi / 180.0
    sin_a = math.sin(angle)
    cos_a = math.cos(angle)

    xs = []
    ys = []
    for vx, vy in zip(TX, TY):
        xs.append(vx * cos_a - vy * sin_a + cx)
        ys.append(vx * sin_a + vy * cos_a + cy)

    return min(xs), max(xs), min(ys), max(ys)

def calculate_score_for_n(trees):
    """Score one N configuration: bounding-square area divided by tree count.

    ``trees`` is a sequence of ``(idx, cx, cy, deg)`` tuples. The side of the
    square is the larger of the width and height of the box enclosing every
    tree's bounding box. An empty configuration scores ``inf``.
    """
    if not trees:
        return float('inf')

    boxes = [get_polygon_bounds(cx, cy, deg) for _idx, cx, cy, deg in trees]

    # Seed each fold with +/-inf so the comparison order matches a running min/max.
    left = min(float('inf'), *[box[0] for box in boxes])
    right = max(float('-inf'), *[box[1] for box in boxes])
    bottom = min(float('inf'), *[box[2] for box in boxes])
    top = max(float('-inf'), *[box[3] for box in boxes])

    side = max(right - left, top - bottom)
    return side * side / len(trees)

print("Functions defined")

Functions defined


In [2]:
def load_submission(filepath):
    """Load a submission CSV and group its rows by tree count.

    Parameters
    ----------
    filepath : str
        Path to a CSV with columns ``id``, ``x``, ``y`` and ``deg``. Each ``id``
        has the form ``"<n>_<idx>"``. Any ``s`` characters in the numeric
        columns (the save cell writes values as ``s<number>``) are removed
        before parsing.

    Returns
    -------
    dict
        Maps ``n`` to a list of ``(idx, x, y, deg)`` tuples sorted by ``idx``.
        An empty dict is returned when the file has fewer than 20000 rows, or
        when it cannot be read or parsed; in the latter case the reason is
        printed so that dropped files are visible in the scan output.
    """
    configurations = defaultdict(list)
    try:
        df = pd.read_csv(filepath)
        if len(df) < 20000:  # Skip incomplete files
            return {}
        # Walk the four columns in lockstep instead of DataFrame.iterrows(),
        # which builds a Series per row and is much slower on ~20k-row files.
        for row_id, raw_x, raw_y, raw_deg in zip(df['id'], df['x'], df['y'], df['deg']):
            id_parts = row_id.split('_')
            n = int(id_parts[0])
            idx = int(id_parts[1])
            x = float(str(raw_x).replace('s', ''))
            y = float(str(raw_y).replace('s', ''))
            deg = float(str(raw_deg).replace('s', ''))
            configurations[n].append((idx, x, y, deg))
        for n in configurations:
            configurations[n].sort(key=lambda t: t[0])
        return dict(configurations)
    except Exception as exc:
        # Best-effort scan: one unreadable file must not abort the run, but the
        # reason it was dropped should not vanish silently.
        print(f"Skipping {filepath}: {type(exc).__name__}: {exc}")
        return {}

print("Load function defined")

Load function defined


In [3]:
# Find ALL submission files in snapshots
SNAPSHOT_DIR = '/home/nonroot/snapshots/santa-2025'
# os.listdir() returns entries in arbitrary order. The scan cell keeps the first
# file that reaches a given score, so sort to make the result reproducible.
snapshots = sorted(os.listdir(SNAPSHOT_DIR))
print(f"Found {len(snapshots)} snapshot directories")

# Collect every submission.csv from the three known locations in each snapshot
all_submission_files = []
for snapshot_id in snapshots:
    snapshot_root = f'{SNAPSHOT_DIR}/{snapshot_id}'

    # Standard location, then the code folder
    for csv_path in (
        f'{snapshot_root}/submission/submission.csv',
        f'{snapshot_root}/code/submission.csv',
    ):
        if os.path.exists(csv_path):
            all_submission_files.append(csv_path)

    # Experiment folders (glob order is also arbitrary, hence sorted)
    exp_csvs = sorted(glob.glob(f'{snapshot_root}/code/experiments/*/submission.csv'))
    all_submission_files.extend(exp_csvs)

print(f"Found {len(all_submission_files)} submission files to scan")

Found 116 snapshot directories
Found 487 submission files to scan


In [4]:
# Seed the per-N table from the baseline submission; N values the baseline
# does not fully cover start at an infinite score with no trees.
baseline_path = '/home/code/experiments/002_valid_baseline/submission.csv'
baseline_configs = load_submission(baseline_path)

best_per_n = {}
for n in range(1, 201):
    candidate = baseline_configs.get(n)
    if candidate is not None and len(candidate) == n:
        entry = {
            'score': calculate_score_for_n(candidate),
            'trees': candidate,
            'source': 'baseline',
        }
    else:
        entry = {'score': float('inf'), 'trees': None, 'source': None}
    best_per_n[n] = entry

baseline_total = sum(best_per_n[n]['score'] for n in range(1, 201))
print(f"Baseline total score: {baseline_total:.6f}")

Baseline total score: 70.615102


In [5]:
# Scan ALL submission files and keep, for each N, the lowest-scoring configuration.
# NOTE: selection here is by score only. calculate_score_for_n never checks whether
# trees overlap, so the winners still have to pass the overlap validation performed
# later in this notebook before they can be trusted.
print(f"\nScanning {len(all_submission_files)} submission files...")
print("="*60)

improvements_found = []  # (n, improvement, filepath) for every replacement made
files_processed = 0      # counts only files that load_submission accepted

for filepath in all_submission_files:
    configs = load_submission(filepath)
    if not configs:
        continue
    
    files_processed += 1
    
    for n in range(1, 201):
        # Only consider N values for which the file holds exactly n trees
        if n not in configs or len(configs[n]) != n:
            continue
        
        score = calculate_score_for_n(configs[n])
        # Capture the incumbent score before the entry is overwritten, so it can
        # be reported directly instead of being reconstructed afterwards.
        previous_score = best_per_n[n]['score']
        
        if score < previous_score - 1e-10:  # Meaningful improvement
            improvement = previous_score - score
            
            best_per_n[n] = {'score': score, 'trees': configs[n], 'source': filepath}
            improvements_found.append((n, improvement, filepath))
            
            if improvement > 0.0001:  # Only print significant improvements
                print(f"✅ N={n}: {previous_score:.6f} -> {score:.6f} (improved by {improvement:.6f})")
    
    if files_processed % 50 == 0:
        print(f"  Processed {files_processed}/{len(all_submission_files)} files...")

print("="*60)
print(f"Processed {files_processed} valid submission files")
print(f"Found {len(improvements_found)} improvements")


Scanning 487 submission files...


✅ N=2: 0.450779 -> 0.437328 (improved by 0.013451)
✅ N=4: 0.416545 -> 0.411056 (improved by 0.005489)
✅ N=5: 0.416850 -> 0.394109 (improved by 0.022740)
✅ N=16: 0.374128 -> 0.373894 (improved by 0.000234)
✅ N=46: 0.364452 -> 0.364133 (improved by 0.000319)
✅ N=47: 0.357493 -> 0.356418 (improved by 0.001075)
✅ N=48: 0.355530 -> 0.355221 (improved by 0.000309)
✅ N=53: 0.361855 -> 0.358787 (improved by 0.003069)
✅ N=54: 0.356260 -> 0.352169 (improved by 0.004091)
✅ N=55: 0.355023 -> 0.346789 (improved by 0.008234)
✅ N=56: 0.352281 -> 0.340953 (improved by 0.011327)
✅ N=59: 0.360576 -> 0.360239 (improved by 0.000337)
✅ N=62: 0.353660 -> 0.353560 (improved by 0.000100)
✅ N=69: 0.354528 -> 0.353706 (improved by 0.000823)
✅ N=70: 0.349513 -> 0.349355 (improved by 0.000158)
✅ N=71: 0.352232 -> 0.348328 (improved by 0.003904)
✅ N=77: 0.351113 -> 0.350211 (improved by 0.000903)
✅ N=79: 0.348564 -> 0.348004 (improved by 0.000560)
✅ N=80: 0.344880 -> 0.343654 (improved by 0.001227)
✅ N=107: 0.3460

  Processed 50/487 files...


✅ N=7: 0.399897 -> 0.353857 (improved by 0.046040)
✅ N=11: 0.374924 -> 0.352785 (improved by 0.022139)
✅ N=12: 0.372724 -> 0.315008 (improved by 0.057716)
✅ N=13: 0.372294 -> 0.347303 (improved by 0.024991)
✅ N=14: 0.369543 -> 0.332917 (improved by 0.036626)
✅ N=16: 0.373894 -> 0.305943 (improved by 0.067952)
✅ N=17: 0.370040 -> 0.282957 (improved by 0.087084)
✅ N=18: 0.368771 -> 0.247397 (improved by 0.121375)
✅ N=19: 0.368615 -> 0.280140 (improved by 0.088475)
✅ N=20: 0.376057 -> 0.268968 (improved by 0.107089)
✅ N=21: 0.376451 -> 0.341558 (improved by 0.034893)
✅ N=22: 0.375258 -> 0.266288 (improved by 0.108970)
✅ N=23: 0.368750 -> 0.261462 (improved by 0.107288)
✅ N=24: 0.365506 -> 0.233483 (improved by 0.132023)
✅ N=25: 0.372144 -> 0.236288 (improved by 0.135857)
✅ N=26: 0.373997 -> 0.256869 (improved by 0.117128)
✅ N=27: 0.362719 -> 0.244315 (improved by 0.118404)
✅ N=28: 0.366105 -> 0.252629 (improved by 0.113476)
✅ N=29: 0.367082 -> 0.261655 (improved by 0.105427)
✅ N=30: 0.360

  Processed 100/487 files...


  Processed 150/487 files...


✅ N=3: 0.434745 -> 0.431512 (improved by 0.003233)


  Processed 200/487 files...


  Processed 250/487 files...


✅ N=2: 0.437328 -> 0.362022 (improved by 0.075306)
✅ N=3: 0.431512 -> 0.268713 (improved by 0.162799)
✅ N=4: 0.411056 -> 0.227236 (improved by 0.183820)
✅ N=5: 0.394109 -> 0.212694 (improved by 0.181415)
✅ N=6: 0.399610 -> 0.173625 (improved by 0.225985)
✅ N=7: 0.353857 -> 0.157468 (improved by 0.196389)
✅ N=8: 0.385407 -> 0.136883 (improved by 0.248524)
✅ N=9: 0.387415 -> 0.128142 (improved by 0.259273)
✅ N=10: 0.376630 -> 0.154792 (improved by 0.221838)
✅ N=11: 0.352785 -> 0.135039 (improved by 0.217746)
✅ N=12: 0.315008 -> 0.115983 (improved by 0.199025)
✅ N=13: 0.347303 -> 0.108253 (improved by 0.239051)
✅ N=14: 0.332917 -> 0.111515 (improved by 0.221402)
✅ N=15: 0.376949 -> 0.106120 (improved by 0.270829)
✅ N=16: 0.305943 -> 0.062169 (improved by 0.243774)
✅ N=17: 0.282957 -> 0.102560 (improved by 0.180396)
✅ N=18: 0.247397 -> 0.077631 (improved by 0.169765)
✅ N=19: 0.280140 -> 0.068570 (improved by 0.211570)
✅ N=20: 0.268968 -> 0.061771 (improved by 0.207197)
✅ N=21: 0.341558 -> 

  Processed 300/487 files...


  Processed 350/487 files...


  Processed 400/487 files...


  Processed 450/487 files...


Processed 460 valid submission files
Found 334 improvements


In [6]:
# Total the per-N scores into the ensemble score and compare with the baseline
per_n_scores = [best_per_n[n]['score'] for n in range(1, 201)]
ensemble_score = sum(per_n_scores)
print(f"\nBaseline score: {baseline_total:.6f}")
print(f"Ensemble score: {ensemble_score:.6f}")
print(f"Total improvement: {baseline_total - ensemble_score:.6f}")

# Tally how many N values each source file contributed
source_counts = defaultdict(int)
for n in range(1, 201):
    winner = best_per_n[n]['source']
    source_counts[winner] += 1

# Stable descending sort: sources with equal counts keep their first-seen order
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)

print(f"\nSource distribution (top 10):")
for source, count in ranked_sources[:10]:
    print(f"  {source}: {count} N values")


Baseline score: 70.615102
Ensemble score: 27.478179
Total improvement: 43.136923

Source distribution (top 10):
  /home/nonroot/snapshots/santa-2025/21145963314/code/submission.csv: 143 N values
  /home/nonroot/snapshots/santa-2025/21165878844/code/experiments/002_ensemble/submission.csv: 56 N values
  baseline: 1 N values


In [7]:
# Report the 20 largest individual improvements recorded during the scan
significant = [record for record in improvements_found if record[1] > 0.0001]

if not improvements_found:
    print("\nNo improvements found at all")
elif not significant:
    print("\nNo significant improvements found (all < 0.0001)")
else:
    print(f"\nSignificant improvements (> 0.0001):")
    # Stable descending sort on the improvement amount
    largest_first = sorted(significant, key=lambda record: record[1], reverse=True)
    for n, imp, src in largest_first[:20]:
        print(f"  N={n}: improved by {imp:.6f}")


Significant improvements (> 0.0001):
  N=21: improved by 0.274263
  N=15: improved by 0.270829
  N=9: improved by 0.259273
  N=8: improved by 0.248524
  N=16: improved by 0.243774
  N=13: improved by 0.239051
  N=6: improved by 0.225985
  N=10: improved by 0.221838
  N=14: improved by 0.221402
  N=11: improved by 0.217746
  N=36: improved by 0.216994
  N=24: improved by 0.216346
  N=29: improved by 0.215230
  N=170: improved by 0.214495
  N=187: improved by 0.213275
  N=191: improved by 0.213122
  N=185: improved by 0.213082
  N=200: improved by 0.212607
  N=192: improved by 0.212454
  N=145: improved by 0.211959


In [8]:
# Save ensemble submission
# NOTE(review): this cell runs before the overlap validation further down, so the
# files written here may contain overlapping configurations unless they are
# re-saved after validation.
import csv

def _write_submission(path, per_n):
    """Write the per-N ensemble to ``path`` in submission CSV format.

    Creates the parent directory if needed. Each tree becomes one row
    ``<n:03d>_<idx>, s<x>, s<y>, s<deg>`` with 17 decimal places.

    Returns the list of N values (1..200) that had no trees and were therefore
    left out of the file.
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)
    missing = []
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'x', 'y', 'deg'])
        for n in range(1, 201):
            trees = per_n[n]['trees']
            if not trees:
                missing.append(n)
                continue
            for idx, x, y, deg in trees:
                writer.writerow([f'{n:03d}_{idx}', f's{x:.17f}', f's{y:.17f}', f's{deg:.17f}'])
    return missing

print("\nSaving ensemble submission...")

# Experiment folder first, then the submission folder
for out_path in (
    '/home/code/experiments/005_per_n_ensemble/submission.csv',
    '/home/submission/submission.csv',
):
    missing_n = _write_submission(out_path, best_per_n)
    if missing_n:
        # An incomplete file would otherwise be written without any indication
        print(f"WARNING: {out_path} is missing N values: {missing_n}")

print(f"Saved ensemble submission")
print(f"Final score: {ensemble_score:.6f}")


Saving ensemble submission...
Saved ensemble submission
Final score: 27.478179


In [9]:
# Persist the run's summary numbers next to the experiment's submission
total_improvement = baseline_total - ensemble_score

metrics = dict(
    cv_score=ensemble_score,
    baseline_score=baseline_total,
    improvement=total_improvement,
    improvements_found=len(improvements_found),
    files_scanned=len(all_submission_files),
    files_processed=files_processed,
)

with open('/home/code/experiments/005_per_n_ensemble/metrics.json', 'w') as f:
    f.write(json.dumps(metrics, indent=2))

print(f"\nMetrics saved")
print(f"CV Score: {ensemble_score:.6f}")
print(f"Improvement: {total_improvement:.6f}")


Metrics saved
CV Score: 27.478179
Improvement: 43.136923


In [10]:
# CRITICAL: Validate ensemble for overlaps before submitting!
# The score of 27.48 is suspiciously good - likely contains overlapping solutions
# (calculate_score_for_n only measures the bounding square; it never checks
# whether trees intersect).

from shapely.geometry import Polygon
from shapely import affinity
from decimal import Decimal, getcontext

# Precision (significant digits) of the Decimal context used when coordinates
# are scaled in validate_n_for_overlaps.
getcontext().prec = 30
# Multiplier applied to each coordinate before it is truncated to an integer.
SCALE = 10**18

# Same vertex lists as the TX/TY defined in the first cell (redefined here with
# identical values).
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees about its local origin, then moved to (x, y)."""
    outline = Polygon(list(zip(TX, TY)))
    turned = affinity.rotate(outline, deg, origin=(0, 0))
    placed = affinity.translate(turned, xoff=x, yoff=y)
    return placed

def validate_n_for_overlaps(trees):
    """Check one configuration for trees whose interiors overlap.

    ``trees`` is a sequence of ``(idx, x, y, deg)`` tuples. Returns
    ``(True, "OK")`` when no pair overlaps (always the case for zero or one
    tree), otherwise ``(False, message)`` for the first offending pair. The
    numbers in the message are positions in ``trees``, not the ``idx`` fields.

    Pairs that merely touch, or whose intersection has zero area, are accepted.
    """
    if len(trees) <= 1:
        return True, "OK"

    def scaled_copy(poly):
        # Multiply every exterior vertex by SCALE and truncate it to an integer.
        # NOTE(review): shapely presumably stores these as floats again, so the
        # comparison may not be exact integer arithmetic - confirm.
        return Polygon([
            (int(Decimal(str(pt[0])) * SCALE), int(Decimal(str(pt[1])) * SCALE))
            for pt in list(poly.exterior.coords)
        ])

    scaled = [scaled_copy(create_tree_polygon(x, y, deg)) for _idx, x, y, deg in trees]

    for i, first in enumerate(scaled):
        for j in range(i + 1, len(scaled)):
            second = scaled[j]
            if not first.intersects(second):
                continue
            if first.touches(second):
                continue
            if first.intersection(second).area > 0:
                return False, f"Trees {i} and {j} overlap"
    return True, "OK"

print("Validation function defined")

In [11]:
# Run the overlap check on the chosen configuration for every N.
# An N with no trees at all is recorded as invalid too.
print("Validating ensemble for overlaps...")
overlap_errors = []
valid_count = 0

for n in range(1, 201):
    candidate = best_per_n[n]['trees']
    passed = bool(candidate) and validate_n_for_overlaps(candidate)[0]
    if passed:
        valid_count += 1
    else:
        overlap_errors.append(n)

print(f"Valid N values: {valid_count}/200")
print(f"Invalid N values: {len(overlap_errors)}")

if not overlap_errors:
    print("\n✅ All N values are valid! Safe to submit.")
else:
    print(f"\n⚠️ ENSEMBLE HAS {len(overlap_errors)} N VALUES WITH OVERLAPS!")
    print(f"Invalid N values: {overlap_errors[:30]}...")  # Show first 30

In [12]:
# Fall back to baseline for N values with overlaps
if overlap_errors:
    print("Falling back to baseline for N values with overlaps...")
    
    # Reload baseline from the same path used to initialise best_per_n
    baseline_configs = load_submission(baseline_path)
    
    # Replace invalid N values with baseline, remembering any that cannot be replaced
    unresolved = []
    for n in overlap_errors:
        if n in baseline_configs and len(baseline_configs[n]) == n:
            best_per_n[n] = {
                'score': calculate_score_for_n(baseline_configs[n]), 
                'trees': baseline_configs[n], 
                'source': 'baseline_fallback'
            }
        else:
            unresolved.append(n)
    
    if unresolved:
        # Without this, entries that failed validation would silently stay in the ensemble
        print(f"WARNING: no usable baseline entry for N values {unresolved}; these remain invalid")
    
    # Recalculate score
    new_ensemble_score = sum(best_per_n[n]['score'] for n in range(1, 201))
    print(f"\nNew ensemble score (with fallbacks): {new_ensemble_score:.6f}")
    print(f"Baseline score: {baseline_total:.6f}")
    print(f"Improvement: {baseline_total - new_ensemble_score:.6f}")
    
    # Update ensemble_score
    ensemble_score = new_ensemble_score
    
    # The submission.csv and metrics.json written by the earlier save cells were
    # produced from best_per_n before these replacements were made.
    print("NOTE: previously saved submission.csv and metrics.json predate these fallbacks; re-save before submitting")
else:
    print("No fallbacks needed - all N values are valid!")