# Loop 4 Analysis: Ensemble Strategy Deep Dive

The evaluator correctly identified that single-solution optimization has hit a wall.
Three experiments (bbox3, sa_fast, fix_direction) all failed to improve the baseline.

The path forward is an ENSEMBLE: for each N, take the best solution found across multiple sources.

In [1]:
import os
import glob
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import json

getcontext().prec = 25

print("Imports done")

Imports done


In [2]:
# Tree geometry and scoring functions
def make_tree_polygon(cx, cy, deg):
    """Return the 15-vertex tree outline rotated by ``deg`` degrees and moved to (cx, cy).

    Args:
        cx, cy: translation added to every vertex after rotation.
        deg: rotation angle in degrees (converted to radians below).

    Returns:
        A shapely ``Polygon`` built from the transformed vertices.
    """
    import math

    # Widths and heights of the template outline (presumably trunk / base /
    # middle / top tier widths and the matching y-levels).
    w_trunk, w_base, w_mid, w_top = 0.15, 0.7, 0.4, 0.25
    y_tip, y_tier1, y_tier2, y_base, y_bottom = 0.8, 0.5, 0.25, 0.0, -0.2

    # Template vertices, in the same order as the outline is traversed.
    template = [
        (0, y_tip),
        (w_top/2, y_tier1),
        (w_top/4, y_tier1),
        (w_mid/2, y_tier2),
        (w_mid/4, y_tier2),
        (w_base/2, y_base),
        (w_trunk/2, y_base),
        (w_trunk/2, y_bottom),
        (-w_trunk/2, y_bottom),
        (-w_trunk/2, y_base),
        (-w_base/2, y_base),
        (-w_mid/4, y_tier2),
        (-w_mid/2, y_tier2),
        (-w_top/4, y_tier1),
        (-w_top/2, y_tier1),
    ]

    angle = deg * math.pi / 180
    cos_a, sin_a = math.cos(angle), math.sin(angle)

    # Rotate each template vertex about the origin, then translate.
    rotated = []
    for px, py in template:
        rotated.append((px*cos_a - py*sin_a + cx, px*sin_a + py*cos_a + cy))
    return Polygon(rotated)

def score_group(xs, ys, degs):
    """Score a group of trees: squared side of the bounding square divided by tree count.

    Args:
        xs, ys: per-tree translations (indexable, length n).
        degs: per-tree rotation angles in degrees.

    Returns:
        ``side * side / n`` where ``side`` is the larger extent (x or y) of the
        axis-aligned bounding box of all transformed tree vertices.
    """
    import math

    count = len(xs)

    # Same template outline as make_tree_polygon.
    w_trunk, w_base, w_mid, w_top = 0.15, 0.7, 0.4, 0.25
    y_tip, y_tier1, y_tier2, y_base, y_bottom = 0.8, 0.5, 0.25, 0.0, -0.2
    template = [
        (0, y_tip),
        (w_top/2, y_tier1),
        (w_top/4, y_tier1),
        (w_mid/2, y_tier2),
        (w_mid/4, y_tier2),
        (w_base/2, y_base),
        (w_trunk/2, y_base),
        (w_trunk/2, y_bottom),
        (-w_trunk/2, y_bottom),
        (-w_trunk/2, y_base),
        (-w_base/2, y_base),
        (-w_mid/4, y_tier2),
        (-w_mid/2, y_tier2),
        (-w_top/4, y_tier1),
        (-w_top/2, y_tier1),
    ]

    # Running bounding box, seeded with sentinels that any real vertex replaces.
    lo_x, lo_y, hi_x, hi_y = 1e300, 1e300, -1e300, -1e300
    for k in range(count):
        angle = degs[k] * math.pi / 180
        cos_a, sin_a = math.cos(angle), math.sin(angle)
        for px, py in template:
            vx = px*cos_a - py*sin_a + xs[k]
            vy = px*sin_a + py*cos_a + ys[k]
            if vx < lo_x:
                lo_x = vx
            if vx > hi_x:
                hi_x = vx
            if vy < lo_y:
                lo_y = vy
            if vy > hi_y:
                hi_y = vy

    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / count

def strip(a):
    """Parse each value as a float after removing every 's' character from its string form.

    Returns a float64 numpy array with one entry per input value.
    """
    parsed = []
    for value in a:
        text = str(value).replace('s', '')
        parsed.append(float(text))
    return np.array(parsed, np.float64)

print("Scoring functions defined")

Scoring functions defined


In [3]:
# List every snapshot directory under the snapshot root, sorted by name
snapshot_dir = '/home/nonroot/snapshots/santa-2025'
snapshots = []
for entry in os.listdir(snapshot_dir):
    # Keep directories only; stray files in the root are ignored
    if os.path.isdir(os.path.join(snapshot_dir, entry)):
        snapshots.append(entry)
snapshots.sort()

print(f"Found {len(snapshots)} snapshots")
print(f"First 5: {snapshots[:5]}")
print(f"Last 5: {snapshots[-5:]}")

Found 114 snapshots
First 5: ['20952569566', '20970671503', '20971964134', '20984924920', '20991308120']
Last 5: ['21336471306', '21336527339', '21337107511', '21337353543', '21337353626']


In [4]:
# Locate the submission.csv of each snapshot; the first existing location wins
candidate_subpaths = (
    'submission/submission.csv',
    'code/submission.csv',
    'submission.csv',
)

all_submissions = []
for snap in snapshots:
    candidates = (f'{snapshot_dir}/{snap}/{subpath}' for subpath in candidate_subpaths)
    found = next((p for p in candidates if os.path.exists(p)), None)
    if found is not None:
        all_submissions.append({'snapshot': snap, 'path': found})

print(f"Found {len(all_submissions)} submissions")

Found 89 submissions


In [5]:
# Calculate per-N scores for each submission and track the best score seen per N.
# NOTE: "best" here ignores overlaps; validity is checked separately.
from tqdm import tqdm

REQUIRED_COLUMNS = {'id', 'x', 'y', 'deg'}

best_per_n = {n: {'score': 1e300, 'snapshot': None, 'data': None} for n in range(1, 201)}
submission_scores = []

for sub in tqdm(all_submissions, desc="Scanning submissions"):
    try:
        df = pd.read_csv(sub['path'])
        if not REQUIRED_COLUMNS.issubset(df.columns):
            continue

        # Group size N is the id prefix before '_' (e.g. '002_1' -> 2).
        # Group on a derived key so the loaded frame is not mutated.
        group_key = df['id'].astype(str).str.split('_').str[0].astype(int)
        total_score = 0
        per_n_scores = {}
        malformed_n = []

        for n, g in df.groupby(group_key):
            if n < 1 or n > 200:
                continue
            if len(g) != n:
                # A group for N must hold exactly N trees; otherwise score_group
                # would divide by the wrong count and the result could be
                # mistaken for a best solution.
                malformed_n.append(int(n))
                continue
            xs = strip(g['x'].values)
            ys = strip(g['y'].values)
            ds = strip(g['deg'].values)
            sc = score_group(xs, ys, ds)
            per_n_scores[n] = sc
            total_score += sc

            if sc < best_per_n[n]['score']:
                best_per_n[n]['score'] = sc
                best_per_n[n]['snapshot'] = sub['snapshot']
                best_per_n[n]['data'] = g[['id', 'x', 'y', 'deg']].copy()

        submission_scores.append({
            'snapshot': sub['snapshot'],
            'path': sub['path'],
            'total_score': total_score,
            'per_n_scores': per_n_scores,
            # N values skipped because their row count did not match N
            'malformed_n': malformed_n,
        })
    except Exception as e:
        # Best-effort scan: report the failing file and keep going
        print(f"Error with {sub['path']}: {e}")

print(f"\nProcessed {len(submission_scores)} valid submissions")

Scanning submissions:   0%|          | 0/89 [00:00<?, ?it/s]

Scanning submissions:   1%|          | 1/89 [00:00<00:24,  3.58it/s]

Scanning submissions:   2%|▏         | 2/89 [00:00<00:22,  3.78it/s]

Scanning submissions:   3%|▎         | 3/89 [00:00<00:23,  3.59it/s]

Scanning submissions:   4%|▍         | 4/89 [00:01<00:22,  3.84it/s]

Scanning submissions:   6%|▌         | 5/89 [00:01<00:20,  4.03it/s]

Scanning submissions:   7%|▋         | 6/89 [00:01<00:20,  4.14it/s]

Scanning submissions:   8%|▊         | 7/89 [00:01<00:20,  4.07it/s]

Scanning submissions:   9%|▉         | 8/89 [00:01<00:19,  4.16it/s]

Scanning submissions:  10%|█         | 9/89 [00:02<00:19,  4.06it/s]

Scanning submissions:  11%|█         | 10/89 [00:02<00:19,  4.12it/s]

Scanning submissions:  12%|█▏        | 11/89 [00:02<00:20,  3.74it/s]

Scanning submissions:  13%|█▎        | 12/89 [00:03<00:20,  3.75it/s]

Scanning submissions:  15%|█▍        | 13/89 [00:03<00:19,  3.80it/s]

Scanning submissions:  16%|█▌        | 14/89 [00:03<00:20,  3.63it/s]

Scanning submissions:  17%|█▋        | 15/89 [00:03<00:20,  3.69it/s]

Scanning submissions:  18%|█▊        | 16/89 [00:04<00:20,  3.62it/s]

Scanning submissions:  19%|█▉        | 17/89 [00:04<00:19,  3.71it/s]

Scanning submissions:  20%|██        | 18/89 [00:04<00:18,  3.77it/s]

Scanning submissions:  21%|██▏       | 19/89 [00:04<00:18,  3.78it/s]

Scanning submissions:  22%|██▏       | 20/89 [00:05<00:18,  3.69it/s]

Scanning submissions:  24%|██▎       | 21/89 [00:05<00:18,  3.77it/s]

Scanning submissions:  25%|██▍       | 22/89 [00:05<00:17,  3.82it/s]

Scanning submissions:  26%|██▌       | 23/89 [00:06<00:17,  3.87it/s]

Scanning submissions:  27%|██▋       | 24/89 [00:06<00:16,  3.89it/s]

Scanning submissions:  28%|██▊       | 25/89 [00:06<00:16,  3.86it/s]

Scanning submissions:  29%|██▉       | 26/89 [00:06<00:15,  3.99it/s]

Scanning submissions:  30%|███       | 27/89 [00:06<00:15,  4.02it/s]

Scanning submissions:  31%|███▏      | 28/89 [00:07<00:14,  4.07it/s]

Scanning submissions:  33%|███▎      | 29/89 [00:07<00:15,  3.99it/s]

Scanning submissions:  34%|███▎      | 30/89 [00:07<00:14,  3.96it/s]

Scanning submissions:  35%|███▍      | 31/89 [00:07<00:14,  4.03it/s]

Scanning submissions:  36%|███▌      | 32/89 [00:08<00:13,  4.09it/s]

Scanning submissions:  37%|███▋      | 33/89 [00:08<00:13,  4.13it/s]

Scanning submissions:  38%|███▊      | 34/89 [00:08<00:14,  3.93it/s]

Scanning submissions:  40%|████      | 36/89 [00:09<00:11,  4.77it/s]

Scanning submissions:  42%|████▏     | 37/89 [00:09<00:11,  4.43it/s]

Scanning submissions:  43%|████▎     | 38/89 [00:09<00:11,  4.26it/s]

Scanning submissions:  44%|████▍     | 39/89 [00:09<00:12,  3.98it/s]

Scanning submissions:  45%|████▍     | 40/89 [00:10<00:12,  3.93it/s]

Scanning submissions:  46%|████▌     | 41/89 [00:10<00:12,  3.92it/s]

Scanning submissions:  47%|████▋     | 42/89 [00:10<00:12,  3.91it/s]

Scanning submissions:  48%|████▊     | 43/89 [00:10<00:12,  3.75it/s]

Scanning submissions:  49%|████▉     | 44/89 [00:11<00:12,  3.65it/s]

Scanning submissions:  51%|█████     | 45/89 [00:11<00:12,  3.64it/s]

Scanning submissions:  52%|█████▏    | 46/89 [00:11<00:11,  3.71it/s]

Scanning submissions:  53%|█████▎    | 47/89 [00:12<00:11,  3.78it/s]

Scanning submissions:  54%|█████▍    | 48/89 [00:12<00:11,  3.69it/s]

Scanning submissions:  55%|█████▌    | 49/89 [00:12<00:10,  3.75it/s]

Scanning submissions:  56%|█████▌    | 50/89 [00:12<00:10,  3.80it/s]

Scanning submissions:  57%|█████▋    | 51/89 [00:13<00:09,  3.84it/s]

Scanning submissions:  58%|█████▊    | 52/89 [00:13<00:09,  3.73it/s]

Scanning submissions:  60%|█████▉    | 53/89 [00:13<00:09,  3.77it/s]

Scanning submissions:  61%|██████    | 54/89 [00:13<00:09,  3.82it/s]

Scanning submissions:  62%|██████▏   | 55/89 [00:14<00:08,  3.85it/s]

Scanning submissions:  63%|██████▎   | 56/89 [00:14<00:08,  3.75it/s]

Scanning submissions:  64%|██████▍   | 57/89 [00:14<00:08,  3.67it/s]

Scanning submissions:  65%|██████▌   | 58/89 [00:14<00:08,  3.74it/s]

Scanning submissions:  66%|██████▋   | 59/89 [00:15<00:07,  3.80it/s]

Scanning submissions:  67%|██████▋   | 60/89 [00:15<00:07,  3.84it/s]

Scanning submissions:  69%|██████▊   | 61/89 [00:15<00:07,  3.73it/s]

Scanning submissions:  70%|██████▉   | 62/89 [00:16<00:07,  3.78it/s]

Scanning submissions:  71%|███████   | 63/89 [00:16<00:06,  3.83it/s]

Scanning submissions:  72%|███████▏  | 64/89 [00:16<00:06,  3.86it/s]

Scanning submissions:  73%|███████▎  | 65/89 [00:16<00:06,  3.89it/s]

Scanning submissions:  74%|███████▍  | 66/89 [00:17<00:06,  3.77it/s]

Scanning submissions:  75%|███████▌  | 67/89 [00:17<00:05,  3.81it/s]

Scanning submissions:  76%|███████▋  | 68/89 [00:17<00:05,  3.85it/s]

Scanning submissions:  78%|███████▊  | 69/89 [00:17<00:05,  3.87it/s]

Scanning submissions:  79%|███████▊  | 70/89 [00:18<00:05,  3.74it/s]

Scanning submissions:  80%|███████▉  | 71/89 [00:18<00:04,  3.74it/s]

Scanning submissions:  81%|████████  | 72/89 [00:18<00:04,  3.83it/s]

Scanning submissions:  82%|████████▏ | 73/89 [00:18<00:04,  3.88it/s]

Scanning submissions:  83%|████████▎ | 74/89 [00:19<00:03,  3.88it/s]

Scanning submissions:  84%|████████▍ | 75/89 [00:19<00:03,  3.76it/s]

Scanning submissions:  85%|████████▌ | 76/89 [00:19<00:03,  3.78it/s]

Scanning submissions:  87%|████████▋ | 77/89 [00:19<00:03,  3.82it/s]

Scanning submissions:  88%|████████▊ | 78/89 [00:20<00:02,  3.86it/s]

Scanning submissions:  89%|████████▉ | 79/89 [00:20<00:02,  3.76it/s]

Scanning submissions:  90%|████████▉ | 80/89 [00:20<00:02,  3.82it/s]

Scanning submissions:  91%|█████████ | 81/89 [00:20<00:02,  3.84it/s]

Scanning submissions:  92%|█████████▏| 82/89 [00:21<00:01,  3.85it/s]

Scanning submissions:  93%|█████████▎| 83/89 [00:21<00:01,  3.75it/s]

Scanning submissions:  94%|█████████▍| 84/89 [00:21<00:01,  3.76it/s]

Scanning submissions:  96%|█████████▌| 85/89 [00:22<00:01,  3.80it/s]

Scanning submissions:  97%|█████████▋| 86/89 [00:22<00:00,  3.85it/s]

Scanning submissions:  98%|█████████▊| 87/89 [00:22<00:00,  3.88it/s]

Scanning submissions:  99%|█████████▉| 88/89 [00:22<00:00,  3.77it/s]

Scanning submissions: 100%|██████████| 89/89 [00:23<00:00,  3.80it/s]

Scanning submissions: 100%|██████████| 89/89 [00:23<00:00,  3.85it/s]


Processed 88 valid submissions





In [6]:
# Rank submissions by ascending total score (in place) and list the first ten
submission_scores.sort(key=lambda record: record['total_score'])
print("Top 10 submissions by total score:")
print("="*60)
for rank, sub in enumerate(submission_scores[:10], start=1):
    print(f"{rank}. {sub['snapshot']}: {sub['total_score']:.6f}")

Top 10 submissions by total score:
1. 21145966992: 70.572798
2. 21337353626: 70.572798
3. 21336527339: 70.615744
4. 21329067673: 70.615745
5. 21328310479: 70.615745
6. 21337107511: 70.615745
7. 21331543270: 70.615791
8. 21322576827: 70.616145
9. 21322576451: 70.619825
10. 21328309666: 70.619825


In [7]:
# Ensemble score: add up the lowest score recorded for every N in 1..200
ensemble_score = 0
for n in range(1, 201):
    ensemble_score += best_per_n[n]['score']

improvement = 70.647327 - ensemble_score
gap_to_target = ensemble_score - 68.888293

print(f"\nEnsemble score (best per-N): {ensemble_score:.6f}")
print(f"Current baseline: 70.647327")
print(f"Improvement: {improvement:.6f}")
print(f"Target: 68.888293")
print(f"Gap to target: {gap_to_target:.6f}")


Ensemble score (best per-N): 70.523320
Current baseline: 70.647327
Improvement: 0.124007
Target: 68.888293
Gap to target: 1.635027


In [8]:
# Tally, per snapshot, how many N values it supplies to the best-per-N ensemble
contributing_snapshots = {}
for n in range(1, 201):
    source = best_per_n[n]['snapshot']
    tally = contributing_snapshots.setdefault(source, {'count': 0, 'n_values': []})
    tally['count'] += 1
    tally['n_values'].append(n)

print("\nSnapshots contributing to ensemble:")
print("="*60)
# Largest contributors first; ties keep their first-seen order (stable sort)
ranked = sorted(contributing_snapshots.items(), key=lambda item: item[1]['count'], reverse=True)
for snap, info in ranked:
    print(f"{snap}: {info['count']} N values")


Snapshots contributing to ensemble:
21145966992: 57 N values
21191209482: 51 N values
21322576827: 38 N values
21331543270: 21 N values
21165872902: 11 N values
21322577324: 8 N values
21322576451: 6 N values
21329069570: 4 N values
21104669204: 1 N values
21165874980: 1 N values
21121943993: 1 N values
21116303805: 1 N values


In [9]:
# Check for overlaps in the ensemble solution
def check_overlaps(xs, ys, degs, tolerance=1e-12):
    """Find pairs of trees whose polygons overlap by more than ``tolerance`` area.

    Args:
        xs, ys, degs: per-tree position and rotation, passed to make_tree_polygon.
        tolerance: intersection areas at or below this value are ignored.

    Returns:
        A list of ``(i, j, area)`` tuples with ``i < j``; empty when there are
        fewer than two trees or no overlaps.
    """
    count = len(xs)
    if count < 2:
        return []

    shapes = [make_tree_polygon(xs[k], ys[k], degs[k]) for k in range(count)]
    spatial_index = STRtree(shapes)

    found = []
    for i, shape in enumerate(shapes):
        # query() yields candidate indices; exact geometry tests follow
        for j in spatial_index.query(shape):
            if j <= i:
                # each unordered pair is examined once, from its lower index
                continue
            other = shapes[j]
            if not shape.intersects(other) or shape.touches(other):
                continue
            shared_area = shape.intersection(other).area
            if shared_area > tolerance:
                found.append((i, j, shared_area))
    return found

print("Checking ensemble for overlaps...")
overlap_count = 0
overlap_details = []  # (N, number of overlapping pairs, largest overlap area)

for n in range(1, 201):
    data = best_per_n[n]['data']
    if data is None:
        continue
    xs = strip(data['x'].values)
    ys = strip(data['y'].values)
    ds = strip(data['deg'].values)
    overlaps = check_overlaps(xs, ys, ds)
    if overlaps:
        overlap_count += 1
        # Report the true maximum; the first pair found is not necessarily the largest
        largest_area = max(area for _, _, area in overlaps)
        overlap_details.append((n, len(overlaps), largest_area))

print(f"\nN values with overlaps: {overlap_count}")
if overlap_details:
    print("First 10 overlap details:")
    for n, count, area in overlap_details[:10]:
        print(f"  N={n}: {count} overlaps, max area={area:.2e}")

Checking ensemble for overlaps...



N values with overlaps: 57
First 10 overlap details:
  N=2: 1 overlaps, max area=1.49e-01
  N=4: 4 overlaps, max area=1.33e-04
  N=5: 10 overlaps, max area=1.61e-02
  N=16: 21 overlaps, max area=1.67e-02
  N=40: 37 overlaps, max area=1.88e-02
  N=46: 39 overlaps, max area=8.35e-03
  N=47: 40 overlaps, max area=1.02e-02
  N=48: 48 overlaps, max area=2.17e-03
  N=53: 86 overlaps, max area=1.26e-05
  N=54: 89 overlaps, max area=1.26e-05


In [10]:
# Build ensemble from VALID (non-overlapping) solutions only
print("\nBuilding VALID ensemble (excluding overlapping solutions)...")

# Best overlap-free solution per N; 1e300 / None mark "nothing valid found yet"
valid_per_n = {n: {'score': 1e300, 'snapshot': None, 'data': None} for n in range(1, 201)}

for sub in tqdm(submission_scores, desc="Checking validity"):
    try:
        df = pd.read_csv(sub['path'])
        # Group size N is the id prefix before '_'; group on a derived key
        # so the loaded frame is not mutated.
        group_key = df['id'].astype(str).str.split('_').str[0].astype(int)

        for n, g in df.groupby(group_key):
            if n < 1 or n > 200:
                continue

            sc = sub['per_n_scores'].get(n, 1e300)
            if not sc < valid_per_n[n]['score']:
                # Cannot beat the current best, so skip the expensive overlap check
                continue

            xs = strip(g['x'].values)
            ys = strip(g['y'].values)
            ds = strip(g['deg'].values)

            # Check for overlaps
            if check_overlaps(xs, ys, ds):
                continue  # Skip this N from this snapshot

            valid_per_n[n]['score'] = sc
            valid_per_n[n]['snapshot'] = sub['snapshot']
            valid_per_n[n]['data'] = g[['id', 'x', 'y', 'deg']].copy()
    except Exception as e:
        # Best-effort scan: report the failing file instead of hiding the error
        print(f"Error with {sub['path']}: {e}")

# An N with no overlap-free solution keeps the 1e300 sentinel, which would
# dominate the sum below; make that visible.
missing_n = [n for n in range(1, 201) if valid_per_n[n]['data'] is None]
if missing_n:
    print(f"WARNING: no valid solution for {len(missing_n)} N values "
          f"(first few: {missing_n[:10]}); the score below includes the 1e300 sentinel")

valid_ensemble_score = sum(valid_per_n[n]['score'] for n in range(1, 201))
print(f"\nValid ensemble score: {valid_ensemble_score:.6f}")
print(f"Improvement from baseline: {70.647327 - valid_ensemble_score:.6f}")


Building VALID ensemble (excluding overlapping solutions)...


Checking validity:   0%|          | 0/88 [00:00<?, ?it/s]

Checking validity:   1%|          | 1/88 [00:02<03:14,  2.23s/it]

Checking validity:   2%|▏         | 2/88 [00:04<03:10,  2.21s/it]

Checking validity:   3%|▎         | 3/88 [00:05<02:38,  1.87s/it]

Checking validity:   5%|▍         | 4/88 [00:07<02:19,  1.66s/it]

Checking validity:   6%|▌         | 5/88 [00:08<02:07,  1.54s/it]

Checking validity:   7%|▋         | 6/88 [00:09<02:00,  1.47s/it]

Checking validity:   8%|▊         | 7/88 [00:11<01:56,  1.44s/it]

Checking validity:   9%|▉         | 8/88 [00:12<01:52,  1.41s/it]

Checking validity:  10%|█         | 9/88 [00:13<01:49,  1.39s/it]

Checking validity:  11%|█▏        | 10/88 [00:15<01:47,  1.38s/it]

Checking validity:  12%|█▎        | 11/88 [00:16<01:45,  1.37s/it]

Checking validity:  14%|█▎        | 12/88 [00:18<01:43,  1.36s/it]

Checking validity:  15%|█▍        | 13/88 [00:19<01:41,  1.36s/it]

Checking validity:  16%|█▌        | 14/88 [00:20<01:39,  1.35s/it]

Checking validity:  17%|█▋        | 15/88 [00:22<01:38,  1.34s/it]

Checking validity:  18%|█▊        | 16/88 [00:23<01:37,  1.35s/it]

Checking validity:  19%|█▉        | 17/88 [00:24<01:35,  1.35s/it]

Checking validity:  20%|██        | 18/88 [00:26<01:33,  1.34s/it]

Checking validity:  22%|██▏       | 19/88 [00:27<01:35,  1.38s/it]

Checking validity:  23%|██▎       | 20/88 [00:28<01:33,  1.37s/it]

Checking validity:  24%|██▍       | 21/88 [00:30<01:31,  1.36s/it]

Checking validity:  25%|██▌       | 22/88 [00:31<01:29,  1.36s/it]

Checking validity:  26%|██▌       | 23/88 [00:32<01:27,  1.35s/it]

Checking validity:  27%|██▋       | 24/88 [00:34<01:25,  1.34s/it]

Checking validity:  28%|██▊       | 25/88 [00:35<01:25,  1.36s/it]

Checking validity:  30%|██▉       | 26/88 [00:36<01:23,  1.35s/it]

Checking validity:  31%|███       | 27/88 [00:38<01:22,  1.35s/it]

Checking validity:  32%|███▏      | 28/88 [00:39<01:20,  1.34s/it]

Checking validity:  33%|███▎      | 29/88 [00:40<01:19,  1.35s/it]

Checking validity:  34%|███▍      | 30/88 [00:42<01:17,  1.34s/it]

Checking validity:  35%|███▌      | 31/88 [00:43<01:16,  1.34s/it]

Checking validity:  36%|███▋      | 32/88 [00:44<01:14,  1.34s/it]

Checking validity:  38%|███▊      | 33/88 [00:46<01:13,  1.34s/it]

Checking validity:  39%|███▊      | 34/88 [00:47<01:12,  1.35s/it]

Checking validity:  40%|███▉      | 35/88 [00:49<01:11,  1.34s/it]

Checking validity:  41%|████      | 36/88 [00:50<01:09,  1.34s/it]

Checking validity:  42%|████▏     | 37/88 [00:51<01:08,  1.34s/it]

Checking validity:  43%|████▎     | 38/88 [00:53<01:07,  1.35s/it]

Checking validity:  44%|████▍     | 39/88 [00:54<01:05,  1.34s/it]

Checking validity:  45%|████▌     | 40/88 [00:55<01:04,  1.34s/it]

Checking validity:  47%|████▋     | 41/88 [00:57<01:02,  1.34s/it]

Checking validity:  48%|████▊     | 42/88 [00:58<01:02,  1.35s/it]

Checking validity:  49%|████▉     | 43/88 [00:59<01:00,  1.35s/it]

Checking validity:  50%|█████     | 44/88 [01:01<00:59,  1.34s/it]

Checking validity:  51%|█████     | 45/88 [01:02<00:57,  1.34s/it]

Checking validity:  52%|█████▏    | 46/88 [01:03<00:56,  1.34s/it]

Checking validity:  53%|█████▎    | 47/88 [01:05<00:55,  1.35s/it]

Checking validity:  55%|█████▍    | 48/88 [01:06<00:53,  1.35s/it]

Checking validity:  56%|█████▌    | 49/88 [01:07<00:52,  1.34s/it]

Checking validity:  57%|█████▋    | 50/88 [01:09<00:50,  1.34s/it]

Checking validity:  58%|█████▊    | 51/88 [01:10<00:49,  1.35s/it]

Checking validity:  59%|█████▉    | 52/88 [01:11<00:48,  1.34s/it]

Checking validity:  60%|██████    | 53/88 [01:13<00:46,  1.34s/it]

Checking validity:  61%|██████▏   | 54/88 [01:14<00:45,  1.34s/it]

Checking validity:  62%|██████▎   | 55/88 [01:15<00:44,  1.34s/it]

Checking validity:  64%|██████▎   | 56/88 [01:17<00:43,  1.35s/it]

Checking validity:  65%|██████▍   | 57/88 [01:18<00:41,  1.34s/it]

Checking validity:  66%|██████▌   | 58/88 [01:19<00:40,  1.34s/it]

Checking validity:  67%|██████▋   | 59/88 [01:21<00:38,  1.34s/it]

Checking validity:  68%|██████▊   | 60/88 [01:22<00:37,  1.35s/it]

Checking validity:  69%|██████▉   | 61/88 [01:23<00:36,  1.35s/it]

Checking validity:  70%|███████   | 62/88 [01:25<00:35,  1.35s/it]

Checking validity:  72%|███████▏  | 63/88 [01:26<00:33,  1.34s/it]

Checking validity:  73%|███████▎  | 64/88 [01:27<00:32,  1.34s/it]

Checking validity:  74%|███████▍  | 65/88 [01:29<00:31,  1.35s/it]

Checking validity:  75%|███████▌  | 66/88 [01:30<00:29,  1.35s/it]

Checking validity:  76%|███████▌  | 67/88 [01:32<00:28,  1.35s/it]

Checking validity:  77%|███████▋  | 68/88 [01:33<00:26,  1.34s/it]

Checking validity:  78%|███████▊  | 69/88 [01:36<00:33,  1.78s/it]

Checking validity:  80%|███████▉  | 70/88 [01:38<00:37,  2.08s/it]

Checking validity:  81%|████████  | 71/88 [01:41<00:39,  2.30s/it]

Checking validity:  82%|████████▏ | 72/88 [01:44<00:39,  2.44s/it]

Checking validity:  83%|████████▎ | 73/88 [01:47<00:38,  2.56s/it]

Checking validity:  84%|████████▍ | 74/88 [01:50<00:36,  2.63s/it]

Checking validity:  85%|████████▌ | 75/88 [01:52<00:32,  2.53s/it]

Checking validity:  86%|████████▋ | 76/88 [01:53<00:25,  2.09s/it]

Checking validity:  88%|████████▊ | 77/88 [01:54<00:19,  1.79s/it]

Checking validity:  89%|████████▊ | 78/88 [01:55<00:15,  1.59s/it]

Checking validity:  90%|████████▉ | 79/88 [01:56<00:12,  1.44s/it]

Checking validity:  91%|█████████ | 80/88 [01:57<00:10,  1.32s/it]

Checking validity:  92%|█████████▏| 81/88 [01:58<00:08,  1.23s/it]

Checking validity:  93%|█████████▎| 82/88 [01:59<00:07,  1.17s/it]

Checking validity:  94%|█████████▍| 83/88 [02:00<00:05,  1.12s/it]

Checking validity:  95%|█████████▌| 84/88 [02:01<00:04,  1.11s/it]

Checking validity:  97%|█████████▋| 85/88 [02:02<00:03,  1.07s/it]

Checking validity:  98%|█████████▊| 86/88 [02:03<00:02,  1.05s/it]

Checking validity:  99%|█████████▉| 87/88 [02:05<00:01,  1.06s/it]

Checking validity: 100%|██████████| 88/88 [02:05<00:00,  1.07it/s]

Checking validity: 100%|██████████| 88/88 [02:05<00:00,  1.43s/it]


Valid ensemble score: 70.615744
Improvement from baseline: 0.031583





In [11]:
# Tally how many N values each snapshot supplies to the valid ensemble
valid_contributing = {}
for n in range(1, 201):
    source = valid_per_n[n]['snapshot']
    valid_contributing[source] = valid_contributing.get(source, 0) + 1

print("\nValid snapshots contributing to ensemble:")
print("="*60)
# Largest contributors first; ties keep their first-seen order (stable sort)
ranked = sorted(valid_contributing.items(), key=lambda item: item[1], reverse=True)
for snap, count in ranked:
    print(f"{snap}: {count} N values")


Valid snapshots contributing to ensemble:
21336527339: 194 N values
21331543270: 4 N values
21145966992: 2 N values


In [12]:
# Save the valid ensemble submission
print("\nSaving valid ensemble submission...")

# Collect the per-N frames in N order and concatenate once, instead of
# appending row by row via iterrows().
ensemble_frames = []
missing_n = []
for n in range(1, 201):
    data = valid_per_n[n]['data']
    if data is None:
        missing_n.append(n)
    else:
        ensemble_frames.append(data)

if ensemble_frames:
    ensemble_df = pd.concat(ensemble_frames)
else:
    # pd.concat rejects an empty list; fall back to an empty frame with the expected columns
    ensemble_df = pd.DataFrame(columns=['id', 'x', 'y', 'deg'])

if missing_n:
    print(f"WARNING: no valid solution for {len(missing_n)} N values "
          f"(first few: {missing_n[:10]}); the submission is incomplete")
print(f"Total rows: {len(ensemble_df)}")
print(f"Expected: 20100")

# Save
output_paths = [
    '/home/code/experiments/004_ensemble_valid/submission.csv',
    '/home/submission/submission.csv',
]
for out_path in output_paths:
    # Create the target directory so the write cannot fail on a missing folder
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    ensemble_df.to_csv(out_path, index=False)

print("\nFirst 5 rows:")
print(ensemble_df.head())


Saving valid ensemble submission...


Total rows: 20100
Expected: 20100

First 5 rows:
      id                         x                         y  \
0  001_0   s-48.196086194214246001    s58.770984615214225000   
1  002_0   s0.15409700000000001174  s-0.03854099999999999887   
2  002_1  s-0.15409700000000001174  s-0.56145900000000004137   
3  003_0        s0.254937643697833       s-0.233436061549416   
4  003_1        s0.357722754471247        s0.250360566787394   

                         deg  
0     s45.000000000000000000  
1  s203.62937800000000265754  
2   s23.62937799999999910483  
3        s113.56326044172948  
4           s66.370622269343  


In [None]:
# Save metrics
# Reference scores, defined once so the JSON and the printed summary cannot drift apart
BASELINE_SCORE = 70.647327
TARGET_SCORE = 68.888293

improvement = BASELINE_SCORE - valid_ensemble_score
gap_to_target = valid_ensemble_score - TARGET_SCORE

# valid_contributing may hold a None key (N values with no valid solution);
# that is not a snapshot and must not be counted.
num_snapshots_used = sum(
    1 for snap, count in valid_contributing.items()
    if snap is not None and count > 0
)

metrics = {
    'cv_score': valid_ensemble_score,
    'baseline_score': BASELINE_SCORE,
    'improvement': improvement,
    'target': TARGET_SCORE,
    'gap': gap_to_target,
    'num_snapshots_used': num_snapshots_used,
    'total_snapshots_scanned': len(submission_scores)
}

metrics_dir = '/home/code/experiments/004_ensemble_valid'
os.makedirs(metrics_dir, exist_ok=True)
with open(f'{metrics_dir}/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nMetrics saved: {metrics}")
print(f"\n{'='*60}")
print(f"SUMMARY:")
print(f"  Valid ensemble score: {valid_ensemble_score:.6f}")
print(f"  Baseline: {BASELINE_SCORE:.6f}")
print(f"  Improvement: {improvement:.6f}")
print(f"  Target: {TARGET_SCORE:.6f}")
print(f"  Gap to target: {gap_to_target:.6f}")
print(f"{'='*60}")