# Loop 4 Analysis: Ensemble Approach Discovery

The evaluator correctly identified that single-solution optimization is stuck. The jonathanchan kernel shows the path forward: **ENSEMBLE** - combine the best solution for each N from multiple sources.

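Let's analyze the available snapshots to find diverse solutions. Note that the scoring cell below caps the run at the first 50 submissions for speed, so the ensemble is built from that subset.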

In [1]:
import pandas as pd
import numpy as np
import os
import glob
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely.ops import unary_union
import math
from numba import njit

# Set the decimal module's working precision to 25 significant digits.
# NOTE(review): Decimal is imported but not used in the cells visible here — confirm this is still needed.
getcontext().prec = 25

print("Imports done")

Imports done


In [2]:
# Fast scoring function using numba
@njit
def make_polygon_template():
    """Return the outline of one unrotated tree as two float64 arrays.

    The 15 vertices start at the tip, run down the right-hand side
    (positive x), cross the bottom of the trunk, and climb back up the
    left-hand side.

    Returns:
        (x, y): vertex x-coordinates and y-coordinates in matching order.
    """
    # Horizontal extents: trunk, then the base / middle / top tiers.
    trunk_w = 0.15
    base_w = 0.7
    mid_w = 0.4
    top_w = 0.25
    # Vertical levels: tip, the two tier boundaries, base line, trunk bottom.
    trunk_h = 0.2
    tip_y = 0.8
    t1_y = 0.5
    t2_y = 0.25
    base_y = 0.0
    tbot_y = -trunk_h
    x = np.array(
        [
            0.0,
            top_w / 2, top_w / 4,
            mid_w / 2, mid_w / 4,
            base_w / 2,
            trunk_w / 2, trunk_w / 2,
            -trunk_w / 2, -trunk_w / 2,
            -base_w / 2,
            -mid_w / 4, -mid_w / 2,
            -top_w / 4, -top_w / 2,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y,
            t1_y, t1_y,
            t2_y, t2_y,
            base_y,
            base_y, tbot_y,
            tbot_y, base_y,
            base_y,
            t2_y, t2_y,
            t1_y, t1_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs,ys,degs,tx,ty):
    """Score one group of placements by its axis-aligned bounding square.

    Each template vertex (tx[j], ty[j]) is rotated by degs[i] degrees,
    shifted by (xs[i], ys[i]), and folded into a running bounding box.

    Args:
        xs, ys: per-placement translation.
        degs: per-placement rotation in degrees.
        tx, ty: template vertex coordinates.

    Returns:
        side * side / n, where side is the larger of the box's width and
        height and n is xs.size.
    """
    count = xs.size
    n_vertices = tx.size
    # Sentinels far outside any realistic coordinate so the first vertex wins.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300
    for k in range(count):
        angle = degs[k] * math.pi / 180.0
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        off_x = xs[k]
        off_y = ys[k]
        for v in range(n_vertices):
            px = cos_a * tx[v] - sin_a * ty[v] + off_x
            py = sin_a * tx[v] + cos_a * ty[v] + off_y
            # Plain ifs (not elif): the first vertex must update both bounds.
            if px < lo_x:
                lo_x = px
            if px > hi_x:
                hi_x = px
            if py < lo_y:
                lo_y = py
            if py > hi_y:
                hi_y = py
    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / count

def strip(a):
    """Convert an iterable of values to a float64 array, ignoring any 's' characters.

    Each element is turned into text, every 's' is removed, and the
    remainder is parsed as a float.
    """
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace('s', '')))
    return np.array(parsed, np.float64)

# Build the tree outline once; tx/ty are handed to score_group for every group scored below.
tx, ty = make_polygon_template()
print("Scoring functions ready")

Scoring functions ready


In [3]:
# Locate every snapshot directory, in sorted name order
snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
all_snapshots = []
for entry in sorted(os.listdir(snapshot_dir)):
    if os.path.isdir(os.path.join(snapshot_dir, entry)):
        all_snapshots.append(entry)
print(f"Found {len(all_snapshots)} snapshots")

# Keep only the snapshots that actually contain a submission file
candidate_paths = (
    (snap, os.path.join(snapshot_dir, snap, 'submission', 'submission.csv'))
    for snap in all_snapshots
)
valid_submissions = [
    (snap, sub_path)
    for snap, sub_path in candidate_paths
    if os.path.exists(sub_path)
]

print(f"Found {len(valid_submissions)} valid submissions")

Found 114 snapshots
Found 87 valid submissions


In [4]:
# Score each submission and find per-N scores
print("Scoring all submissions...")

MAX_SUBMISSIONS = 50  # Limit to 50 for speed
MAX_N = 200  # largest group size that is scored
REQUIRED_COLUMNS = {'id', 'x', 'y', 'deg'}

all_scores = {}  # {snapshot: {n: score}}
total_scores = {}  # {snapshot: total_score}
skipped_submissions = []  # [(snapshot, reason)] - nothing is dropped silently
rejected_groups = {}  # {snapshot: [n, ...]} groups excluded as malformed

for snap, path in valid_submissions[:MAX_SUBMISSIONS]:
    try:
        df = pd.read_csv(path)
        missing_columns = REQUIRED_COLUMNS - set(df.columns)
        if missing_columns:
            skipped_submissions.append((snap, f"missing columns {sorted(missing_columns)}"))
            continue

        # The group size N is the part of the id before the first underscore.
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

        snap_scores = {}
        bad_ns = []
        for n, g in df.groupby('N'):
            if n < 1 or n > MAX_N:
                continue
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
            # A group labelled N is expected to hold exactly N finite placements.
            # Missing rows or NaN/inf values shrink the bounding box and would
            # let a broken layout win the per-N comparison downstream.
            well_formed = (
                len(g) == n
                and np.isfinite(xs).all()
                and np.isfinite(ys).all()
                and np.isfinite(ds).all()
            )
            if not well_formed:
                bad_ns.append(int(n))
                continue
            snap_scores[n] = score_group(xs, ys, ds, tx, ty)
    except Exception as e:
        # Best effort: one unreadable submission must not stop the scan,
        # but record why it was dropped instead of hiding it.
        skipped_submissions.append((snap, f"{type(e).__name__}: {e}"))
        continue

    if bad_ns:
        rejected_groups[snap] = bad_ns
    if not snap_scores:
        skipped_submissions.append((snap, "no well-formed groups"))
        continue

    all_scores[snap] = snap_scores
    total_scores[snap] = sum(snap_scores.values())

print(f"Successfully scored {len(all_scores)} submissions")

if skipped_submissions:
    print(f"Skipped {len(skipped_submissions)} submissions:")
    for snap, reason in skipped_submissions:
        print(f"  {snap}: {reason}")
if rejected_groups:
    print(f"Rejected malformed groups in {len(rejected_groups)} submissions:")
    for snap, bad_ns in rejected_groups.items():
        print(f"  {snap}: N={bad_ns}")

# Show top 10 by total score
sorted_snaps = sorted(total_scores.items(), key=lambda x: x[1])
print("\nTop 10 snapshots by total score:")
for snap, score in sorted_snaps[:10]:
    n_scored = len(all_scores[snap])
    # A total over fewer than MAX_N groups is not comparable with a full one.
    note = "" if n_scored == MAX_N else f"  [incomplete: {n_scored}/{MAX_N} groups]"
    print(f"  {snap}: {score:.6f}{note}")

Scoring all submissions...


Successfully scored 49 submissions

Top 10 snapshots by total score:
  21145966992: 70.572798
  21180223864: 70.630429
  21165874980: 70.630478
  21180219583: 70.630478
  21180221700: 70.630478
  21165872902: 70.647306
  21165876936: 70.647306
  21165878844: 70.659436
  21156850282: 70.659437
  21156851249: 70.659437


In [None]:
# Find the BEST solution for each N across all snapshots
print("\nFinding best solution for each N...")

BASELINE_SCORE = 70.647327  # the "current baseline" the ensemble is compared against
N_VALUES = range(1, 201)  # every N the ensemble must cover

best_per_n = {}  # {n: (score, snapshot)}
for snap, scores in all_scores.items():
    for n, sc in scores.items():
        # A NaN/inf score can never be beaten by `<`, so never let it in.
        if not math.isfinite(sc):
            continue
        # Strict `<` keeps the first snapshot seen when scores tie.
        if n not in best_per_n or sc < best_per_n[n][0]:
            best_per_n[n] = (sc, snap)

# Fail with a readable message instead of a bare KeyError from the sum below.
missing_ns = [n for n in N_VALUES if n not in best_per_n]
if missing_ns:
    raise KeyError(f"No scored solution for N={missing_ns}; the ensemble needs every N from 1 to 200")

# Calculate ensemble score
ensemble_score = sum(best_per_n[n][0] for n in N_VALUES)
print(f"\nEnsemble score (best per N): {ensemble_score:.6f}")
print(f"Current baseline: {BASELINE_SCORE:.6f}")
print(f"Improvement: {BASELINE_SCORE - ensemble_score:.6f}")

# Show which snapshots contribute
contributing_snaps = {}  # {snapshot: [n, ...]}
for n, (sc, snap) in best_per_n.items():
    contributing_snaps.setdefault(snap, []).append(n)

print(f"\nSnapshots contributing to ensemble: {len(contributing_snaps)}")
for snap, ns in sorted(contributing_snaps.items(), key=lambda x: -len(x[1]))[:10]:
    print(f"  {snap}: {len(ns)} N values")

In [None]:
# Check for overlaps in the best solutions
from shapely.geometry import Polygon
from shapely.strtree import STRtree

def check_overlaps_fast(xs, ys, degs, tolerance=1e-12):
    """Find pairs of placed trees whose outlines overlap, using Shapely.

    Args:
        xs, ys: per-tree translation.
        degs: per-tree rotation in degrees.
        tolerance: an intersection is reported only when its area exceeds this.

    Returns:
        A list of (i, j, area) tuples with i < j. Empty when there are
        fewer than two trees.
    """
    count = len(xs)
    if count <= 1:
        return []

    # Tree dimensions
    trunk_w, trunk_h = 0.15, 0.2
    base_w, mid_w, top_w = 0.7, 0.4, 0.25
    tip_y, t1_y, t2_y, base_y = 0.8, 0.5, 0.25, 0.0
    tbot_y = -trunk_h

    # Right-hand side from just below the tip down to the trunk bottom;
    # the left-hand side is its mirror image walked in reverse.
    right_side = [
        (top_w/2, t1_y), (top_w/4, t1_y),
        (mid_w/2, t2_y), (mid_w/4, t2_y),
        (base_w/2, base_y),
        (trunk_w/2, base_y), (trunk_w/2, tbot_y),
    ]
    left_side = [(-px, py) for px, py in reversed(right_side)]
    outline = [(0.0, tip_y)] + right_side + left_side

    placed = []
    for k in range(count):
        angle = degs[k] * math.pi / 180
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        placed.append(Polygon([
            (cos_a*px - sin_a*py + xs[k], sin_a*px + cos_a*py + ys[k])
            for px, py in outline
        ]))

    found = []
    index = STRtree(placed)
    for i, shape in enumerate(placed):
        for j in index.query(shape):
            # Visit each unordered pair once, and never a tree with itself.
            if j <= i:
                continue
            other = placed[j]
            # Boundary contact alone is not an overlap.
            if not shape.intersects(other) or shape.touches(other):
                continue
            common = shape.intersection(other)
            if common.area > tolerance:
                found.append((i, j, common.area))
    return found

print("Checking overlaps in best solutions...")
# Parse each contributing submission once; the same snapshot usually supplies
# many N values, so re-reading its CSV per N is wasted work.
frames_by_snap = {}  # {snapshot: DataFrame with an 'N' column}
overlap_ns = []  # [(n, snapshot, overlap_count, max_overlap_area)]
for n in range(1, 201):
    sc, snap = best_per_n[n]
    if snap not in frames_by_snap:
        path = os.path.join(snapshot_dir, snap, 'submission', 'submission.csv')
        df = pd.read_csv(path)
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
        frames_by_snap[snap] = df
    df = frames_by_snap[snap]
    g = df[df['N'] == n]
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    overlaps = check_overlaps_fast(xs, ys, ds)
    if overlaps:
        overlap_ns.append((n, snap, len(overlaps), max(o[2] for o in overlaps)))

print(f"\nN values with overlaps: {len(overlap_ns)}")
if overlap_ns:
    print("First 10:")
    for n, snap, count, max_area in overlap_ns[:10]:
        print(f"  N={n}: {count} overlaps, max_area={max_area:.2e}, from {snap}")

In [None]:
# For N values with overlaps, find the best VALID solution
print("\nFinding best VALID solution for each N with overlaps...")

BASELINE_SCORE = 70.647327  # the "current baseline" the ensemble is compared against

_submission_cache = {}  # {snapshot: DataFrame with an 'N' column}; each CSV is parsed once


def _has_overlaps(snap, n):
    """Return True when snapshot `snap`'s layout for group `n` contains overlaps."""
    if snap not in _submission_cache:
        path = os.path.join(snapshot_dir, snap, 'submission', 'submission.csv')
        frame = pd.read_csv(path)
        frame['N'] = frame['id'].astype(str).str.split('_').str[0].astype(int)
        _submission_cache[snap] = frame
    frame = _submission_cache[snap]
    g = frame[frame['N'] == n]
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    return bool(check_overlaps_fast(xs, ys, ds))


unresolved_ns = []  # N values for which every candidate overlaps
for n, bad_snap, _, _ in overlap_ns:
    # Try other snapshots for this N
    best_valid_score = float('inf')
    best_valid_snap = None

    for snap, scores in all_scores.items():
        if n not in scores:
            continue
        # Cheap test first: only a candidate that would improve the current
        # best is worth the expensive overlap check. The result is the same
        # as checking every candidate.
        if not scores[n] < best_valid_score:
            continue
        if not _has_overlaps(snap, n):
            best_valid_score = scores[n]
            best_valid_snap = snap

    if best_valid_snap is not None:
        old_score = best_per_n[n][0]
        best_per_n[n] = (best_valid_score, best_valid_snap)
        print(f"  N={n}: replaced {bad_snap} ({old_score:.6f}) with {best_valid_snap} ({best_valid_score:.6f})")
    else:
        unresolved_ns.append(n)
        print(f"  N={n}: NO valid solution found!")

# Recalculate ensemble score
ensemble_score_valid = sum(best_per_n[n][0] for n in range(1, 201))
print(f"\nValid ensemble score: {ensemble_score_valid:.6f}")
print(f"Current baseline: {BASELINE_SCORE:.6f}")
print(f"Improvement: {BASELINE_SCORE - ensemble_score_valid:.6f}")
if unresolved_ns:
    # best_per_n still holds the overlapping entry for these N, so the score
    # printed above is optimistic until they are fixed.
    print(f"WARNING: N={unresolved_ns} still use overlapping solutions; the score above is not fully valid")

In [None]:
# Save the ensemble mapping for the executor
import json

ENSEMBLE_MAP_PATH = '/home/code/exploration/ensemble_map.json'
BASELINE_SCORE = 70.647327  # the "current baseline" the ensemble is compared against
TARGET_SCORE = 68.888293  # the score the "gap to target" line is measured from

# {"<n>": {"snapshot": <snapshot name>, "score": <per-N score>}}
ensemble_map = {
    str(n): {
        'snapshot': best_per_n[n][1],
        'score': float(best_per_n[n][0])
    }
    for n in range(1, 201)
}

# Create the output directory if needed so open() cannot fail on a fresh machine.
os.makedirs(os.path.dirname(ENSEMBLE_MAP_PATH), exist_ok=True)
with open(ENSEMBLE_MAP_PATH, 'w') as f:
    json.dump(ensemble_map, f, indent=2)

print(f"Saved ensemble map to {ENSEMBLE_MAP_PATH}")
print(f"\nSummary:")
print(f"  Ensemble score: {ensemble_score_valid:.6f}")
print(f"  Baseline score: {BASELINE_SCORE:.6f}")
print(f"  Improvement: {BASELINE_SCORE - ensemble_score_valid:.6f}")
print(f"  Gap to target: {ensemble_score_valid - TARGET_SCORE:.6f}")