# Loop 4 Analysis: Ensemble Strategy Deep Dive

The evaluator correctly identified that single-solution optimization has hit a wall.
Three experiments (bbox3, sa_fast, fix_direction) all failed to improve the baseline.

The path forward is an ENSEMBLE: combining the best per-N solutions from multiple sources.

In [None]:
import os
import glob
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import json

# Use 25 significant digits for the default decimal context.
# NOTE(review): no Decimal arithmetic appears in the cells visible here —
# confirm this setting is still needed.
getcontext().prec = 25

# Progress marker confirming the import cell ran to completion.
print("Imports done")

In [None]:
# Tree geometry and scoring functions
def make_tree_polygon(cx, cy, deg):
    """Build the shapely ``Polygon`` outline of a single tree.

    The 15-vertex template outline (defined in local coordinates) is rotated
    by ``deg`` degrees about its local origin and then translated by
    ``(cx, cy)``.

    Args:
        cx: X offset applied after rotation.
        cy: Y offset applied after rotation.
        deg: Rotation angle in degrees.

    Returns:
        A ``Polygon`` built from the 15 transformed vertices.
    """
    import math

    # Widths and heights of the template outline, in local coordinates.
    w_trunk, w_base, w_mid, w_top = 0.15, 0.7, 0.4, 0.25
    tip_y, tier1_y, tier2_y, base_y, trunk_bottom_y = 0.8, 0.5, 0.25, 0.0, -0.2

    # Vertices listed in order around the outline, starting at the tip.
    outline = [
        (0, tip_y),
        (w_top / 2, tier1_y),
        (w_top / 4, tier1_y),
        (w_mid / 2, tier2_y),
        (w_mid / 4, tier2_y),
        (w_base / 2, base_y),
        (w_trunk / 2, base_y),
        (w_trunk / 2, trunk_bottom_y),
        (-w_trunk / 2, trunk_bottom_y),
        (-w_trunk / 2, base_y),
        (-w_base / 2, base_y),
        (-w_mid / 4, tier2_y),
        (-w_mid / 2, tier2_y),
        (-w_top / 4, tier1_y),
        (-w_top / 2, tier1_y),
    ]

    theta = deg * math.pi / 180
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)

    # Rotate each template vertex, then shift it to the requested position.
    placed = []
    for vx, vy in outline:
        placed.append((vx*cos_t - vy*sin_t + cx, vx*sin_t + vy*cos_t + cy))
    return Polygon(placed)

def score_group(xs, ys, degs):
    """Score one group of trees by its bounding square.

    Every tree outline is rotated by its angle and moved to its position; the
    score is the squared side of the smallest axis-aligned square covering
    all resulting vertices, divided by the number of trees.

    Args:
        xs: X positions, one per tree.
        ys: Y positions, one per tree.
        degs: Rotation angles in degrees, one per tree.

    Returns:
        ``side * side / n`` where ``n`` is ``len(xs)`` and ``side`` is the
        larger of the bounding box's width and height.
    """
    import math

    n = len(xs)

    # Widths and heights of the template outline, in local coordinates.
    w_trunk, w_base, w_mid, w_top = 0.15, 0.7, 0.4, 0.25
    tip_y, tier1_y, tier2_y, base_y, trunk_bottom_y = 0.8, 0.5, 0.25, 0.0, -0.2

    # Vertices listed in order around the outline, starting at the tip.
    outline = [
        (0, tip_y),
        (w_top / 2, tier1_y),
        (w_top / 4, tier1_y),
        (w_mid / 2, tier2_y),
        (w_mid / 4, tier2_y),
        (w_base / 2, base_y),
        (w_trunk / 2, base_y),
        (w_trunk / 2, trunk_bottom_y),
        (-w_trunk / 2, trunk_bottom_y),
        (-w_trunk / 2, base_y),
        (-w_base / 2, base_y),
        (-w_mid / 4, tier2_y),
        (-w_mid / 2, tier2_y),
        (-w_top / 4, tier1_y),
        (-w_top / 2, tier1_y),
    ]

    # Running bounding box, seeded with sentinels any real vertex will beat.
    lo_x = lo_y = 1e300
    hi_x = hi_y = -1e300

    for k in range(n):
        theta = degs[k] * math.pi / 180
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        off_x = xs[k]
        off_y = ys[k]
        for vx, vy in outline:
            px = vx*cos_t - vy*sin_t + off_x
            py = vx*sin_t + vy*cos_t + off_y
            if px < lo_x:
                lo_x = px
            if px > hi_x:
                hi_x = px
            if py < lo_y:
                lo_y = py
            if py > hi_y:
                hi_y = py

    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / n

def strip(a):
    """Parse each value of ``a`` as a float after removing every 's' character.

    Each element is converted with ``str`` first, so both prefixed strings
    such as ``'s1.5'`` and plain numbers are accepted.

    Returns:
        A one-dimensional ``float64`` NumPy array with one entry per element.
    """
    parsed = []
    for item in a:
        text = str(item).replace('s', '')
        parsed.append(float(text))
    return np.array(parsed, dtype=np.float64)

print("Scoring functions defined")

In [None]:
# Enumerate the snapshot directories (sorted by name) that may hold submissions
snapshot_dir = '/home/nonroot/snapshots/santa-2025'
snapshots = sorted(
    filter(
        lambda name: os.path.isdir(os.path.join(snapshot_dir, name)),
        os.listdir(snapshot_dir),
    )
)

# Quick sanity report: how many were found and what the name range looks like
print(f"Found {len(snapshots)} snapshots")
print(f"First 5: {snapshots[:5]}")
print(f"Last 5: {snapshots[-5:]}")

In [None]:
# Locate at most one submission.csv per snapshot
all_submissions = []

for snap in snapshots:
    snap_root = f'{snapshot_dir}/{snap}'
    # Candidate locations, tried in priority order; the first existing one wins
    candidates = (
        f'{snap_root}/submission/submission.csv',
        f'{snap_root}/code/submission.csv',
        f'{snap_root}/submission.csv',
    )
    found = next((p for p in candidates if os.path.exists(p)), None)
    if found is not None:
        all_submissions.append({'snapshot': snap, 'path': found})

print(f"Found {len(all_submissions)} submissions")

In [None]:
# Score every submission per N and track the best-scoring layout seen for each N
from tqdm import tqdm

# One record per N in 1..200; 1e300 marks "nothing found yet"
best_per_n = {}
for n in range(1, 201):
    best_per_n[n] = {'score': 1e300, 'snapshot': None, 'data': None}
submission_scores = []

for sub in tqdm(all_submissions, desc="Scanning submissions"):
    try:
        df = pd.read_csv(sub['path'])
        required_columns = {'id', 'x', 'y', 'deg'}
        if not required_columns.issubset(df.columns):
            continue

        # The group size N is the part of the id before the first underscore
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
        per_n_scores = {}
        total_score = 0

        for n, g in df.groupby('N'):
            if not (1 <= n <= 200):
                continue
            sc = score_group(
                strip(g['x'].values),
                strip(g['y'].values),
                strip(g['deg'].values),
            )
            per_n_scores[n] = sc
            total_score += sc

            # Keep this group if it beats the best score recorded so far for N
            record = best_per_n[n]
            if sc < record['score']:
                record['score'] = sc
                record['snapshot'] = sub['snapshot']
                record['data'] = g[['id', 'x', 'y', 'deg']].copy()

        # NOTE(review): total_score only covers the N values present in the
        # file, so a submission missing some N gets a smaller total.
        submission_scores.append({
            'snapshot': sub['snapshot'],
            'path': sub['path'],
            'total_score': total_score,
            'per_n_scores': per_n_scores,
        })
    except Exception as e:
        # Best effort: report the failing file and move on to the next one
        print(f"Error with {sub['path']}: {e}")

print(f"\nProcessed {len(submission_scores)} valid submissions")

In [None]:
# Rank submissions from lowest to highest total score and list the first ten
submission_scores.sort(key=lambda entry: entry['total_score'])
print("Top 10 submissions by total score:")
print("=" * 60)
for rank, entry in enumerate(submission_scores[:10], start=1):
    print(f"{rank}. {entry['snapshot']}: {entry['total_score']:.6f}")

In [None]:
# Total the best score recorded for each N across every scanned source
ensemble_score = 0
for n in range(1, 201):
    ensemble_score += best_per_n[n]['score']

# Compare against the hard-coded baseline and target values
print(f"\nEnsemble score (best per-N): {ensemble_score:.6f}")
print("Current baseline: 70.647327")
print(f"Improvement: {70.647327 - ensemble_score:.6f}")
print("Target: 68.888293")
print(f"Gap to target: {ensemble_score - 68.888293:.6f}")

In [None]:
# Tally, per snapshot, how many N values it wins and which ones
contributing_snapshots = {}
for n in range(1, 201):
    winner = best_per_n[n]['snapshot']
    bucket = contributing_snapshots.setdefault(winner, {'count': 0, 'n_values': []})
    bucket['count'] += 1
    bucket['n_values'].append(n)

print("\nSnapshots contributing to ensemble:")
print("=" * 60)
# Largest contributors first; ties keep their first-seen order
ranked = sorted(
    contributing_snapshots.items(),
    key=lambda item: item[1]['count'],
    reverse=True,
)
for snap, info in ranked:
    print(f"{snap}: {info['count']} N values")

In [None]:
# Pairwise overlap detection for one group of trees
def check_overlaps(xs, ys, degs, tolerance=1e-12):
    """Find pairs of trees whose polygons overlap by more than ``tolerance``.

    Args:
        xs: X positions, one per tree.
        ys: Y positions, one per tree.
        degs: Rotation angles in degrees, one per tree.
        tolerance: Intersection areas at or below this value are ignored.

    Returns:
        A list of ``(i, j, area)`` tuples with ``i < j``; empty when there
        are fewer than two trees or no qualifying overlap.
    """
    count = len(xs)
    if count < 2:
        return []

    shapes = [make_tree_polygon(xs[k], ys[k], degs[k]) for k in range(count)]
    spatial_index = STRtree(shapes)
    found = []

    for i, shape in enumerate(shapes):
        # The spatial index narrows the candidates before the exact tests
        for j in spatial_index.query(shape):
            # Only look at each unordered pair once, and never at (i, i)
            if j <= i:
                continue
            other = shapes[j]
            # Pairs that do not intersect, or only touch, are not overlaps
            if not shape.intersects(other) or shape.touches(other):
                continue
            common = shape.intersection(other)
            if common.area > tolerance:
                found.append((i, j, common.area))
    return found

# Run the overlap check on the best-per-N ensemble and summarise the findings
print("Checking ensemble for overlaps...")
overlap_count = 0
overlap_details = []

for n in range(1, 201):
    data = best_per_n[n]['data']
    if data is None:
        # No submission supplied this N, so there is nothing to check
        continue
    xs = strip(data['x'].values)
    ys = strip(data['y'].values)
    ds = strip(data['deg'].values)
    overlaps = check_overlaps(xs, ys, ds)
    if overlaps:
        overlap_count += 1
        # Record the largest intersection area so the "max area" label in the
        # report below is accurate (the first overlap found need not be the
        # largest one).
        largest_area = max(pair_area for _, _, pair_area in overlaps)
        overlap_details.append((n, len(overlaps), largest_area))

print(f"\nN values with overlaps: {overlap_count}")
if overlap_details:
    print("First 10 overlap details:")
    for n, count, area in overlap_details[:10]:
        print(f"  N={n}: {count} overlaps, max area={area:.2e}")

In [None]:
# Build ensemble from VALID (non-overlapping) solutions only
print("\nBuilding VALID ensemble (excluding overlapping solutions)...")

# Best overlap-free candidate per N; 1e300 marks "nothing valid found yet"
valid_per_n = {n: {'score': 1e300, 'snapshot': None, 'data': None} for n in range(1, 201)}

for sub in tqdm(submission_scores, desc="Checking validity"):
    try:
        df = pd.read_csv(sub['path'])
        # The group size N is the part of the id before the first underscore
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

        for n, g in df.groupby('N'):
            if n < 1 or n > 200:
                continue
            xs = strip(g['x'].values)
            ys = strip(g['y'].values)
            ds = strip(g['deg'].values)

            # Check for overlaps
            overlaps = check_overlaps(xs, ys, ds)
            if overlaps:
                continue  # Skip this N from this snapshot

            # Reuse the score computed during the first scan
            sc = sub['per_n_scores'].get(n, 1e300)
            if sc < valid_per_n[n]['score']:
                valid_per_n[n]['score'] = sc
                valid_per_n[n]['snapshot'] = sub['snapshot']
                valid_per_n[n]['data'] = g[['id', 'x', 'y', 'deg']].copy()
    except Exception as e:
        # Stay best-effort, but report the failure instead of hiding it, in
        # the same way the first scan over the submissions does.
        print(f"Error with {sub['path']}: {e}")

# An N without any overlap-free candidate keeps the 1e300 sentinel, which
# would dominate the total below; make that situation visible.
missing_valid = [n for n in range(1, 201) if valid_per_n[n]['data'] is None]
if missing_valid:
    print(f"WARNING: no overlap-free solution for {len(missing_valid)} N values "
          f"(first few: {missing_valid[:10]})")

valid_ensemble_score = sum(valid_per_n[n]['score'] for n in range(1, 201))
print(f"\nValid ensemble score: {valid_ensemble_score:.6f}")
print(f"Improvement from baseline: {70.647327 - valid_ensemble_score:.6f}")

In [None]:
# Tally how many N values each snapshot supplies to the valid ensemble
valid_contributing = {}
for n in range(1, 201):
    source = valid_per_n[n]['snapshot']
    valid_contributing[source] = valid_contributing.get(source, 0) + 1

print("\nValid snapshots contributing to ensemble:")
print("=" * 60)
# Largest contributors first; ties keep their first-seen order
ranked_valid = sorted(valid_contributing.items(), key=lambda item: item[1], reverse=True)
for snap, count in ranked_valid:
    print(f"{snap}: {count} N values")

In [None]:
# Save the valid ensemble submission
print("\nSaving valid ensemble submission...")

# Gather the per-N frames in ascending N order; N values without a valid
# solution are skipped.
ensemble_frames = [
    valid_per_n[n]['data']
    for n in range(1, 201)
    if valid_per_n[n]['data'] is not None
]

# Concatenate whole frames instead of rebuilding the table row by row via
# iterrows(), which is slow and does not preserve column dtypes.
if ensemble_frames:
    ensemble_df = pd.concat(ensemble_frames)
else:
    # Nothing valid was found; keep the expected header so the CSV is well formed
    ensemble_df = pd.DataFrame(columns=['id', 'x', 'y', 'deg'])

print(f"Total rows: {len(ensemble_df)}")
print(f"Expected: 20100")
if len(ensemble_df) != 20100:
    # 20100 = 1 + 2 + ... + 200, i.e. one row per tree for every N in 1..200
    print("WARNING: row count differs from the expected 20100; the submission is incomplete")

# Save (create both target directories so neither write fails on a missing path)
os.makedirs('/home/code/experiments/004_ensemble_valid', exist_ok=True)
os.makedirs('/home/submission', exist_ok=True)
ensemble_df.to_csv('/home/code/experiments/004_ensemble_valid/submission.csv', index=False)
ensemble_df.to_csv('/home/submission/submission.csv', index=False)

print("\nFirst 5 rows:")
print(ensemble_df.head())

In [None]:
# Save metrics
# Count only real snapshots: valid_contributing holds a None key when some N
# has no valid solution, and that entry must not be reported as a snapshot.
num_snapshots_used = sum(1 for snap in valid_contributing if snap is not None)

metrics = {
    'cv_score': valid_ensemble_score,
    'baseline_score': 70.647327,
    'improvement': 70.647327 - valid_ensemble_score,
    'target': 68.888293,
    'gap': valid_ensemble_score - 68.888293,
    'num_snapshots_used': num_snapshots_used,
    'total_snapshots_scanned': len(submission_scores)
}

# Persist the metrics next to the experiment's submission file
with open('/home/code/experiments/004_ensemble_valid/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

# Human-readable recap of the same numbers
print(f"\nMetrics saved: {metrics}")
print(f"\n{'='*60}")
print(f"SUMMARY:")
print(f"  Valid ensemble score: {valid_ensemble_score:.6f}")
print(f"  Baseline: 70.647327")
print(f"  Improvement: {70.647327 - valid_ensemble_score:.6f}")
print(f"  Target: 68.888293")
print(f"  Gap to target: {valid_ensemble_score - 68.888293:.6f}")
print(f"{'='*60}")