# Experiment 003: Comprehensive Ensemble

Compare all available pre-optimized sources and, for each N value, pick the configuration with the lowest score.

In [None]:
import pandas as pd
import numpy as np
import glob
import os
from numba import njit
import math

# Tree geometry
@njit
def make_polygon_template():
    """Return the vertex coordinates of the unrotated tree outline.

    The outline has 15 vertices. It starts at the tip, runs down the
    right-hand side to the bottom of the trunk, and comes back up the
    left-hand side as a mirror image of the right.

    Returns:
        A pair ``(x, y)`` of float64 arrays with the vertex coordinates.
    """
    # Horizontal extents: trunk, bottom tier, middle tier, top tier.
    trunk_w = 0.15
    bottom_w = 0.7
    middle_w = 0.4
    top_w = 0.25

    # Vertical levels; the trunk hangs below y = 0 by its height.
    trunk_h = 0.2
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h

    half_top = top_w / 2
    quarter_top = top_w / 4
    half_middle = middle_w / 2
    quarter_middle = middle_w / 4
    half_bottom = bottom_w / 2
    half_trunk = trunk_w / 2

    x = np.array(
        [
            0, half_top, quarter_top, half_middle, quarter_middle,
            half_bottom, half_trunk, half_trunk,
            -half_trunk, -half_trunk, -half_bottom,
            -quarter_middle, -half_middle, -quarter_top, -half_top,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y, tier1_y, tier1_y, tier2_y, tier2_y,
            base_y, base_y, trunk_bottom_y,
            trunk_bottom_y, base_y, base_y,
            tier2_y, tier2_y, tier1_y, tier1_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of placed polygons.

    Every placement ``i`` is the template ``(tx, ty)`` rotated by ``degs[i]``
    degrees and then shifted by ``(xs[i], ys[i])``.

    Args:
        xs: Array of x offsets, one per placement.
        ys: Array of y offsets, one per placement.
        degs: Array of rotation angles in degrees, one per placement.
        tx: Template vertex x coordinates.
        ty: Template vertex y coordinates.

    Returns:
        ``side * side / n`` where ``side`` is the longer edge of the
        axis-aligned bounding box around all transformed vertices and ``n``
        is ``xs.size``.
    """
    count = xs.size
    vertex_count = tx.size

    # Extreme sentinels so the first vertex visited replaces every bound.
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300

    for k in range(count):
        angle = degs[k] * math.pi / 180.0
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        off_x = xs[k]
        off_y = ys[k]
        for v in range(vertex_count):
            px = cos_a * tx[v] - sin_a * ty[v] + off_x
            py = sin_a * tx[v] + cos_a * ty[v] + off_y
            if px < min_x:
                min_x = px
            if px > max_x:
                max_x = px
            if py < min_y:
                min_y = py
            if py > max_y:
                max_y = py

    width = max_x - min_x
    height = max_y - min_y
    side = max(width, height)
    return side * side / count

def strip(a):
    """Convert a sequence of values to a float64 array, ignoring any "s".

    Each value is turned into text, every "s" character is removed, and the
    remainder is parsed as a float.

    Args:
        a: Iterable of values (strings such as "s1.5" or plain numbers).

    Returns:
        A one-dimensional float64 numpy array with one entry per input value.

    Raises:
        ValueError: If a value is not a valid float once "s" is removed.
    """
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace("s", "")))
    return np.array(parsed, dtype=np.float64)

# Build the polygon template once; the scoring calls further down pass
# tx and ty to score_group.
tx, ty = make_polygon_template()
print("Functions defined")

In [None]:
# Find all CSV files in preoptimized folder
preopt_base = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

# os.walk yields nothing for a missing directory, so flag a bad path
# explicitly instead of letting "Found 0 CSV files" hide it.
if not os.path.isdir(preopt_base):
    print(f"Warning: {preopt_base} is not a directory")

# Sorted so that the listing, and the choice between equally scored sources
# when they are compared later, do not depend on filesystem ordering.
csv_files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(preopt_base)
    for name in names
    if name.endswith('.csv')
)

print(f"Found {len(csv_files)} CSV files:")
for f in csv_files:
    print(f"  {f}")

In [None]:
# Load and compare all sources
# best[n] holds the lowest score seen so far for N = n, the rows that
# produced it, and a label for the file they came from. 1e300 is a placeholder
# meaning "nothing found yet".
best = {n: {"score": 1e300, "data": None, "src": None} for n in range(1, 201)}

for fp in csv_files:
    try:
        df = pd.read_csv(fp)
    except Exception as e:
        print(f"Error loading {fp}: {e}")
        continue

    if not {"id", "x", "y", "deg"}.issubset(df.columns):
        print(f"Skipping {fp} - missing columns")
        continue

    # The part of the id before the first "_" is the N the row belongs to.
    # One malformed file must not abort the whole scan.
    try:
        df["N"] = df["id"].astype(str).str.split("_").str[0].astype(int)
    except (ValueError, TypeError) as e:
        print(f"Skipping {fp} - unparseable id column: {e}")
        continue

    # Path relative to the search root, so that files sharing a basename in
    # different folders stay distinguishable in the later reports.
    src_label = os.path.relpath(fp, preopt_base)

    for n, g in df.groupby("N"):
        if n < 1 or n > 200:
            continue
        # score_group divides by the number of rows it receives, so a group
        # whose row count differs from N is scored on a different basis and
        # would put the wrong number of rows for N into the ensemble.
        if len(g) != n:
            print(f"Skipping N={n} in {fp} - expected {n} rows, found {len(g)}")
            continue
        try:
            xs = strip(g["x"].to_numpy())
            ys = strip(g["y"].to_numpy())
            ds = strip(g["deg"].to_numpy())
        except ValueError as e:
            print(f"Skipping N={n} in {fp} - bad numeric value: {e}")
            continue
        sc = score_group(xs, ys, ds, tx, ty)
        # Strict comparison: on a tie the earlier file in csv_files wins.
        if sc < best[n]["score"]:
            best[n]["score"] = float(sc)
            best[n]["data"] = g.drop(columns=["N"]).copy()
            best[n]["src"] = src_label

print("\nDone scanning all sources")

In [None]:
# Show which sources are best for each N
# Tally how many N values each source label won; N values with no winner
# (empty or missing label) are left out.
source_counts = {}
for n in range(1, 201):
    winner = best[n]["src"]
    if not winner:
        continue
    source_counts[winner] = 1 + source_counts.get(winner, 0)

# Most frequent source first; ties keep their first-seen order.
ranked_sources = sorted(
    source_counts.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

print("Source distribution:")
for label, wins in ranked_sources:
    print(f"  {label}: {wins} N values")

In [None]:
# Calculate total score
# Baseline total the ensemble is compared against (value unchanged from the
# original cell); named once so the two uses cannot drift apart.
baseline_score = 70.676102

# An N with no configuration still carries the 1e300 placeholder score. The
# placeholder is deliberately left in the total so a gap can never look like
# an improvement, but the reason for the huge number is spelled out here.
missing_n = [n for n in range(1, 201) if best[n]["data"] is None]
if missing_n:
    print(f"Warning: no configuration found for {len(missing_n)} N values: {missing_n}")

total_score = sum(best[n]["score"] for n in range(1, 201))
print(f"\nTotal ensemble score: {total_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Improvement: {baseline_score - total_score:.6f}")

In [None]:
# Show top 20 contributors to score
# One (N, score, source) tuple per N, largest score first; equal scores keep
# ascending-N order.
per_n = sorted(
    ((n, best[n]["score"], best[n]["src"]) for n in range(1, 201)),
    key=lambda item: item[1],
    reverse=True,
)

print("\nTop 20 score contributors:")
for tree_count, contribution, origin in per_n[:20]:
    print(f"  N={tree_count:3d}: {contribution:.6f} from {origin}")

In [None]:
# Build the ensemble submission
# Collect the winning rows for every N that has a configuration.
rows = [best[n]["data"] for n in range(1, 201) if best[n]["data"] is not None]
if not rows:
    # pd.concat on an empty list fails with an opaque "No objects to
    # concatenate"; report the actual problem instead.
    raise ValueError("No valid configurations were found in any source; cannot build ensemble")

out = pd.concat(rows, ignore_index=True)

# Split each id on "_" once and sort numerically by both parts. astype(str)
# matches how ids are parsed when the sources are loaded and verified.
id_parts = out["id"].astype(str).str.split("_")
out["sn"] = id_parts.str[0].astype(int)
out["si"] = id_parts.str[1].astype(int)
out = out.sort_values(["sn", "si"]).drop(columns=["sn", "si"])
out = out[["id", "x", "y", "deg"]]

print(f"Ensemble has {len(out)} rows")
print(out.head(10))

In [None]:
# Save ensemble
output_paths = [
    '/home/code/experiments/003_ensemble/submission.csv',
    '/home/submission/submission.csv',
]
for out_path in output_paths:
    # to_csv does not create missing parent directories, so make sure they
    # exist before writing.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    out.to_csv(out_path, index=False)
print("Saved ensemble submission")

# Verify score
# Re-read the file that was just written and re-score it from scratch, so the
# printed total reflects what is actually on disk.
df_verify = pd.read_csv('/home/submission/submission.csv')
df_verify["N"] = df_verify["id"].astype(str).str.split("_").str[0].astype(int)

verify_total = 0
for n, g in df_verify.groupby("N"):
    xs = strip(g["x"].to_numpy())
    ys = strip(g["y"].to_numpy())
    ds = strip(g["deg"].to_numpy())
    sc = score_group(xs, ys, ds, tx, ty)
    verify_total += sc

print(f"\nVerified total score: {verify_total:.6f}")