# Ensemble Optimizer + Strict Overlap Validation

Combine the best valid configuration for each N across all snapshots, then run the C++ optimizer.

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.validation import make_valid
import os
import warnings
warnings.filterwarnings('ignore')

# Tree vertices
# Outline of a single tree in its own (unrotated, untranslated) frame, given
# as 15 points (TX[i], TY[i]): the tip is at (0, 0.8), the widest tier spans
# x = -0.35..0.35 at y = 0, and the trunk (x = -0.075..0.075) reaches down to
# y = -0.2. The outline is mirror-symmetric about the y axis.
TX = np.array([0,0.125,0.0625,0.2,0.1,0.35,0.075,0.075,-0.075,-0.075,-0.35,-0.1,-0.2,-0.0625,-0.125])
TY = np.array([0.8,0.5,0.5,0.25,0.25,0,0,-0.2,-0.2,0,0,0.25,0.25,0.5,0.5])

def parse_value(s):
    """Convert a submission field to ``float``, dropping an optional 's' prefix.

    String values that start with ``'s'`` (e.g. ``'s0.25'``) have that first
    character removed before conversion; anything else is handed to
    ``float`` unchanged.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_prefix else s)

def get_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` degrees and moved to (x, y).

    Each template vertex (TX[i], TY[i]) is rotated counter-clockwise about
    the origin by ``deg`` degrees, then translated by (x, y). The resulting
    vertex list is wrapped in a Shapely ``Polygon``.
    """
    theta = np.radians(deg)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    corners = []
    for tx, ty in zip(TX, TY):
        px = tx * cos_t - ty * sin_t + x
        py = tx * sin_t + ty * cos_t + y
        corners.append((px, py))
    return Polygon(corners)

def score_group(xs, ys, degs):
    """Score one group of trees: squared bounding-square side per tree.

    Every tree ``k`` is placed at (xs[k], ys[k]) with rotation degs[k]
    (degrees). The side is the larger of the x-extent and y-extent over all
    transformed vertices of all trees, and the result is
    ``side * side / len(xs)``.
    """
    count = len(xs)
    pts_x, pts_y = [], []
    for k in range(count):
        theta = np.radians(degs[k])
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        # Same rotate-then-translate transform as the polygon builder,
        # applied to every template vertex of this tree.
        pts_x.extend(tx * cos_t - ty * sin_t + xs[k] for tx, ty in zip(TX, TY))
        pts_y.extend(tx * sin_t + ty * cos_t + ys[k] for tx, ty in zip(TX, TY))
    width = max(pts_x) - min(pts_x)
    height = max(pts_y) - min(pts_y)
    side = max(width, height)
    return side * side / count

def strict_check_overlaps(xs, ys, degs, tolerance=1e-9):
    """Look for a pair of trees whose intersection area exceeds ``tolerance``.

    Returns ``(True, (i, j, area))`` for the first offending pair found
    (with ``i < j``), or ``(False, None)`` when no pair exceeds the
    tolerance. Pairs that merely touch, or whose shared area is at most
    ``tolerance``, are not reported.
    """
    trees = [get_tree_polygon(xs[k], ys[k], degs[k]) for k in range(len(xs))]
    for a, first in enumerate(trees):
        for b in range(a + 1, len(trees)):
            second = trees[b]
            # Cheap predicate first; only compute the intersection geometry
            # for pairs that actually meet.
            if not first.intersects(second):
                continue
            shared_area = first.intersection(second).area
            if shared_area > tolerance:
                return True, (a, b, shared_area)
    return False, None

# Progress marker: shows in the cell output that the helper definitions ran.
print("Functions defined")

Functions defined


In [2]:
# Load all snapshots and find best VALID configuration for each N
snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
snapshots = sorted(os.listdir(snapshot_dir))
print(f"Found {len(snapshots)} snapshots")

# Select candidates with the same tolerance the final overlap validation uses
# (1e-12). Selecting with the looser default (1e-9) can pick a configuration
# that later fails the final check, leaving no fallback for that N.
SELECTION_TOLERANCE = 1e-12

# Store best valid config for each N
best_configs = {}  # n -> (score, xs, ys, degs, snapshot)

for snap in snapshots:
    sub_path = os.path.join(snapshot_dir, snap, 'submission', 'submission.csv')
    if not os.path.exists(sub_path):
        continue

    # Best-effort load: a malformed snapshot (missing column, bad id, bad
    # number) is reported and skipped as a whole.
    try:
        df = pd.read_csv(sub_path)
        df['x_val'] = df['x'].apply(parse_value)
        df['y_val'] = df['y'].apply(parse_value)
        df['deg_val'] = df['deg'].apply(parse_value)
        df['n'] = df['id'].apply(lambda x: int(x.split('_')[0]))
    except Exception as e:
        print(f"Error with {snap}: {e}")
        continue

    # groupby visits only the N values present, in ascending order, and keeps
    # the file's row order inside each group.
    for n, group in df.groupby('n'):
        n = int(n)
        # A group for N must contain exactly N trees. Missing or incomplete
        # groups are skipped individually so the snapshot's other groups
        # are still considered.
        if not 1 <= n <= 200 or len(group) != n:
            continue

        xs = group['x_val'].values
        ys = group['y_val'].values
        degs = group['deg_val'].values

        # NaN/inf coordinates would yield a meaningless score and geometry.
        if not (np.isfinite(xs).all() and np.isfinite(ys).all()
                and np.isfinite(degs).all()):
            continue

        try:
            # Score first: it is cheap, and the pairwise overlap check is
            # only worth running for a candidate that would replace the
            # current best. The selected configuration is unchanged.
            score = score_group(xs, ys, degs)
            if n in best_configs and not score < best_configs[n][0]:
                continue

            has_overlap, _ = strict_check_overlaps(
                xs, ys, degs, tolerance=SELECTION_TOLERANCE
            )
        except Exception as e:
            # Keep going with the remaining groups of this snapshot.
            print(f"Error with {snap} (N={n}): {e}")
            continue

        if has_overlap:
            continue

        best_configs[n] = (score, xs.copy(), ys.copy(), degs.copy(), snap)

print(f"\nFound valid configs for {len(best_configs)} N values")

Found 100 snapshots


Error with 21145963314: 'deg'



Found valid configs for 200 N values


In [None]:
# List every N in 1..200 for which no valid configuration was stored
missing = sorted(set(range(1, 201)).difference(best_configs))
print(f"Missing valid configs for N: {missing[:20]}... ({len(missing)} total)")

# The ensemble total is only meaningful when all 200 groups are present
if len(best_configs) != 200:
    print("\nNot all N values have valid configs!")
else:
    total_score = sum(best_configs[k][0] for k in range(1, 201))
    print(f"\nTotal score from ensemble: {total_score:.6f}")

In [None]:
# Print the winning score and source snapshot for N = 1..10
print("Best valid configs for small N:")
for n in range(1, 11):
    entry = best_configs.get(n)
    if entry is None:
        print(f"N={n}: NO VALID CONFIG FOUND!")
        continue
    # Entries are (score, xs, ys, degs, snapshot); only the ends are shown.
    score, snap = entry[0], entry[4]
    print(f"N={n}: score={score:.6f} from snapshot {snap}")

In [None]:
# Write the ensemble submission, then re-read it and recompute the score
if len(best_configs) != 200:
    print("Cannot create ensemble - missing configs")
else:
    # One row per tree; ids are '<N zero-padded to 3>_<tree index>' and every
    # numeric field carries the 's' prefix that parse_value strips on read.
    rows = []
    for n in range(1, 201):
        score, xs, ys, degs, snap = best_configs[n]
        rows.extend(
            {
                'id': f'{n:03d}_{i}',
                'x': f's{xs[i]}',
                'y': f's{ys[i]}',
                'deg': f's{degs[i]}'
            }
            for i in range(n)
        )

    df_ensemble = pd.DataFrame(rows)
    df_ensemble.to_csv('ensemble_submission.csv', index=False)
    print(f"Created ensemble_submission.csv with {len(df_ensemble)} rows")

    # Verify score from the file as written, not from the in-memory arrays
    df_verify = pd.read_csv('ensemble_submission.csv')
    for col in ('x', 'y', 'deg'):
        df_verify[f'{col}_val'] = df_verify[col].apply(parse_value)
    df_verify['n'] = df_verify['id'].apply(lambda row_id: int(row_id.split('_')[0]))

    verify_total = 0
    for n in range(1, 201):
        members = df_verify[df_verify['n'] == n]
        verify_total += score_group(
            members['x_val'].values,
            members['y_val'].values,
            members['deg_val'].values,
        )

    print(f"Verified ensemble score: {verify_total:.6f}")

In [None]:
# Final validation - re-check every stored group with a tighter tolerance
print("\nFinal overlap validation...")
invalid_groups = []
for n in range(1, 201):
    if n not in best_configs:
        continue
    score, xs, ys, degs, snap = best_configs[n]
    has_overlap, info = strict_check_overlaps(xs, ys, degs, tolerance=1e-12)
    if not has_overlap:
        continue
    # info is (i, j, area) for the first overlapping pair in this group
    invalid_groups.append((n, info))
    print(f"N={n}: OVERLAP DETECTED! {info}")

if invalid_groups:
    print(f"\n{len(invalid_groups)} groups have overlaps!")
else:
    print("All groups are valid - no overlaps detected!")

In [None]:
# Publish the ensemble only when all 200 groups exist and none overlaps
import shutil

ready_to_submit = len(best_configs) == 200 and not invalid_groups

if not ready_to_submit:
    print("Cannot submit - validation failed")
else:
    shutil.copy('ensemble_submission.csv', '/home/submission/submission.csv')
    print("Copied to /home/submission/submission.csv")

    # Record the score recomputed from the written CSV
    import json
    with open('metrics.json', 'w') as f:
        json.dump({'cv_score': verify_total}, f)
    print(f"Saved metrics.json with cv_score: {verify_total}")