# Experiment 005: Simple Strict Ensemble

Start from the valid baseline and, for each N, accept a replacement configuration only if it scores strictly better and passes strict overlap validation.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.ops import unary_union
from decimal import Decimal, getcontext
import json
import os
from glob import glob

# Set the precision of the default Decimal context to 30 significant digits.
# NOTE(review): no Decimal arithmetic appears in the cells visible here — confirm this is still needed.
getcontext().prec = 30
print("Imports done")

In [None]:
# Tree geometry
def get_tree_polygon():
    """Build the tree outline as a shapely Polygon in its local coordinates.

    The outline is symmetric about the y-axis, so only the tip and the
    right-hand half are written out. The left-hand half is the mirror image
    of the right-hand half walked in reverse, which continues the ring from
    the trunk base back up towards the tip.
    """
    tip = (0.0, 0.8)
    # Right-hand half, from just below the tip down to the trunk base.
    right_half = [
        (0.125, 0.5), (0.0625, 0.5),   # y = 0.5 step
        (0.2, 0.25), (0.1, 0.25),      # y = 0.25 step
        (0.35, 0.0), (0.075, 0.0),     # y = 0.0 step
        (0.075, -0.2),                 # bottom-right corner of the trunk
    ]
    left_half = [(-px, py) for px, py in reversed(right_half)]
    return Polygon([tip] + right_half + left_half)

# Module-level template polygon; create_tree() rotates and translates it for each placement.
TREE_POLY = get_tree_polygon()
# Sanity print of the exterior coordinate count and the polygon area.
print(f"Tree: {len(TREE_POLY.exterior.coords)} vertices, area={TREE_POLY.area:.6f}")

In [None]:
def parse_s_value(s_val):
    """Convert a submission cell to float, dropping a leading 's' if present.

    Strings such as 's1.25' have the 's' prefix stripped before conversion;
    any other value (plain numeric strings, ints, floats) is passed to
    float() unchanged.
    """
    has_prefix = isinstance(s_val, str) and s_val.startswith('s')
    raw = s_val[1:] if has_prefix else s_val
    return float(raw)

def create_tree(x, y, deg):
    """Return a copy of TREE_POLY rotated by `deg` about (0, 0), then shifted by (x, y)."""
    # Rotate first, around the template's own origin, so the later shift
    # does not affect the pivot point.
    turned = rotate(TREE_POLY, deg, origin=(0, 0))
    placed = translate(turned, x, y)
    return placed

def get_bbox_side(polygons):
    """Return the longer side of the axis-aligned box enclosing all polygons.

    The joint extent is taken directly from each geometry's own ``bounds``
    tuple ``(minx, miny, maxx, maxy)``: the bounding box of a collection is
    just the min/max of the individual boxes, so no geometric union has to be
    built. That avoids an expensive overlay operation on every call and
    cannot fail on degenerate or overlapping input.

    Args:
        polygons: Sequence of geometries exposing a ``bounds`` 4-tuple.

    Returns:
        ``max(width, height)`` of the joint bounding box, or ``0`` when the
        sequence is empty.
    """
    if not polygons:
        return 0
    # Transpose the per-polygon (minx, miny, maxx, maxy) tuples into four columns.
    min_xs, min_ys, max_xs, max_ys = zip(*(poly.bounds for poly in polygons))
    width = max(max_xs) - min(min_xs)
    height = max(max_ys) - min(min_ys)
    return max(width, height)

def check_overlaps_strict(polygons, tolerance=1e-15):
    """STRICT overlap check.

    Every unordered pair is tested. A pair counts as overlapping when the two
    geometries intersect, are not merely touching, and the area of their
    intersection exceeds ``tolerance``. A failure while computing the
    intersection is treated as an overlap so that questionable
    configurations are rejected rather than accepted.

    Args:
        polygons: Sequence of geometries providing ``intersects``,
            ``touches`` and ``intersection`` (the latter returning an object
            with an ``area`` attribute).
        tolerance: Largest intersection area that is still ignored.

    Returns:
        ``(True, message)`` for the first offending pair found, otherwise
        ``(False, None)``.
    """
    if len(polygons) <= 1:
        return False, None
    for i, first in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            second = polygons[j]
            # Cheap predicates first: disjoint or boundary-only contact is fine.
            if not first.intersects(second) or first.touches(second):
                continue
            try:
                overlap_area = first.intersection(second).area
            except Exception:
                # Geometry errors are reported as overlaps; KeyboardInterrupt
                # and SystemExit are deliberately not caught here.
                return True, f"Trees {i},{j} intersection error"
            if overlap_area > tolerance:
                return True, f"Trees {i},{j} overlap (area={overlap_area:.2e})"
    return False, None

print("Functions defined")

In [None]:
def load_submission(path):
    """Load a submission CSV and add parsed numeric helper columns.

    Loading is best-effort: the scan feeds this function many files of
    unknown quality, so any problem with a file makes it return ``None``
    instead of raising.

    Args:
        path: Location of the CSV file.

    Returns:
        The DataFrame read from ``path`` with four extra columns: ``x_val``,
        ``y_val`` and ``deg_val`` (the ``x``/``y``/``deg`` cells converted by
        ``parse_s_value``) and ``n`` (the integer before the first ``_`` in
        ``id``). ``None`` if the file cannot be read, a required column is
        missing, or a value cannot be parsed.
    """
    required_columns = ('id', 'x', 'y', 'deg')
    try:
        df = pd.read_csv(path)
        # Reject files with a different schema up front, not only a missing 'x'.
        if any(col not in df.columns for col in required_columns):
            return None
        for col in ('x', 'y', 'deg'):
            df[f'{col}_val'] = df[col].apply(parse_s_value)
        df['n'] = df['id'].apply(lambda row_id: int(row_id.split('_')[0]))
        return df
    except Exception:
        # Unreadable or malformed file: skip it. Unlike a bare ``except``,
        # this still lets KeyboardInterrupt/SystemExit stop the run.
        return None

# Load valid baseline
baseline_path = '/home/nonroot/snapshots/santa-2025/21328309254/submission/submission.csv'
baseline_df = load_submission(baseline_path)
if baseline_df is None:
    # load_submission() reports every failure as None; stop here with a clear
    # message instead of an opaque TypeError from len(None) below.
    raise RuntimeError(f"Could not load baseline submission from {baseline_path}")
print(f"Loaded baseline: {len(baseline_df)} rows")

In [None]:
# Compute baseline scores
# For every N in 1..200 the score is (bounding-square side)^2 / N; the parsed
# floats and the original cell strings are both kept so the submission can be
# written back without reformatting the numbers.
baseline_scores = {}
baseline_data = {}
incomplete_ns = []

for n in range(1, 201):
    n_df = baseline_df[baseline_df['n'] == n]
    if len(n_df) != n:
        print(f"ERROR: N={n} has {len(n_df)} rows")
        incomplete_ns.append(n)
        continue

    xs = n_df['x_val'].tolist()
    ys = n_df['y_val'].tolist()
    degs = n_df['deg_val'].tolist()

    polygons = [create_tree(x, y, deg) for x, y, deg in zip(xs, ys, degs)]
    side = get_bbox_side(polygons)
    score = (side ** 2) / n

    baseline_scores[n] = score
    baseline_data[n] = {
        'xs': xs, 'ys': ys, 'degs': degs,
        'x_strs': n_df['x'].tolist(),
        'y_strs': n_df['y'].tolist(),
        'deg_strs': n_df['deg'].tolist()
    }

if incomplete_ns:
    # Later cells index baseline_scores/baseline_data for every N in 1..200,
    # and the total below would silently omit the missing entries, so an
    # incomplete baseline is a hard error rather than a warning.
    raise ValueError(f"Baseline is incomplete for N in {incomplete_ns}")

baseline_total = sum(baseline_scores.values())
print(f"Baseline total: {baseline_total:.6f}")

In [None]:
# Find CSV files to scan
# glob() returns matches in arbitrary, filesystem-dependent order. Sorting
# makes the scan order reproducible, and with it the 'source' recorded when
# several files tie on a score.
csv_files = sorted(glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv'))
print(f"Found {len(csv_files)} submission files")

In [None]:
# Scan for improvements with STRICT validation
# best_per_n starts as the baseline for every N. A candidate configuration
# replaces the current best only if its score is strictly lower and it passes
# check_overlaps_strict().
best_per_n = {n: {'score': baseline_scores[n], 'data': baseline_data[n], 'source': 'baseline'}
              for n in range(1, 201)}

# Counts accepted replacements; one N can be replaced more than once across files.
improvement_count = 0

for idx, csv_path in enumerate(csv_files):
    if idx % 20 == 0:
        print(f"Processing file {idx+1}/{len(csv_files)}...")

    df = load_submission(csv_path)
    if df is None:
        continue

    for n in range(1, 201):
        n_df = df[df['n'] == n]
        if len(n_df) != n:
            continue

        xs = n_df['x_val'].tolist()
        ys = n_df['y_val'].tolist()
        degs = n_df['deg_val'].tolist()

        # Compute score
        try:
            polygons = [create_tree(x, y, deg) for x, y, deg in zip(xs, ys, degs)]
            side = get_bbox_side(polygons)
            score = (side ** 2) / n
        except Exception:
            # Geometry could not be built for this candidate; skip it, but
            # still allow KeyboardInterrupt/SystemExit to stop the scan.
            continue

        # Only consider if better. Written as "not (a < b)" rather than
        # "a >= b" so that a NaN score (e.g. from missing coordinates), for
        # which every comparison is False, is rejected instead of accepted.
        if not (score < best_per_n[n]['score']):
            continue

        # STRICT overlap check
        has_overlap, msg = check_overlaps_strict(polygons)
        if has_overlap:
            continue

        # Valid improvement!
        improvement = best_per_n[n]['score'] - score
        if improvement > 0.001:
            print(f"  N={n}: {best_per_n[n]['score']:.6f} -> {score:.6f} ({improvement:.6f})")

        best_per_n[n] = {
            'score': score,
            'data': {
                'xs': xs, 'ys': ys, 'degs': degs,
                'x_strs': n_df['x'].tolist(),
                'y_strs': n_df['y'].tolist(),
                'deg_strs': n_df['deg'].tolist()
            },
            'source': csv_path
        }
        improvement_count += 1

print(f"\nFound {improvement_count} valid improvements")

In [None]:
# Compute ensemble score
# Sum the per-N best scores and compare with the baseline total; a positive
# `improvement` means the ensemble total is lower than the baseline total.
ensemble_total = sum(best_per_n[n]['score'] for n in range(1, 201))
improvement = baseline_total - ensemble_total

print(f"\n{'='*50}")
print(f"Baseline: {baseline_total:.6f}")
print(f"Ensemble: {ensemble_total:.6f}")
print(f"Improvement: {improvement:.6f}")
print(f"{'='*50}")
# NOTE(review): 68.888293 is a hard-coded target score whose origin is not shown here — confirm the value.
print(f"\nTarget: 68.888293")
print(f"Gap: {ensemble_total - 68.888293:.6f}")

In [None]:
# Final validation
# Rebuild the polygons of every selected configuration from its stored floats
# and run the strict overlap check once more before anything is written out.
print("\nFinal validation...")
all_valid = True
for n in range(1, 201):
    data = best_per_n[n]['data']
    polygons = [create_tree(data['xs'][i], data['ys'][i], data['degs'][i]) for i in range(n)]
    has_overlap, msg = check_overlaps_strict(polygons)
    if has_overlap:
        print(f"OVERLAP at N={n}: {msg}")
        all_valid = False
        # Revert this N to the baseline entry (source is tagged 'fallback').
        best_per_n[n] = {'score': baseline_scores[n], 'data': baseline_data[n], 'source': 'fallback'}

if all_valid:
    print("All 200 configurations VALID!")
else:
    # At least one N was reverted, so the total has to be recomputed.
    ensemble_total = sum(best_per_n[n]['score'] for n in range(1, 201))
    print(f"After fallbacks: {ensemble_total:.6f}")

In [None]:
# Save submission
# Rows are emitted for N = 1..200 with ids of the form 'NNN_i'. The x/y/deg
# cells are the original strings of the chosen source file, so no numeric
# precision is lost by re-formatting.
rows = []
for n in range(1, 201):
    data = best_per_n[n]['data']
    for i in range(n):
        rows.append({
            'id': f'{n:03d}_{i}',
            'x': data['x_strs'][i],
            'y': data['y_strs'][i],
            'deg': data['deg_strs'][i]
        })

df_out = pd.DataFrame(rows)
for out_path in (
    '/home/code/experiments/005_strict_ensemble/submission.csv',
    '/home/submission/submission.csv',
):
    # to_csv() does not create missing parent directories; make sure they
    # exist so the run does not fail at the very last step.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    df_out.to_csv(out_path, index=False)
print(f"Saved {len(df_out)} rows")

In [None]:
# Save metrics
# Summary of this run: the ensemble total (after any fallbacks), the baseline
# total, their difference, and the distance to the hard-coded target.
# NOTE(review): 68.888293 is a hard-coded target score whose origin is not shown here — confirm the value.
metrics = {
    'cv_score': ensemble_total,
    'baseline_score': baseline_total,
    'improvement': baseline_total - ensemble_total,
    'target': 68.888293,
    'gap': ensemble_total - 68.888293
}

# Written as indented JSON into the experiment directory.
with open('/home/code/experiments/005_strict_ensemble/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"Metrics: {metrics}")