# Experiment 002: Ensemble Best Solutions

This notebook:
1. Loads all available pre-optimized solutions
2. For each N, selects the best configuration
3. Creates an ensemble submission

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import math
import glob
import os
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Tree geometry - exact from competition
def get_tree_vertices():
    """Return the base tree outline as a list of 15 ``(x, y)`` tuples.

    The outline starts at the tip, walks down the right-hand edge to the
    bottom of the trunk, and then comes back up the left-hand edge. The
    shape is symmetric about the y-axis, so the left half is simply the
    right half mirrored and traversed in reverse.
    """
    trunk_w, trunk_h = 0.15, 0.2
    top_w, mid_w, base_w = 0.25, 0.4, 0.7
    tip_y, tier_1_y, tier_2_y, base_y = 0.8, 0.5, 0.25, 0.0
    trunk_bottom_y = -trunk_h

    # Right-hand edge, top to bottom (all x values are strictly positive).
    right_side = [
        (top_w / 2, tier_1_y),
        (top_w / 4, tier_1_y),
        (mid_w / 2, tier_2_y),
        (mid_w / 4, tier_2_y),
        (base_w / 2, base_y),
        (trunk_w / 2, base_y),
        (trunk_w / 2, trunk_bottom_y),
    ]
    # Left-hand edge, bottom to top: mirror of the right-hand edge.
    left_side = [(-px, py) for px, py in reversed(right_side)]

    return [(0.0, tip_y), *right_side, *left_side]

# Unrotated, untranslated tree polygon, built once at module level from the
# vertex outline; create_tree_polygon() derives every placed tree from it.
BASE_TREE = Polygon(get_tree_vertices())
# Sanity print of the polygon's (minx, miny, maxx, maxy) bounds.
print(f"Base tree bounds: {BASE_TREE.bounds}")

In [None]:
def parse_value(s):
    """Convert a submission cell to ``float``.

    A string that begins with the ``'s'`` marker has that single leading
    character removed before conversion; any other value is handed to
    ``float`` unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

def load_submission(filepath):
    """Load a submission CSV and attach parsed helper columns.

    The file must provide the columns ``id``, ``x``, ``y`` and ``deg``.
    On success the returned DataFrame additionally contains:

    * ``n`` and ``tree_idx`` -- the two integers obtained by splitting
      ``id`` on ``'_'``;
    * ``x_val``, ``y_val``, ``deg_val`` -- the numeric values of ``x``,
      ``y`` and ``deg`` as produced by ``parse_value``.

    Loading is best-effort: a file that is unreadable, lacks a required
    column, or contains unparsable cells is reported on stdout and ``None``
    is returned instead of raising.
    """
    required_columns = ('id', 'x', 'y', 'deg')
    try:
        df = pd.read_csv(filepath)
        # Validate every column used below, so an incomplete file is rejected
        # explicitly rather than through a KeyError caught by the handler.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            print(f"Skipping {filepath}: missing columns {missing}")
            return None
        df['n'] = df['id'].apply(lambda x: int(x.split('_')[0]))
        df['tree_idx'] = df['id'].apply(lambda x: int(x.split('_')[1]))
        df['x_val'] = df['x'].apply(parse_value)
        df['y_val'] = df['y'].apply(parse_value)
        df['deg_val'] = df['deg'].apply(parse_value)
        return df
    except Exception as e:
        # Deliberately broad: one bad file must not abort the whole scan.
        print(f"Error loading {filepath}: {e}")
        return None

In [None]:
def create_tree_polygon(x, y, deg):
    """Return a placed copy of ``BASE_TREE``.

    The base polygon is first rotated by ``deg`` about the origin (0, 0)
    and the rotated shape is then shifted by ``x`` horizontally and ``y``
    vertically.
    """
    rotated = rotate(BASE_TREE, deg, origin=(0, 0))
    return translate(rotated, xoff=x, yoff=y)

def get_bounding_box_side(trees_df):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Each row of ``trees_df`` must provide ``x_val``, ``y_val`` and
    ``deg_val``; a polygon is built per row and the extents of all polygon
    bounds are combined.
    """
    boxes = [
        create_tree_polygon(row['x_val'], row['y_val'], row['deg_val']).bounds
        for _, row in trees_df.iterrows()
    ]
    # Flatten the (minx, miny, maxx, maxy) tuples into per-axis extents.
    xs = [value for minx, _, maxx, _ in boxes for value in (minx, maxx)]
    ys = [value for _, miny, _, maxy in boxes for value in (miny, maxy)]
    return max(max(xs) - min(xs), max(ys) - min(ys))

def calculate_score_for_n(df, n):
    """Return the score of the ``n``-tree configuration stored in ``df``.

    The score is the squared bounding-box side divided by ``n``. If ``df``
    does not hold exactly ``n`` rows whose ``n`` column equals ``n``, the
    configuration is treated as unusable and ``inf`` is returned.
    """
    group = df.loc[df['n'] == n]
    if len(group) == n:
        return get_bounding_box_side(group) ** 2 / n
    return float('inf')

In [None]:
# Find all CSV files
csv_paths = []
for pattern in [
    '/home/code/santa-2025-csv/*.csv',
    '/home/code/bucket-of-chump/*.csv',
    '/home/code/telegram/*.csv',
    '/home/code/santa25-public/*.csv',
]:
    # glob.glob() yields matches in arbitrary (filesystem) order. The order of
    # csv_paths later decides which file wins a tie when two submissions score
    # identically for some N, so sort each pattern's matches to keep runs
    # reproducible across machines.
    csv_paths.extend(sorted(glob.glob(pattern)))

print(f"Found {len(csv_paths)} CSV files:")
for p in csv_paths:
    print(f"  {os.path.basename(p)}")

In [None]:
# Load all submissions
# A complete submission holds n rows for every N in 1..200, i.e.
# 1 + 2 + ... + 200 = 20100 rows; files with any other row count are ignored.
submissions = {}
for path in csv_paths:
    df = load_submission(path)
    if df is None or len(df) != 20100:
        continue
    submissions[path] = df
    print(f"Loaded: {os.path.basename(path)}")

print(f"\nTotal valid submissions: {len(submissions)}")

In [None]:
# Calculate total score for each submission
print("\nCalculating scores for each submission...")
submission_scores = {}
for path, df in submissions.items():
    # Total score = sum of the per-N scores for N = 1..200.
    submission_scores[path] = sum(
        calculate_score_for_n(df, n) for n in range(1, 201)
    )
    print(f"{os.path.basename(path)}: {submission_scores[path]:.6f}")

# Sort by score (lowest total first)
sorted_subs = sorted(submission_scores.items(), key=lambda entry: entry[1])
print("\nRanked submissions:")
for ranked_path, ranked_score in sorted_subs:
    print(f"  {ranked_score:.6f}: {os.path.basename(ranked_path)}")

In [None]:
# For each N, find the best configuration across all submissions
# NOTE(review): candidates are ranked purely by bounding-box score; nothing in
# this cell checks that the selected trees do not overlap -- confirm the
# source files are already valid.
print("\nFinding best configuration for each N...")
best_configs = {}
best_sources = {}

for n in tqdm(range(1, 201)):
    # Current leader for this N; stays (inf, None, None) if no file qualifies.
    leader_score, leader_name, leader_rows = float('inf'), None, None

    for path, df in submissions.items():
        candidate = calculate_score_for_n(df, n)
        # Strict comparison: on a tie the earlier submission is kept.
        if candidate < leader_score:
            leader_score = candidate
            leader_name = os.path.basename(path)
            leader_rows = df[df['n'] == n].copy()

    best_configs[n] = leader_rows
    best_sources[n] = (leader_name, leader_score)

# Show which sources contributed
print("\nSource contributions:")
source_counts = {}
for winner_name, _ in best_sources.values():
    source_counts.setdefault(winner_name, 0)
    source_counts[winner_name] += 1

# Most frequent contributor first; the stable sort keeps ties in insertion order.
for name, wins in sorted(source_counts.items(), key=lambda kv: kv[1], reverse=True):
    print(f"  {name}: {wins} configurations")

In [None]:
# Calculate ensemble score
# Sum of the winning per-N scores, accumulated in order N = 1..200.
ensemble_score = 0
for size in range(1, 201):
    _, winning_score = best_sources[size]
    ensemble_score = ensemble_score + winning_score
print(f"\nEnsemble total score: {ensemble_score:.6f}")

# sorted_subs is ordered by ascending total, so its first entry is the best file.
best_single_score = sorted_subs[0][1]
print(f"Best single submission: {best_single_score:.6f}")
print(f"Improvement: {best_single_score - ensemble_score:.6f}")

In [None]:
# Create ensemble submission
def _submission_value(raw, parsed):
    """Return the CSV cell text for one coordinate or angle.

    ``raw`` is the cell exactly as read from the source file and ``parsed``
    its float value. An 's'-prefixed source string is passed through
    untouched so that digits beyond double precision are not lost by a
    str -> float -> str round trip; any other cell is formatted from the
    parsed float with the 's' prefix added.
    """
    if isinstance(raw, str) and raw.startswith('s'):
        return raw
    return f"s{parsed}"


# Fail early with a clear message if some N has no usable configuration,
# instead of an AttributeError on None further down.
unresolved = [size for size in range(1, 201) if best_configs.get(size) is None]
if unresolved:
    raise ValueError(f"No valid configuration found for N in {unresolved}")

ensemble_rows = []
for n in range(1, 201):
    config = best_configs[n]
    cells = zip(
        config['id'],
        config['x'], config['x_val'],
        config['y'], config['y_val'],
        config['deg'], config['deg_val'],
    )
    for tree_id, x_raw, x_val, y_raw, y_val, deg_raw, deg_val in cells:
        ensemble_rows.append({
            'id': tree_id,
            'x': _submission_value(x_raw, x_val),
            'y': _submission_value(y_raw, y_val),
            'deg': _submission_value(deg_raw, deg_val),
        })

ensemble_df = pd.DataFrame(ensemble_rows)
ensemble_df = ensemble_df.sort_values('id')

# Save (creating the target directories if they do not exist yet)
for out_path in (
    '/home/submission/submission.csv',
    '/home/code/experiments/002_ensemble/submission.csv',
):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    ensemble_df.to_csv(out_path, index=False)

print(f"Saved ensemble submission with {len(ensemble_df)} rows")
print(f"Score: {ensemble_score:.6f}")

In [None]:
# Save metrics
import json

# Summary of this experiment: ensemble total, best single-file total, the gap
# between them, and how many N each source file contributed.
metrics = dict(
    ensemble_score=ensemble_score,
    best_single_score=sorted_subs[0][1],
    improvement=sorted_subs[0][1] - ensemble_score,
    num_sources=len(submissions),
    source_contributions=source_counts,
)
metrics_path = '/home/code/experiments/002_ensemble/metrics.json'
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)
print("Metrics saved")