# Multi-Source Ensemble

Combine configurations from ALL available sources:
- saspav_latest (baseline)
- All submission candidates (candidate_000 through candidate_012)
- eazy_output.csv
- Current submission

For each N (1–200), keep the lowest-scoring configuration that passes the overlap check, regardless of which source it comes from.

In [1]:
import pandas as pd
import numpy as np
import math
import os
import glob
from shapely.geometry import Polygon
from shapely import affinity

print("Starting multi-source ensemble...")

Starting multi-source ensemble...


In [2]:
# Tree shape constants (tree-local coordinates, foliage base on y = 0)

# Full widths; the outline uses half (and quarter) of these on each side of x = 0
TRUNK_W = 0.15
BASE_W = 0.7
MID_W = 0.4
TOP_W = 0.25

# Vertical levels, from the tip down to the foliage base
TIP_Y = 0.8
TIER_1_Y = 0.5
TIER_2_Y = 0.25
BASE_Y = 0.0

# The trunk extends below the base by its own height
TRUNK_H = 0.2
TRUNK_BOTTOM_Y = -TRUNK_H

def get_tree_poly(x, y, deg):
    """Build the tree outline as a shapely Polygon for one placement.

    The 15-vertex outline is defined around the local origin, rotated by
    ``deg`` about (0, 0) with ``affinity.rotate`` and then shifted by (x, y)
    with ``affinity.translate``.
    """
    # Right-hand silhouette, walking down from below the tip to the trunk's bottom corner.
    right_side = [
        (TOP_W / 2.0, TIER_1_Y),
        (TOP_W / 4.0, TIER_1_Y),
        (MID_W / 2.0, TIER_2_Y),
        (MID_W / 4.0, TIER_2_Y),
        (BASE_W / 2.0, BASE_Y),
        (TRUNK_W / 2.0, BASE_Y),
        (TRUNK_W / 2.0, TRUNK_BOTTOM_Y),
    ]
    # The left-hand side is the mirror image, walked bottom-up so the ring stays continuous.
    left_side = [(-px, py) for px, py in reversed(right_side)]

    outline = Polygon([(0.0, TIP_Y)] + right_side + left_side)
    rotated = affinity.rotate(outline, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def calculate_side_length(xs, ys, degs):
    """Return the longer side of the axis-aligned box enclosing every tree.

    ``xs``, ``ys`` and ``degs`` are indexed in parallel; one tree polygon is
    built per index in ``range(len(xs))``.
    """
    boxes = [get_tree_poly(xs[k], ys[k], degs[k]).bounds for k in range(len(xs))]

    # Each reduction is seeded with the +/-1e10 sentinel, so an empty input
    # still yields the sentinel-based result.
    left = min([1e10] + [box[0] for box in boxes])
    bottom = min([1e10] + [box[1] for box in boxes])
    right = max([-1e10] + [box[2] for box in boxes])
    top = max([-1e10] + [box[3] for box in boxes])

    width = right - left
    height = top - bottom
    return max(width, height)

def calculate_score(xs, ys, degs):
    """Return the squared bounding-square side divided by the number of trees."""
    count = len(xs)
    edge = calculate_side_length(xs, ys, degs)
    area = edge * edge
    return area / count

def check_overlaps(xs, ys, degs):
    """Return True if any two trees overlap with an area above 1e-10, else False.

    Pairs that do not intersect, or that only touch, are not counted as overlaps.
    """
    count = len(xs)
    trees = [get_tree_poly(xs[k], ys[k], degs[k]) for k in range(count)]

    for idx, first in enumerate(trees):
        for second in trees[idx + 1:]:
            if not first.intersects(second):
                continue
            if first.touches(second):
                continue
            # Only a shared region with measurable area counts as an overlap.
            if first.intersection(second).area > 1e-10:
                return True
    return False

In [3]:
def load_csv_configs(filepath):
    """Load per-N tree configurations from a CSV file.

    The file must have an ``id`` column whose values start with a zero-padded
    ``NNN_`` prefix, ``x`` and ``y`` columns, and a rotation column named
    either ``deg`` or ``angle``.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A dict mapping N (1..200) to ``{'xs', 'ys', 'degs'}`` float64 arrays,
        ordered by ``id``. Only N values with exactly N rows are included.
        Returns ``None`` when no rotation column exists or when any error
        occurs while reading or parsing (the error is printed).
    """
    try:
        df = pd.read_csv(filepath)

        # The rotation column is called 'deg' in some files and 'angle' in others.
        angle_col = next((c for c in ('deg', 'angle') if c in df.columns), None)
        if angle_col is None:
            return None

        # Text-valued columns carry an 's' marker (e.g. "s0.5"): drop every 's'
        # and parse as float. Test for object *or* string dtypes, because text
        # columns are not always reported as `object`.
        for col in ('x', 'y', angle_col):
            if col not in df.columns:
                continue
            is_text = (pd.api.types.is_object_dtype(df[col])
                       or pd.api.types.is_string_dtype(df[col]))
            if is_text:
                df[col] = (
                    df[col].astype(str).str.replace('s', '', regex=False).astype(float)
                )

        # Bucket rows once by the 4-character "NNN_" prefix rather than
        # rescanning every id for each N.
        by_prefix = {
            prefix: rows
            for prefix, rows in df.groupby(df["id"].str[:4], sort=False)
        }

        configs = {}
        for n in range(1, 201):
            group = by_prefix.get(f"{n:03d}_")
            # A configuration for N is only usable if it has exactly N trees.
            if group is None or len(group) != n:
                continue
            group = group.sort_values("id")
            configs[n] = {
                'xs': group['x'].values.astype(np.float64),
                'ys': group['y'].values.astype(np.float64),
                'degs': group[angle_col].values.astype(np.float64)
            }
        return configs
    except Exception as e:
        # Best-effort loader: a bad file is reported and skipped by the caller.
        print(f"Error loading {filepath}: {e}")
        return None

In [4]:
# Collect all source files as (path, name) pairs

# Baseline: always listed, without an existence check
source_files = [('/home/code/external_data/saspav_latest/santa-2025.csv', 'saspav_latest')]

# Submission candidates 000-012: keep only the files present on disk
candidate_sources = [
    (f'/home/code/submission_candidates/candidate_{i:03d}.csv', f'candidate_{i:03d}')
    for i in range(13)
]
source_files.extend(entry for entry in candidate_sources if os.path.exists(entry[0]))

# Eazy output and the current submission are optional as well
eazy_path = '/home/code/experiments/017_just_luck_multiphase/eazy_output.csv'
optional_sources = [
    (eazy_path, 'eazy_output'),
    ('/home/submission/submission.csv', 'current_submission'),
]
source_files.extend(entry for entry in optional_sources if os.path.exists(entry[0]))

print(f"Found {len(source_files)} source files:")
for path, name in source_files:
    print(f"  {name}: {path}")

Found 16 source files:
  saspav_latest: /home/code/external_data/saspav_latest/santa-2025.csv
  candidate_000: /home/code/submission_candidates/candidate_000.csv
  candidate_001: /home/code/submission_candidates/candidate_001.csv
  candidate_002: /home/code/submission_candidates/candidate_002.csv
  candidate_003: /home/code/submission_candidates/candidate_003.csv
  candidate_004: /home/code/submission_candidates/candidate_004.csv
  candidate_005: /home/code/submission_candidates/candidate_005.csv
  candidate_006: /home/code/submission_candidates/candidate_006.csv
  candidate_007: /home/code/submission_candidates/candidate_007.csv
  candidate_008: /home/code/submission_candidates/candidate_008.csv
  candidate_009: /home/code/submission_candidates/candidate_009.csv
  candidate_010: /home/code/submission_candidates/candidate_010.csv
  candidate_011: /home/code/submission_candidates/candidate_011.csv
  candidate_012: /home/code/submission_candidates/candidate_012.csv
  eazy_output: /home/c

In [5]:
# Load every source; results are keyed by source name
print("\nLoading all configurations...")
all_configs = {}  # source_name -> {n -> config}

for path, name in source_files:
    configs = load_csv_configs(path)
    # None (load error) and an empty dict (no usable N) both count as a failure.
    if not configs:
        print(f"  {name}: FAILED to load")
        continue
    all_configs[name] = configs
    print(f"  {name}: {len(configs)} configurations")

print(f"\nLoaded {len(all_configs)} sources")


Loading all configurations...


  saspav_latest: 200 configurations


  candidate_000: 200 configurations


  candidate_001: 200 configurations


  candidate_002: 200 configurations


  candidate_003: 200 configurations


  candidate_004: 200 configurations


  candidate_005: 200 configurations


  candidate_006: 200 configurations


  candidate_007: 200 configurations


  candidate_008: 200 configurations


  candidate_009: 200 configurations


  candidate_010: 200 configurations


  candidate_011: 200 configurations


  candidate_012: 200 configurations


  eazy_output: 200 configurations


  current_submission: 200 configurations

Loaded 16 sources


In [6]:
# For each N, find the best VALID configuration across all sources.
# "Best" is the lowest calculate_score; "valid" means check_overlaps is False.
# Ties keep the first source in all_configs order.
print("\nFinding best configuration for each N...")

best_configs = {}
best_scores = {}
best_sources = {}

for n in range(1, 201):
    best_score = float('inf')
    best_config = None
    best_source = None

    for source_name, configs in all_configs.items():
        if n not in configs:
            continue

        config = configs[n]
        xs, ys, degs = config['xs'], config['ys'], config['degs']

        # Score first: it is one pass over the trees, whereas the overlap
        # check is pairwise. A candidate that cannot beat the current best
        # never needs validating, so the selected result is unchanged.
        score = calculate_score(xs, ys, degs)
        if not score < best_score:
            continue

        # Only candidates that would win are checked for overlaps.
        if check_overlaps(xs, ys, degs):
            continue

        best_score = score
        best_config = config
        best_source = source_name

    if best_config is not None:
        best_configs[n] = best_config
        best_scores[n] = best_score
        best_sources[n] = best_source

print(f"Found valid configurations for {len(best_configs)} N values")


Finding best configuration for each N...


Found valid configurations for 200 N values


In [7]:
# Count how many N values each source won
print("\nSource contribution analysis:")
source_counts = {}
for source in best_sources.values():
    if source not in source_counts:
        source_counts[source] = 0
    source_counts[source] += 1

# Largest contributors first; equal counts keep first-seen order (stable sort).
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)
for source, count in ranked_sources:
    print(f"  {source}: {count} configurations")


Source contribution analysis:
  candidate_010: 98 configurations
  eazy_output: 73 configurations
  saspav_latest: 23 configurations
  candidate_001: 3 configurations
  candidate_009: 2 configurations
  current_submission: 1 configurations


In [8]:
# Total score of the ensemble: sum of the per-N best scores
total_score = sum(best_scores.values())
print(f"\nTotal ensemble score: {total_score:.9f}")

# Baseline total: saspav_latest scored as-is (no overlap check applied here)
baseline_configs = all_configs.get('saspav_latest', {})
baseline_total = 0
for n in range(1, 201):
    config = baseline_configs.get(n)
    if config is None:
        continue
    # Plain left-to-right accumulation, one N at a time.
    baseline_total += calculate_score(config['xs'], config['ys'], config['degs'])

print(f"Baseline score: {baseline_total:.9f}")
print(f"Improvement: {baseline_total - total_score:.9f}")


Total ensemble score: 70.659474935


Baseline score: 70.659958322
Improvement: 0.000483387


In [9]:
# List the N values where the ensemble beats the baseline by more than 1e-12
print("\nN values with improvements over baseline:")
improvements = []
for n in range(1, 201):
    if n not in best_configs or n not in baseline_configs:
        continue
    base_cfg = baseline_configs[n]
    baseline_score = calculate_score(base_cfg['xs'], base_cfg['ys'], base_cfg['degs'])
    ensemble_score = best_scores[n]
    if ensemble_score < baseline_score - 1e-12:
        gain = baseline_score - ensemble_score
        improvements.append((n, baseline_score, ensemble_score, gain, best_sources[n]))

print(f"Found {len(improvements)} improvements:")
# Show the 20 largest gains; equal gains keep ascending-N order (stable sort).
top_improvements = sorted(improvements, key=lambda rec: rec[3], reverse=True)[:20]
for n, baseline_s, ensemble_s, imp, source in top_improvements:
    print(f"  N={n}: {baseline_s:.9f} -> {ensemble_s:.9f} (improvement: {imp:.9f}) from {source}")


N values with improvements over baseline:


Found 138 improvements:
  N=65: 0.363793399 -> 0.363327701 (improvement: 0.000465699) from current_submission
  N=195: 0.332616928 -> 0.332609589 (improvement: 0.000007339) from candidate_010
  N=73: 0.353203556 -> 0.353201196 (improvement: 0.000002360) from candidate_010
  N=157: 0.341371027 -> 0.341369183 (improvement: 0.000001843) from eazy_output
  N=193: 0.333764060 -> 0.333762704 (improvement: 0.000001356) from eazy_output
  N=111: 0.343923239 -> 0.343922483 (improvement: 0.000000756) from eazy_output
  N=149: 0.337931398 -> 0.337930752 (improvement: 0.000000647) from candidate_010
  N=133: 0.340350747 -> 0.340350310 (improvement: 0.000000436) from candidate_010
  N=187: 0.340237100 -> 0.340236736 (improvement: 0.000000364) from eazy_output
  N=143: 0.341148514 -> 0.341148152 (improvement: 0.000000362) from eazy_output
  N=196: 0.333268194 -> 0.333267979 (improvement: 0.000000216) from eazy_output
  N=2: 0.450779183 -> 0.450779057 (improvement: 0.000000126) from candidate_010
  N

In [10]:
# Save ensemble submission
# This path is also read as the 'current_submission' source when present,
# so running the notebook replaces one of its own inputs.
print("\nSaving ensemble submission...")
os.makedirs('/home/submission', exist_ok=True)

# One row per tree. N values without a valid configuration are skipped silently.
# NOTE(review): rows use an 'angle' column, zero-padded tree indices and plain
# floats (no 's' marker) - confirm this is what the consumer of submission.csv expects.
rows = [
    {
        'id': f'{n:03d}_{i:03d}',
        'x': best_configs[n]['xs'][i],
        'y': best_configs[n]['ys'][i],
        'angle': best_configs[n]['degs'][i],
    }
    for n in range(1, 201)
    if n in best_configs
    for i in range(n)
]

df = pd.DataFrame(rows)
df.to_csv('/home/submission/submission.csv', index=False)
print(f"Saved to /home/submission/submission.csv")
print(f"Total rows: {len(df)}")
print(f"\nFinal score: {total_score:.9f}")


Saving ensemble submission...
Saved to /home/submission/submission.csv
Total rows: 20100

Final score: 70.659474935
