# Loop 2 Analysis: Ensemble from Multiple Sources

The evaluator correctly pointed out that running more local search from a configuration that is already a local optimum is futile: by definition, no small move improves it.

**Key insight**: Build an ensemble by choosing, for each N independently, the lowest-scoring configuration found across all available source submissions.

In [None]:
import numpy as np
import pandas as pd
import os
import glob

# Outline of a single tree: x / y coordinates of its 15 polygon vertices in
# the tree's own frame, before any rotation or translation is applied.
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])


def get_side(xs, ys, degs):
    """Return the side of the axis-aligned square that bounds every tree.

    Each tree is the TX/TY polygon rotated by its angle (in degrees) about
    the polygon's own origin and then shifted to its ``(x, y)`` position.

    Args:
        xs: x offsets, one per tree.
        ys: y offsets, one per tree.
        degs: rotation angles in degrees, one per tree.

    Returns:
        The larger of the width and the height of the bounding box around
        all transformed vertices.

    Raises:
        ValueError: if no trees are given, since there is nothing to take
            a min/max over.
    """
    vertex_xs, vertex_ys = [], []
    for shift_x, shift_y, angle in zip(xs, ys, degs):
        theta = np.radians(angle)
        cos_t = np.cos(theta)
        sin_t = np.sin(theta)
        # 2-D rotation of the outline followed by the translation.
        vertex_xs += list(TX * cos_t - TY * sin_t + shift_x)
        vertex_ys += list(TX * sin_t + TY * cos_t + shift_y)

    width = max(vertex_xs) - min(vertex_xs)
    height = max(vertex_ys) - min(vertex_ys)
    return max(width, height)

def get_score(xs, ys, degs, n):
    """Return the bounding-square area of a configuration divided by ``n``.

    Args:
        xs: x offsets, one per tree.
        ys: y offsets, one per tree.
        degs: rotation angles in degrees, one per tree.
        n: divisor applied to the squared side length; callers in this
            file pass the number of trees in the configuration.

    Returns:
        ``side * side / n`` where ``side`` is ``get_side(xs, ys, degs)``.
    """
    edge = get_side(xs, ys, degs)
    area = edge * edge
    return area / n

def load_submission(filepath):
    """Read a submission CSV and split it into per-N configurations.

    The file must provide ``id``, ``x``, ``y`` and ``deg`` columns.  A row
    belongs to group N when its ``id`` starts with N zero-padded to three
    digits followed by an underscore (``001_``, ``002_``, ...).  Leading
    ``s`` characters on the coordinate and angle values are stripped before
    the values are converted to float.

    Args:
        filepath: CSV source handed straight to ``pandas.read_csv``.

    Returns:
        A dict mapping N (checked for 1..200) to a dict with NumPy arrays
        under ``'xs'``, ``'ys'`` and ``'degs'``.  An N appears only when the
        file holds exactly N rows for it; other groups are dropped silently.
    """
    def _as_floats(column):
        # Go through str() so cells pandas already parsed as numbers are
        # handled the same way as 's'-prefixed text.
        return np.array([float(str(cell).lstrip('s')) for cell in column])

    table = pd.read_csv(filepath)
    loaded = {}
    for n in range(1, 201):
        members = table[table['id'].str.startswith(f'{n:03d}_')]
        if len(members) != n:
            continue
        loaded[n] = {
            'xs': _as_floats(members['x']),
            'ys': _as_floats(members['y']),
            'degs': _as_floats(members['deg']),
        }
    return loaded

# Leave a visible marker in the cell output once the definitions above have run.
print('Functions defined')

In [None]:
# Candidate submissions to draw from. Paths that are not on disk are reported
# and skipped rather than treated as an error.
submission_files = [
    '/home/code/preoptimized_submission.csv',
    '/home/code/datasets/71.97.csv',
    '/home/code/datasets/72.49.csv',
    '/home/code/datasets/santa-2025.csv',
    '/home/code/datasets/submission.csv',
]

# Maps each file's base name to its parsed configs and its summed score.
all_submissions = {}
for filepath in submission_files:
    if not os.path.exists(filepath):
        print(f'File not found: {filepath}')
        continue

    name = os.path.basename(filepath)
    configs = load_submission(filepath)
    # Total over whichever N this file provides in full; the printed count
    # shows how many N values contributed to it.
    total_score = sum(
        get_score(layout['xs'], layout['ys'], layout['degs'], n)
        for n, layout in configs.items()
    )
    all_submissions[name] = {'configs': configs, 'total_score': total_score}
    print(f'{name}: {total_score:.6f} (N values: {len(configs)})')

In [None]:
# Build the ensemble: for every N keep the lowest-scoring layout found in any
# loaded source, remembering which source it came from.
# NOTE(review): selection looks only at the bounding-square score; nothing in
# this cell checks that a layout is otherwise valid -- confirm sources are trusted.
ensemble_configs = {}
ensemble_sources = {}

for n in range(1, 201):
    best_score, best_source, best_config = float('inf'), None, None

    for name, data in all_submissions.items():
        cfg = data['configs'].get(n)
        if cfg is None:
            # This source has no complete layout for this N.
            continue
        score = get_score(cfg['xs'], cfg['ys'], cfg['degs'], n)
        # Strict comparison: on a tie the earlier source keeps the slot.
        if score < best_score:
            best_score, best_source, best_config = score, name, cfg

    if best_config is None:
        # No source covers this N; it is left out of the ensemble.
        continue
    ensemble_configs[n] = best_config
    ensemble_sources[n] = best_source

# Total score of the ensemble over the N values it contains.
ensemble_total = sum(
    get_score(chosen['xs'], chosen['ys'], chosen['degs'], n)
    for n, chosen in ensemble_configs.items()
)
target_score = 68.922808
print(f'\nEnsemble total score: {ensemble_total:.6f}')
print(f'Target: {target_score:.6f}')
print(f'Gap: {ensemble_total - target_score:.6f}')

In [None]:
# Tally how many N values each source file supplied to the ensemble and list
# the sources from most to least used.
from collections import Counter

source_counts = Counter(ensemble_sources.values())
print('\nSource contributions to ensemble:')
ranked_sources = source_counts.most_common()
for source_name, n_wins in ranked_sources:
    print(f'  {source_name}: {n_wins} N values')

In [None]:
# Rank the ensemble's per-N scores from largest to smallest and print the
# twenty largest, together with the source each layout came from.
print('\nPer-N comparison (top 20 worst in ensemble):')
# Walk the N values the ensemble actually holds. The ensemble is built with
# N skipped when no source has a complete layout, so indexing every N in
# 1..200 unconditionally would raise KeyError in that situation.
scores = [
    (n, get_score(cfg['xs'], cfg['ys'], cfg['degs'], n), ensemble_sources[n])
    for n, cfg in sorted(ensemble_configs.items())
]

# Stable descending sort: equal scores stay in ascending-N order.
scores.sort(key=lambda entry: entry[1], reverse=True)
for n, score, source in scores[:20]:
    print(f'  N={n:3d}: {score:.6f} (from {source})')

In [None]:
# For the ten highest-scoring N in the ranking, print every source's score for
# that N and flag the source the ensemble selected.
print('\nDetailed comparison for worst N values:')
for n, _, _ in scores[:10]:
    print(f'\nN={n}:')
    for name, data in all_submissions.items():
        cfg = data['configs'].get(n)
        if cfg is None:
            # Source has no complete layout for this N; nothing to compare.
            continue
        score = get_score(cfg['xs'], cfg['ys'], cfg['degs'], n)
        marker = ''
        if name == ensemble_sources[n]:
            marker = ' <-- BEST'
        print(f'  {name}: {score:.6f}{marker}')

In [None]:
# Write the ensemble out as a submission CSV with columns id, x, y, deg.
output_path = '/home/submission/submission.csv'

# The ensemble can lack an N when no source had a complete layout for it.
# Fail up front with a clear message instead of a bare KeyError part-way
# through building the rows.
missing = [n for n in range(1, 201) if n not in ensemble_configs]
if missing:
    raise ValueError(
        f'Ensemble has no configuration for N = {missing}; '
        'cannot write a complete submission'
    )

rows = []
for n in range(1, 201):
    cfg = ensemble_configs[n]
    for i in range(n):
        # Ids are '<N zero-padded to 3 digits>_<tree index>'; values carry
        # the 's' prefix that load_submission strips when reading.
        rows.append({
            'id': f'{n:03d}_{i}',
            'x': f's{cfg["xs"][i]}',
            'y': f's{cfg["ys"][i]}',
            'deg': f's{cfg["degs"][i]}'
        })

df_out = pd.DataFrame(rows)
# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_out.to_csv(output_path, index=False)
print(f'Saved ensemble submission with score {ensemble_total:.6f}')
print(f'Total rows: {len(df_out)}')