# Evolver Loop 3 - LB Feedback Analysis

**LB Score: 70.676102** (matches CV exactly)

The evaluator's key recommendation: **CREATE AN ENSEMBLE OF ALL AVAILABLE PRE-OPTIMIZED SOLUTIONS**

This is the single highest-leverage action because:
1. Different optimization runs find different local optima
2. There are 16+ pre-optimized solutions available
3. For each N, we should take the BEST configuration from any source

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import os
import glob
import warnings
warnings.filterwarnings('ignore')

# Tree outline: parallel lists holding the x and y coordinate of each of the
# 15 polygon vertices (TX[i], TY[i] describe vertex i).
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]
# The same outline as a list of (x, y) tuples.
TREE_VERTICES = [(vx, vy) for vx, vy in zip(TX, TY)]

def create_tree_polygon(x, y, deg):
    """Build the tree polygon for one placement.

    The base outline (TREE_VERTICES) is rotated by ``deg`` about the origin
    (0, 0) and the rotated shape is then shifted by ``x`` horizontally and
    ``y`` vertically.

    Args:
        x: Horizontal offset applied after rotation.
        y: Vertical offset applied after rotation.
        deg: Rotation angle handed to ``affinity.rotate``.

    Returns:
        The rotated and translated shapely polygon.
    """
    rotated = affinity.rotate(Polygon(TREE_VERTICES), deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def get_bounding_box_side(polygons):
    """Return the side of the axis-aligned square enclosing all polygons.

    Every exterior coordinate of every polygon is considered; the result is
    the larger of the overall x extent and the overall y extent.

    Args:
        polygons: Sequence of objects exposing ``exterior.coords`` as an
            iterable of coordinate tuples.

    Returns:
        0 when ``polygons`` is empty, otherwise ``max(width, height)``.
    """
    if not polygons:
        return 0

    xs, ys = [], []
    for shape in polygons:
        for point in shape.exterior.coords:
            xs.append(point[0])
            ys.append(point[1])

    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def parse_s_value(s):
    """Convert a submission field to ``float``.

    String values that start with the letter ``'s'`` have that first
    character dropped before conversion; anything else is passed to
    ``float`` unchanged.

    Args:
        s: A string such as ``'s1.25'`` or ``'1.25'``, or a number.

    Returns:
        The parsed float.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

# Progress marker: printed after the imports, tree geometry and helper
# functions of this cell have been defined.
print('Setup complete')

In [None]:
# Gather every pre-optimized solution CSV under the snapshot directory
base_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

# CSVs sitting directly in base_path come first ...
all_csv_files = list(glob.glob(f'{base_path}/*.csv'))

# ... followed by the CSVs of each known subdirectory, in this order
for subdir in ('santa25-public', 'blended', 'chistyakov', 'santa-2025-try3', 'bucket-of-chump', 'telegram'):
    all_csv_files.extend(glob.glob(f'{base_path}/{subdir}/*.csv'))

# Report what was found (sorted by full path, shown by file name only)
print(f'Found {len(all_csv_files)} CSV files:')
for csv_path in sorted(all_csv_files):
    print(f'  {os.path.basename(csv_path)}')

In [None]:
# Load all solutions and calculate score for each N
def load_solution(filepath):
    """Read one solution CSV and group its rows by tree count N.

    The ``x``, ``y`` and ``deg`` columns are parsed with ``parse_s_value``.
    For each N in 1..200, the rows whose ``id`` starts with the zero-padded
    prefix ``'NNN_'`` are collected; N is kept only when exactly N such rows
    exist.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        Dict of ``{N: [(x, y, deg), ...]}`` for every valid N. Any exception
        raised while reading or parsing is reported with ``print`` and an
        empty dict is returned instead.
    """
    try:
        frame = pd.read_csv(filepath)
        for column in ('x', 'y', 'deg'):
            frame[f'{column}_val'] = frame[column].apply(parse_s_value)

        by_n = {}
        for n in range(1, 201):
            rows = frame[frame['id'].str.startswith(f'{n:03d}_')]
            if len(rows) != n:
                # Incomplete or over-full group: not a valid configuration
                continue
            by_n[n] = [
                (row['x_val'], row['y_val'], row['deg_val'])
                for _, row in rows.iterrows()
            ]
        return by_n
    except Exception as e:
        print(f'Error loading {filepath}: {e}')
        return {}

def get_n_score(configs):
    """Score contribution of one N-tree configuration.

    Args:
        configs: List of ``(x, y, deg)`` placements, one per tree.

    Returns:
        ``side**2 / n`` where ``side`` is the bounding-square side of all
        placed trees and ``n`` is the number of placements; ``inf`` when
        ``configs`` is empty.
    """
    count = len(configs)
    if count == 0:
        return float('inf')
    trees = [create_tree_polygon(cx, cy, angle) for cx, cy, angle in configs]
    return get_bounding_box_side(trees) ** 2 / count

# Load all solutions
print('Loading all solutions...')
solutions = {}  # {source_name: {N: [(x, y, deg), ...]}}
for filepath in all_csv_files:
    sol = load_solution(filepath)
    if not sol:
        # load_solution returns {} on error or when no N was valid; skip it
        continue

    name = os.path.basename(filepath)
    if name in solutions:
        # Files are gathered from several directories, so two of them can
        # share a basename. Keying by basename alone would silently replace
        # the earlier solution, so later duplicates are qualified with their
        # parent directory name (and, as a last resort, the full path).
        name = os.path.join(os.path.basename(os.path.dirname(filepath)), name)
        if name in solutions:
            name = filepath

    solutions[name] = sol
    print(f'  {name}: {len(sol)} valid N configurations')

print(f'\nLoaded {len(solutions)} solutions')

In [None]:
# For each N, pick the lowest-scoring configuration among all loaded solutions
print('Finding best configuration for each N...')

best_configs = {}  # {N: (score, source_name, configs)}

for n in range(1, 201):
    # Running winner for this N. The strict '<' below means that on a tie
    # the source encountered first keeps the slot.
    leader = (float('inf'), None, None)
    for name, sol in solutions.items():
        if n not in sol:
            continue
        score = get_n_score(sol[n])
        if score < leader[0]:
            leader = (score, name, sol[n])

    # Record N only when some source actually supplied a configuration
    if leader[2]:
        best_configs[n] = leader

# Count how many N values each source wins
print('\nBest sources by N:')
source_counts = {}
for _, source, _ in best_configs.values():
    source_counts[source] = 1 + source_counts.get(source, 0)

# Most frequent winner first
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)
for source, count in ranked_sources:
    print(f'  {source}: {count} N values')

In [None]:
# Sum the per-N scores of the selected configurations
total_score = sum(entry[0] for entry in best_configs.values())
print(f'\nBest ensemble total score: {total_score:.6f}')
print(f'Baseline (ensemble.csv): 70.676102')
print(f'Target: 68.919154')
print(f'Improvement over baseline: {70.676102 - total_score:.6f}')
print(f'Gap to target: {total_score - 68.919154:.6f}')

# List the N values where some other source beats ensemble.csv
print('\nN values where a different source is better than ensemble.csv:')
ensemble_solution = solutions.get('ensemble.csv', {})
for n, (score, source, _) in best_configs.items():
    # Nothing to compare when ensemble.csv itself won or lacks this N
    if source == 'ensemble.csv' or n not in ensemble_solution:
        continue
    ensemble_score = get_n_score(ensemble_solution[n])
    # 1e-8 tolerance so float noise is not reported as an improvement
    if score < ensemble_score - 1e-8:
        print(f'  N={n}: {source} ({score:.6f}) vs ensemble.csv ({ensemble_score:.6f}), improvement: {ensemble_score - score:.6f}')

In [None]:
# Report whether the per-N winners come from one source or from several
if len(source_counts) != 1:
    print(f'\n*** FOUND {len(source_counts)} DIFFERENT SOURCES CONTRIBUTING ***')
    print('Creating combined ensemble...')
else:
    # A single winning source means combining files cannot lower the score
    print('\n*** ALL BEST CONFIGS COME FROM THE SAME SOURCE ***')
    print('The pre-optimized solutions have already been ensembled.')
    print('No improvement possible from combining them.')

In [None]:
# Rank N values by how much each contributes to the total score
print('\nScore contribution by N (top 20 highest contributors):')
n_scores = sorted(
    ((n, entry[0]) for n, entry in best_configs.items()),
    key=lambda pair: pair[1],
    reverse=True,
)

# Show the 20 largest contributors together with their winning source
for n, score in n_scores[:20]:
    _, source, _ = best_configs[n]
    print(f'  N={n:3d}: {score:.6f} (from {source})')

In [None]:
# Compare each selected configuration with the area lower bound.
# The N trees together cover N * tree_area, so the bounding square can never
# be smaller than that. efficiency = (N * tree_area) / bounding_square_area is
# the fraction of the square covered by trees (1.0 would be a perfect packing).
# Tree extents from TX/TY: width 0.7, height 1.0

# The label states the ratio that is actually computed below
# (lower-bound area divided by achieved area, not score / minimum).
print('\nEfficiency analysis (theoretical_minimum_area / bounding_box_area):')
print('Theoretical minimum assumes perfect packing with no wasted space')

# Area of a single tree polygon
tree_poly = Polygon(TREE_VERTICES)
tree_area = tree_poly.area
print(f'Tree area: {tree_area:.6f}')

# For N trees, minimum bounding box area = N * tree_area / packing_efficiency
# Best packing efficiency for irregular shapes is typically 0.7-0.9

for n in [1, 2, 3, 5, 10, 50, 100, 200]:
    if n in best_configs:
        score, source, configs = best_configs[n]
        # score = side**2 / n, hence the bounding square's area is score * n
        area = score * n
        side = np.sqrt(area)
        theoretical_min_area = n * tree_area
        efficiency = theoretical_min_area / area
        print(f'  N={n:3d}: side={side:.4f}, efficiency={efficiency:.3f}, score={score:.6f}')

In [None]:
# Summary
# The key findings are derived from source_counts / total_score so that the
# summary cannot contradict what the analysis above actually found.
print('\n' + '='*60)
print('LOOP 3 ANALYSIS SUMMARY')
print('='*60)
print(f'LB Score: 70.676102 (matches CV exactly)')
print(f'Target: 68.919154')
print(f'Gap: 1.756948 points (2.55%)')
print()
print('Key findings:')
if len(source_counts) == 1:
    # Every per-N winner comes from one file: combining cannot help
    only_source = next(iter(source_counts))
    print(f'1. All best configurations come from the same source ({only_source})')
    print('2. The pre-optimized solutions have already been ensembled')
    print('3. No improvement possible from combining existing solutions')
    print('4. Need fundamentally different approach to escape local optimum')
elif not source_counts:
    # Nothing was loaded, so no conclusion about ensembling can be drawn
    print('1. No valid configurations were loaded from any source')
    print('2. Check the pre-optimized solution paths before drawing conclusions')
else:
    # Several files win at least one N: report what the combination yields
    print(f'1. Best configurations come from {len(source_counts)} different sources')
    print(f'2. Per-N best-of ensemble total score: {total_score:.6f}')
    print(f'3. Improvement over baseline: {70.676102 - total_score:.6f}')
    print(f'4. Gap to target: {total_score - 68.919154:.6f}')
print()
print('Next steps:')
print('1. Try the advanced bbox3 optimizer from Why Not kernel with longer time')
print('2. Implement genetic algorithm with crossover between solutions')
print('3. Try constraint programming approach')
print('='*60)