# Evolver Loop 3 Analysis: Ensemble of Pre-optimized Solutions

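The evaluator's top-priority recommendation is to build an ensemble: for each N, take the best-scoring configuration found across all available pre-optimized solutions. Because the total score is a sum of independent per-N terms, this is guaranteed to be at least as good as the best single solution, provided every selected configuration is itself valid (free of overlaps).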

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import os
import glob
import warnings
warnings.filterwarnings('ignore')

# Tree geometry
# TX and TY hold the x and y coordinates of the 15 outline vertices, listed in
# matching order in the tree's local frame. create_tree_polygon rotates this
# outline about (0, 0) of that frame before translating it into place.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
# (x, y) pairs in the form expected by shapely's Polygon constructor.
TREE_VERTICES = list(zip(TX, TY))

def parse_s_value(s):
    """Convert a submission field to ``float``.

    String values may carry a leading ``'s'`` marker (for example
    ``'s-0.25'``); the marker is dropped before conversion. Anything else is
    passed to ``float`` unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` and placed at ``(x, y)``.

    The template polygon built from ``TREE_VERTICES`` is first rotated about
    its local origin ``(0, 0)`` and only afterwards shifted by ``(x, y)``.
    ``deg`` is passed to ``affinity.rotate`` without a unit override.
    """
    template = Polygon(TREE_VERTICES)
    rotated = affinity.rotate(template, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def get_bounding_box_side(polygons):
    """Return the larger of the x-extent and y-extent over all polygons.

    The extents are measured over the exterior vertices of every polygon,
    i.e. the result is the side of the axis-aligned square needed to cover
    them. An empty input yields ``0``.
    """
    if not polygons:
        return 0

    xs, ys = [], []
    for poly in polygons:
        for coord in poly.exterior.coords:
            xs.append(coord[0])
            ys.append(coord[1])

    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def has_overlap(polygons):
    """Report whether any two polygons overlap with non-negligible area.

    Args:
        polygons: sequence of shapely polygons.

    Returns:
        ``True`` as soon as one pair intersects (without merely touching) and
        the intersection area exceeds ``1e-10``; ``False`` otherwise,
        including when fewer than two polygons are given.
    """
    if len(polygons) < 2:
        return False

    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        # NOTE(review): indexing polygons[idx] below assumes query() yields
        # integer positions into `polygons` (Shapely 2.x behaviour) - confirm
        # the installed version.
        for idx in tree_index.query(poly):
            # Overlap is symmetric, so each unordered pair is tested once
            # (idx > i); this also skips the polygon matching itself.
            if idx <= i:
                continue
            other = polygons[idx]
            if not poly.intersects(other) or poly.touches(other):
                continue
            # Ignore slivers below the area tolerance.
            if poly.intersection(other).area > 1e-10:
                return True
    return False

# Marker that the imports and helper definitions in this cell ran to completion.
print('Setup complete')

Setup complete


In [2]:
# Find all pre-optimized solutions
base_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

solution_files = [
    f'{base_path}/ensemble.csv',
    f'{base_path}/santa-2025.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v61.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v63.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v65.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v66.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v67.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v68.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v69.csv',
    f'{base_path}/santa25-public/santa2025_ver2_v76.csv',
    f'{base_path}/santa25-public/submission_70_926149550346.csv',
    f'{base_path}/santa25-public/submission_70_936673758122.csv',
    f'{base_path}/santa25-public/submission_JKoT1.csv',
    f'{base_path}/santa25-public/submission_JKoT2.csv',
    f'{base_path}/santa25-public/submission_JKoT3.csv',
    f'{base_path}/santa25-public/submission_JKoT4.csv',
    f'{base_path}/santa25-public/submission_opt1.csv',
    f'{base_path}/chistyakov/submission_best.csv',
    f'{base_path}/santa-2025-try3/submission.csv',
    f'{base_path}/santa-2025-try3/submission_sa.csv',
    f'{base_path}/blended/submission (77).csv',
]

# Load all solutions
# `solutions` maps a short display name to the loaded frame. Each frame gets
# numeric x_val / y_val / deg_val columns parsed from the raw x / y / deg fields.
solutions = {}
for path in solution_files:
    if not os.path.exists(path):
        # Report skipped candidates instead of dropping them silently, so a
        # wrong path is visible in the cell output.
        print(f'Missing (skipped): {path}')
        continue
    try:
        df = pd.read_csv(path)
        df['x_val'] = df['x'].apply(parse_s_value)
        df['y_val'] = df['y'].apply(parse_s_value)
        df['deg_val'] = df['deg'].apply(parse_s_value)
    except Exception as e:
        print(f'Error loading {path}: {e}')
        continue

    name = os.path.basename(path)
    if name in solutions:
        # Two files share a basename: key the later one by its path relative
        # to base_path so it does not overwrite the earlier solution.
        name = os.path.relpath(path, base_path)
    solutions[name] = df
    print(f'Loaded: {name} ({len(df)} rows)')

print(f'\nTotal solutions loaded: {len(solutions)}')

Loaded: ensemble.csv (20100 rows)
Loaded: santa-2025.csv (20100 rows)
Loaded: santa2025_ver2_v61.csv (20100 rows)
Loaded: santa2025_ver2_v63.csv (20100 rows)


Loaded: santa2025_ver2_v65.csv (20100 rows)
Loaded: santa2025_ver2_v66.csv (20100 rows)


Loaded: santa2025_ver2_v67.csv (20100 rows)
Loaded: santa2025_ver2_v68.csv (20100 rows)
Loaded: santa2025_ver2_v69.csv (20100 rows)
Loaded: santa2025_ver2_v76.csv (20100 rows)


Loaded: submission_70_926149550346.csv (20100 rows)
Loaded: submission_70_936673758122.csv (20100 rows)


Loaded: submission_JKoT1.csv (20100 rows)
Loaded: submission_JKoT2.csv (20100 rows)
Loaded: submission_JKoT3.csv (20100 rows)


Loaded: submission_JKoT4.csv (20100 rows)
Loaded: submission_opt1.csv (20100 rows)
Loaded: submission_best.csv (20100 rows)


Loaded: submission.csv (20100 rows)
Loaded: submission_sa.csv (20100 rows)
Loaded: submission (77).csv (20100 rows)

Total solutions loaded: 21


In [3]:
# Calculate score for each N in each solution
def get_n_score(df, n):
    """Score the ``n``-tree configuration stored in one solution frame.

    The configuration consists of the rows whose ``id`` starts with the
    zero-padded prefix ``f'{n:03d}_'``.

    Returns:
        ``(score, group)`` where ``score`` is ``side**2 / n`` for the bounding
        side of the placed trees and ``group`` is the selected rows, or
        ``(None, None)`` when the number of selected rows is not ``n``.
    """
    rows = df[df['id'].str.startswith(f'{n:03d}_')]
    if len(rows) != n:
        return None, None  # Invalid configuration

    trees = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(rows['x_val'], rows['y_val'], rows['deg_val'])
    ]
    side = get_bounding_box_side(trees)
    return side**2 / n, rows

# Build a matrix of scores for each (solution, N) pair:
# scores_matrix[solution_name][n] is the score from get_n_score, or None when
# that solution has no valid configuration for n.
print('Calculating scores for all (solution, N) pairs...')
scores_matrix = {}

for name, df in solutions.items():
    scores_matrix[name] = {n: get_n_score(df, n)[0] for n in range(1, 201)}

print('Done calculating scores')

Calculating scores for all (solution, N) pairs...


Done calculating scores


In [4]:
# Find the best solution for each N
# best_for_n[n] = (solution_name, score) of the lowest-scoring valid
# configuration; on ties the solution loaded first wins (strict '<').
best_for_n = {}
for n in range(1, 201):
    best_score = float('inf')
    best_solution = None
    for name in solutions.keys():
        score = scores_matrix[name].get(n)
        if score is not None and score < best_score:
            best_score = score
            best_solution = name
    if best_solution is None:
        # Fail here with a clear message rather than storing (None, inf),
        # which would break the formatted prints below and make the summed
        # ensemble score infinite.
        raise ValueError(f'No solution provides a valid configuration for N={n}')
    best_for_n[n] = (best_solution, best_score)

# Show which solutions contribute best N values
print('Best solution for each N (first 20):')
for n in range(1, 21):
    sol, score = best_for_n[n]
    print(f'N={n:3d}: {sol:40s} score={score:.6f}')

# Count how many N values each solution wins
solution_wins = {}
for n, (sol, _) in best_for_n.items():
    solution_wins[sol] = solution_wins.get(sol, 0) + 1

print('\nSolution wins (how many N values each solution is best for):')
for sol, wins in sorted(solution_wins.items(), key=lambda x: -x[1]):
    print(f'{sol:40s}: {wins:3d} N values')

Best solution for each N (first 20):
N=  1: ensemble.csv                             score=0.661250
N=  2: ensemble.csv                             score=0.450779
N=  3: ensemble.csv                             score=0.434745
N=  4: ensemble.csv                             score=0.416545
N=  5: ensemble.csv                             score=0.416850
N=  6: ensemble.csv                             score=0.399610
N=  7: ensemble.csv                             score=0.399897
N=  8: ensemble.csv                             score=0.385407
N=  9: ensemble.csv                             score=0.387415
N= 10: ensemble.csv                             score=0.376630
N= 11: ensemble.csv                             score=0.375736
N= 12: ensemble.csv                             score=0.372724
N= 13: ensemble.csv                             score=0.372323
N= 14: ensemble.csv                             score=0.370569
N= 15: ensemble.csv                             score=0.379203
N= 16: ensemble.cs

In [5]:
# Calculate total score if we use the best N from each solution
# Target score this analysis compares against. It is kept in one place so the
# printed value and the gap computation cannot drift apart.
TARGET_SCORE = 68.919154

ensemble_score = sum(score for _, score in best_for_n.values())
print(f'Ensemble score (best N from each solution): {ensemble_score:.6f}')

# Compare with baseline
baseline_score = sum(scores_matrix['ensemble.csv'].values())
print(f'Baseline score (ensemble.csv): {baseline_score:.6f}')
print(f'Improvement: {baseline_score - ensemble_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap to target: {ensemble_score - TARGET_SCORE:.6f}')

Ensemble score (best N from each solution): 70.676102
Baseline score (ensemble.csv): 70.676102
Improvement: 0.000000
Target: 68.919154
Gap to target: 1.756948


In [6]:
# Create the ensemble submission
# For every N, copy the raw id / x / y / deg fields of the winning solution's
# rows, so the original string formatting of the values is carried over as-is.
print('Creating ensemble submission...')

ensemble_rows = []
for n in range(1, 201):
    source_df = solutions[best_for_n[n][0]]
    picked = source_df[source_df['id'].str.startswith(f'{n:03d}_')]
    ensemble_rows.extend(picked[['id', 'x', 'y', 'deg']].to_dict('records'))

ensemble_df = pd.DataFrame(ensemble_rows)
print(f'Created ensemble with {len(ensemble_df)} rows')
print(ensemble_df.head(10))

Creating ensemble submission...


Created ensemble with 20100 rows
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   
5  003_2      s0.641714640229075      s1.180458566613381   
6  004_0  s-0.324747789589372171   s0.132109978088185392   
7  004_1   s0.315354346242637695   s0.132109978063475492   
8  004_2   s0.324747789592379210  s-0.732109978069475531   
9  004_3  s-0.315354348134818330  s-0.732109978094185987   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  
5      s155.13405193710082  
6  s156.370622145636389178  
7  s156.370622269264089255  
8  s336.370622269264003990  
9  s336.37062214

In [7]:
# Validate the ensemble for overlaps
# overlap_count ends up as the number of N values whose configuration
# contains at least one overlapping pair of trees.
print('Validating ensemble for overlaps...')

overlap_count = 0
for n in range(1, 201):
    in_group = ensemble_df['id'].str.startswith(f'{n:03d}_')
    group = ensemble_df[in_group]
    polygons = [
        create_tree_polygon(parse_s_value(x), parse_s_value(y), parse_s_value(deg))
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]

    if not has_overlap(polygons):
        continue
    overlap_count += 1
    print(f'N={n}: OVERLAP DETECTED!')

if overlap_count != 0:
    print(f'WARNING: {overlap_count} configurations have overlaps!')
else:
    print('Ensemble has NO OVERLAPS ✓')

Validating ensemble for overlaps...


Ensemble has NO OVERLAPS ✓


In [8]:
# Save the ensemble
# Refuse to write a submission that failed the overlap validation; the
# validation cell only prints a warning, so the check is enforced here.
if overlap_count > 0:
    raise RuntimeError(
        f'Refusing to save: {overlap_count} configurations have overlaps'
    )

# `os` is already imported in the setup cell; no re-import is needed here.
# Create the target directory so to_csv does not fail on a fresh machine.
os.makedirs('/home/submission', exist_ok=True)
ensemble_df.to_csv('/home/submission/submission.csv', index=False)
print('Saved ensemble to /home/submission/submission.csv')

# Also save to experiments folder
os.makedirs('/home/code/experiments/005_ensemble_best_n', exist_ok=True)
ensemble_df.to_csv('/home/code/experiments/005_ensemble_best_n/submission.csv', index=False)
print('Saved to experiments folder')

Saved ensemble to /home/submission/submission.csv
Saved to experiments folder


In [9]:
# Summary
banner = '='*60
print(banner)
print('ENSEMBLE ANALYSIS SUMMARY')
print(banner)
print(f'Solutions analyzed: {len(solutions)}')
print(f'Baseline score (ensemble.csv): {baseline_score:.6f}')
print(f'Ensemble score (best N from each): {ensemble_score:.6f}')
print(f'Improvement: {baseline_score - ensemble_score:.6f}')
print(f'Target: 68.919154')
print(f'Gap to target: {ensemble_score - 68.919154:.6f}')
print(banner)

# Show which N values improved
# A row is printed only when another solution wins N and its score differs
# from the baseline's by more than 1e-8.
print('\nN values where ensemble differs from baseline:')
baseline_by_n = scores_matrix['ensemble.csv']
for n in range(1, 201):
    best_sol, best_score = best_for_n[n]
    baseline_n_score = baseline_by_n[n]
    if best_sol == 'ensemble.csv':
        continue
    if abs(best_score - baseline_n_score) > 1e-8:
        print(f'N={n:3d}: {best_sol:40s} saves {baseline_n_score - best_score:.6f}')

ENSEMBLE ANALYSIS SUMMARY
Solutions analyzed: 21
Baseline score (ensemble.csv): 70.676102
Ensemble score (best N from each): 70.676102
Improvement: 0.000000
Target: 68.919154
Gap to target: 1.756948

N values where ensemble differs from baseline:


In [None]:
# Let's analyze the score distribution across N values to find where improvements might be possible
# n_scores pairs each N with its baseline (ensemble.csv) score;
# n_scores_sorted orders those pairs from largest to smallest score.
print('Score contribution by N value (top 20 contributors):')
baseline_scores = scores_matrix['ensemble.csv']
n_scores = [(n, baseline_scores[n]) for n in range(1, 201)]
n_scores_sorted = sorted(n_scores, key=lambda pair: -pair[1])

top_contributors = n_scores_sorted[:20]
for n, score in top_contributors:
    print(f'N={n:3d}: score={score:.6f}')

In [None]:
# Calculate theoretical minimum score for each N
# The tree's area is computed from the polygon below (the vertices give 0.245625)
# For N trees, minimum bounding box area = N * tree_area / packing_efficiency
# Best packing efficiency for irregular shapes is typically 0.8-0.9

tree_poly = Polygon(TREE_VERTICES)
tree_area = tree_poly.area
print(f'Tree area: {tree_area:.6f}')

# Calculate packing efficiency for each N
# efficiency = (total tree area) / (bounding square area), so the value lies
# in (0, 1] and a lower value means more empty space in the box.
print('\nPacking efficiency by N ((N * tree_area) / side^2):')
efficiencies = []
for n in range(1, 201):
    score = scores_matrix['ensemble.csv'][n]
    side_sq = score * n  # score is side**2 / n, so this recovers side**2
    efficiency = (n * tree_area) / side_sq  # How much of the box is filled
    efficiencies.append((n, efficiency, score))

# Show worst efficiencies (most room for improvement)
efficiencies_sorted = sorted(efficiencies, key=lambda x: x[1])
print('\nWorst packing efficiencies (most room for improvement):')
for n, eff, score in efficiencies_sorted[:20]:
    print(f'N={n:3d}: efficiency={eff:.4f}, score={score:.6f}')