# Evolver Loop 2 Analysis: Overlap Repair Strategy

## Key Insight from Evaluator
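The ensemble of best per-N configs scores 67.77, which is below the target of 68.93 (lower is better), but it contains overlapping trees, so that score is not valid as it stands.
The path to victory is clear: repair the overlaps while giving back as little of that score as possible.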

## Analysis Goals
1. Understand the overlap situation in the best configs
2. Analyze how much improvement is locked behind overlaps
3. Design an overlap repair strategy

In [1]:
import numpy as np
import pandas as pd
import math
from numba import njit
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import os

# Tree polygon template
@njit
def make_polygon_template():
    """Return the tree outline as two float64 arrays ``(x, y)`` of 15 vertices.

    The vertex list starts at the tip, runs down the right-hand side
    (x >= 0), crosses the bottom of the trunk and climbs back up the
    left-hand side, which mirrors the right-hand side.
    """
    # Full widths of the trunk and of the bottom / middle / top tiers.
    trunk_w = 0.15
    base_w = 0.7
    mid_w = 0.4
    top_w = 0.25

    # Heights of the levels at which the outline changes direction.
    trunk_h = 0.2
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h

    x = np.array([
        0.0,
        top_w / 2, top_w / 4,
        mid_w / 2, mid_w / 4,
        base_w / 2, trunk_w / 2,
        trunk_w / 2, -trunk_w / 2,
        -trunk_w / 2, -base_w / 2,
        -mid_w / 4, -mid_w / 2,
        -top_w / 4, -top_w / 2,
    ], np.float64)
    y = np.array([
        tip_y,
        tier1_y, tier1_y,
        tier2_y, tier2_y,
        base_y, base_y,
        trunk_bottom_y, trunk_bottom_y,
        base_y, base_y,
        tier2_y, tier2_y,
        tier1_y, tier1_y,
    ], np.float64)
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one configuration of trees.

    Every template vertex ``(tx[j], ty[j])`` is rotated by ``degs[i]`` degrees
    and shifted by ``(xs[i], ys[i])``. The score is the squared longer side of
    the axis-aligned bounding box of all transformed vertices, divided by the
    number of trees (``xs.size``).
    """
    count = xs.size
    n_vertices = tx.size

    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300

    for k in range(count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        off_x = xs[k]
        off_y = ys[k]
        for v in range(n_vertices):
            px = cos_t * tx[v] - sin_t * ty[v] + off_x
            py = sin_t * tx[v] + cos_t * ty[v] + off_y
            if px < lo_x:
                lo_x = px
            if px > hi_x:
                hi_x = px
            if py < lo_y:
                lo_y = py
            if py > hi_y:
                hi_y = py

    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / count

def strip(a):
    """Convert values such as ``'s0.25'`` to a float64 array by removing every ``'s'`` character."""
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace('s', '')))
    return np.array(parsed, np.float64)

# Build the tree outline once; tx/ty are passed to every scoring and overlap call in later cells.
tx, ty = make_polygon_template()
print('Template loaded')

Template loaded


In [2]:
# Overlap detection functions
def get_shapely_polygon(cx, cy, deg, tx, ty):
    """Build the Shapely polygon of one tree.

    The template vertices ``(tx, ty)`` are rotated by ``deg`` degrees and then
    translated by ``(cx, cy)``.
    """
    angle = deg * np.pi / 180.0
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    world_x = cos_a * tx - sin_a * ty + cx
    world_y = sin_a * tx + cos_a * ty + cy
    vertices = zip(world_x, world_y)
    return Polygon(vertices)

def has_overlap(xs, ys, degs, tx, ty):
    """Return True as soon as any two trees overlap, False otherwise.

    Two trees count as overlapping when their polygons intersect without
    merely touching, which is the same rule ``find_overlapping_pairs`` uses.

    Parameters
    ----------
    xs, ys, degs : sequences of equal length
        Position and rotation (in degrees) of each tree.
    tx, ty : arrays
        Template outline passed through to ``get_shapely_polygon``.

    Notes
    -----
    ``STRtree.query`` is used as an index-returning call with a ``predicate``
    argument, as in Shapely 2.x. The rest of this notebook already relies on
    ``query`` returning integer indices.
    """
    n = len(xs)
    if n <= 1:
        return False
    polygons = [get_shapely_polygon(xs[i], ys[i], degs[i], tx, ty) for i in range(n)]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        # Let the tree apply the 'intersects' predicate so only true
        # intersections come back, not every bounding-box hit.
        for idx in tree_index.query(poly, predicate='intersects'):
            # intersects/touches are symmetric: visit each unordered pair once
            # (this also skips the polygon itself).
            if idx <= i:
                continue
            if not poly.touches(polygons[idx]):
                return True
    return False

def find_overlapping_pairs(xs, ys, degs, tx, ty):
    """Return every pair ``(i, j)`` with ``i < j`` whose tree polygons overlap.

    Two polygons overlap when they intersect and do not merely touch.
    Fewer than two trees yields an empty list.
    """
    count = len(xs)
    if count <= 1:
        return []

    shapes = []
    for k in range(count):
        shapes.append(get_shapely_polygon(xs[k], ys[k], degs[k], tx, ty))
    index = STRtree(shapes)

    # `b > a` keeps each unordered pair once and drops self-matches.
    return [
        (a, b)
        for a, shape in enumerate(shapes)
        for b in index.query(shape)
        if b > a and shape.intersects(shapes[b]) and not shape.touches(shapes[b])
    ]

# Confirmation message so the cell output shows the overlap helpers were defined.
print('Overlap detection functions loaded')

Overlap detection functions loaded


In [3]:
# Load the two submissions compared throughout this notebook
def _read_submission(csv_path):
    """Read a submission CSV and add an integer 'N' column taken from the part of 'id' before the first '_'."""
    frame = pd.read_csv(csv_path)
    id_prefix = frame['id'].astype(str).str.split('_').str[0]
    frame['N'] = id_prefix.astype(int)
    return frame

# Ensemble with overlaps (67.77 score)
ensemble_path = '/home/nonroot/snapshots/santa-2025/21108486172/code/experiments/001_baseline/ensemble_submission.csv'
df_ensemble = _read_submission(ensemble_path)

# Baseline (70.73 score, no overlaps)
baseline_path = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
df_baseline = _read_submission(baseline_path)

print('Loaded ensemble and baseline')

Loaded ensemble and baseline


In [4]:
# Per-N comparison of ensemble vs. baseline scores, plus overlap counts for the ensemble
def _group_arrays(frame, n):
    """Return (xs, ys, degs) float arrays for the rows of `frame` whose 'N' equals `n`."""
    rows = frame[frame['N'] == n]
    return (
        strip(rows['x'].to_numpy()),
        strip(rows['y'].to_numpy()),
        strip(rows['deg'].to_numpy()),
    )

overlap_analysis = []
for n in range(1, 201):
    ens_arrays = _group_arrays(df_ensemble, n)
    base_arrays = _group_arrays(df_baseline, n)

    score_ens = score_group(*ens_arrays, tx, ty)
    score_base = score_group(*base_arrays, tx, ty)

    # Overlaps are only counted for the ensemble configuration.
    n_pairs = len(find_overlapping_pairs(*ens_arrays, tx, ty))

    record = {
        'n': n,
        'score_ens': score_ens,
        'score_base': score_base,
        'improvement': score_base - score_ens,
        'num_overlaps': n_pairs,
        'has_overlap': n_pairs > 0,
    }
    overlap_analysis.append(record)

df_analysis = pd.DataFrame(overlap_analysis)

overlap_flags = df_analysis["has_overlap"]
print(f'Total ensemble score: {df_analysis["score_ens"].sum():.6f}')
print(f'Total baseline score: {df_analysis["score_base"].sum():.6f}')
print(f'Potential improvement: {df_analysis["improvement"].sum():.6f}')
print(f'\nN values with overlaps: {overlap_flags.sum()}')
print(f'N values without overlaps: {(~overlap_flags).sum()}')

Total ensemble score: 67.772662
Total baseline score: 70.734327
Potential improvement: 2.961665

N values with overlaps: 30
N values without overlaps: 170


In [5]:
# Rank the overlapping N values by how much score they would gain over the baseline
overlap_mask = df_analysis['has_overlap']
df_with_overlaps = df_analysis.loc[overlap_mask].sort_values('improvement', ascending=False)

report_columns = ['n', 'score_ens', 'score_base', 'improvement', 'num_overlaps']
print('Top 20 N values with overlaps and improvement potential:')
print(df_with_overlaps[report_columns].head(20).to_string())

locked_total = df_with_overlaps["improvement"].sum()
print(f'\nTotal improvement locked behind overlaps: {locked_total:.6f}')

Top 20 N values with overlaps and improvement potential:
     n  score_ens  score_base  improvement  num_overlaps
6    7   0.162010    0.399897     0.237887            21
5    6   0.173625    0.399610     0.225985            15
9   10   0.166604    0.376630     0.210026            41
8    9   0.178013    0.387415     0.209402            34
4    5   0.212694    0.416850     0.204155            10
7    8   0.187564    0.385407     0.197844            27
3    4   0.228621    0.416545     0.187924             6
11  12   0.198679    0.372724     0.174045            48
14  15   0.214172    0.379203     0.165030            61
16  17   0.208784    0.370040     0.161257            55
2    3   0.305312    0.434745     0.129433             3
10  11   0.257940    0.375736     0.117796            30
17  18   0.274445    0.368771     0.094326            36
21  22   0.294317    0.375258     0.080941            58
22  23   0.290307    0.368752     0.078445            48
20  21   0.302114    0.376451  

In [6]:
# Check N values WITHOUT overlaps - any gain here could be taken from the ensemble as-is
df_no_overlaps = df_analysis[~df_analysis['has_overlap']]
print(f'N values without overlaps: {len(df_no_overlaps)}')
print(f'Improvement from non-overlapping configs: {df_no_overlaps["improvement"].sum():.6f}')

# Differences at or below this threshold are treated as "same as baseline".
MIN_IMPROVEMENT = 0.0001

# Filter first so the header is only printed when there is something to list.
df_free = df_no_overlaps[df_no_overlaps['improvement'] > MIN_IMPROVEMENT]
if len(df_free) > 0:
    print('\nThese N values can be used directly from ensemble:')
    for n_value, gain in zip(df_free['n'], df_free['improvement']):
        print(f'  N={int(n_value)}: improvement={gain:.6f}')
else:
    print('\nNo overlap-free N value improves on the baseline.')

N values without overlaps: 170
Improvement from non-overlapping configs: 0.000000

These N values can be used directly from ensemble:


In [7]:
# Key insight: the N values without overlaps show no improvement over the baseline,
# so the ensemble only has better configs for N values that HAVE overlaps.
# All of the improvement is therefore locked behind overlaps.
#
# NOTE(review): the ensemble total below is computed on configurations that still
# overlap. Presumably any repair will raise it, so treat the "gap to target" as
# the budget the repair may spend, not as a guaranteed win.

TARGET_SCORE = 68.931058

# Derive every figure from df_analysis so the summary cannot drift from the data.
overlap_mask = df_analysis['has_overlap']
n_overlapping = int(overlap_mask.sum())
n_clean = int((~overlap_mask).sum())
total_ens = df_analysis['score_ens'].sum()
total_base = df_analysis['score_base'].sum()
total_improvement = df_analysis['improvement'].sum()

print("Summary of overlap situation:")
print(f"- {n_overlapping} N values have overlaps (all the improvement is here)")
print(f"- {n_clean} N values have no overlaps (same as baseline)")
print(f"- Total improvement potential: {total_improvement:.2f} points")
print("- If we can repair overlaps, we beat the target!")
print(f"\nTarget: {TARGET_SCORE:.6f}")
print(f"Baseline: {total_base:.6f}")
print(f"Ensemble (with overlaps): {total_ens:.6f}")
print(f"Gap to target: {TARGET_SCORE - total_ens:.6f} (we're ALREADY below target if overlaps fixed!)")

Summary of overlap situation:
- 30 N values have overlaps (all the improvement is here)
- 170 N values have no overlaps (same as baseline)
- Total improvement potential: 2.96 points
- If we can repair overlaps, we beat the target!

Target: 68.931058
Baseline: 70.734327
Ensemble (with overlaps): 67.772662
Gap to target: 1.158396 (we're ALREADY below target if overlaps fixed!)


In [9]:
# Look at the overlap structure of a few small N values to gauge how hard a repair would be
for test_n in [3, 4, 5, 6, 7]:
    rows = df_ensemble[df_ensemble['N'] == test_n]
    xs, ys, ds = (strip(rows[col].to_numpy()) for col in ('x', 'y', 'deg'))

    pairs = find_overlapping_pairs(xs, ys, ds, tx, ty)
    print(f'\nN={test_n}: {len(pairs)} overlapping pairs out of {test_n} trees')

    improvement = df_analysis.loc[df_analysis["n"] == test_n, "improvement"].values[0]
    print(f'  Improvement potential: {improvement:.6f}')

    # Every tree index that appears in at least one overlapping pair.
    involved = {member for pair in pairs for member in pair}
    print(f'  Trees involved in overlaps: {sorted(involved)}')


N=3: 3 overlapping pairs out of 3 trees
  Improvement potential: 0.129433
  Trees involved in overlaps: [0, 1, 2]

N=4: 6 overlapping pairs out of 4 trees
  Improvement potential: 0.187924
  Trees involved in overlaps: [0, 1, 2, 3]

N=5: 10 overlapping pairs out of 5 trees
  Improvement potential: 0.204155
  Trees involved in overlaps: [0, 1, 2, 3, 4]

N=6: 15 overlapping pairs out of 6 trees
  Improvement potential: 0.225985
  Trees involved in overlaps: [0, 1, 2, 3, 4, 5]

N=7: 21 overlapping pairs out of 7 trees
  Improvement potential: 0.237887
  Trees involved in overlaps: [0, 1, 2, 3, 4, 5, 6]


In [10]:
# Probe the cpp_parallel_sa submissions directly: do any of them have fewer overlaps?
import glob

cpp_sa_files = glob.glob('/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp*.csv')
print(f'Found {len(cpp_sa_files)} cpp_parallel_sa submissions')

def _count_overlaps(frame, n):
    """Number of overlapping tree pairs in the rows of `frame` whose 'N' equals `n`."""
    rows = frame[frame['N'] == n]
    xs = strip(rows['x'].to_numpy())
    ys = strip(rows['y'].to_numpy())
    ds = strip(rows['deg'].to_numpy())
    return len(find_overlapping_pairs(xs, ys, ds, tx, ty))

# Only the first five files (in sorted order) are inspected, for N=3,4,5,6,7
for fp in sorted(cpp_sa_files)[:5]:
    df = pd.read_csv(fp)
    df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

    overlap_counts = [_count_overlaps(df, test_n) for test_n in [3, 4, 5, 6, 7]]

    file_name = fp.split("/")[-1]
    print(f'{file_name}: overlaps for N=3,4,5,6,7 = {overlap_counts}')

Found 20 cpp_parallel_sa submissions
submission_cpp.csv: overlaps for N=3,4,5,6,7 = [0, 0, 0, 0, 7]
submission_cpp10.csv: overlaps for N=3,4,5,6,7 = [3, 0, 3, 0, 7]
submission_cpp11.csv: overlaps for N=3,4,5,6,7 = [3, 1, 6, 1, 7]
submission_cpp12.csv: overlaps for N=3,4,5,6,7 = [3, 5, 6, 1, 11]
submission_cpp13.csv: overlaps for N=3,4,5,6,7 = [3, 6, 6, 1, 11]


In [11]:
# submission_cpp.csv looked cleaner for small N - score it and count overlaps for every N
cpp_path = '/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp.csv'
df_cpp = pd.read_csv(cpp_path)
id_prefix = df_cpp['id'].astype(str).str.split('_').str[0]
df_cpp['N'] = id_prefix.astype(int)

# Per-N score and number of overlapping pairs
cpp_scores = {}
cpp_overlaps = {}
for n in range(1, 201):
    rows = df_cpp[df_cpp['N'] == n]
    xs, ys, ds = (strip(rows[col].to_numpy()) for col in ('x', 'y', 'deg'))
    cpp_scores[n] = score_group(xs, ys, ds, tx, ty)
    cpp_overlaps[n] = len(find_overlapping_pairs(xs, ys, ds, tx, ty))

total_cpp = sum(cpp_scores.values())
n_with_overlaps = sum(count > 0 for count in cpp_overlaps.values())
n_without_overlaps = 200 - n_with_overlaps
print(f'submission_cpp.csv total score: {total_cpp:.6f}')
print(f'N values with overlaps: {n_with_overlaps}')
print(f'N values without overlaps: {n_without_overlaps}')

submission_cpp.csv total score: 84.596364
N values with overlaps: 183
N values without overlaps: 17


In [12]:
# Strategy: Create a new ensemble by picking best VALID (no overlap) config for each N
# from ALL available submission files

import glob

# glob returns matches in arbitrary order; sort so the scan order (and therefore
# which file wins a tie on score) is the same on every run.
all_files = sorted(glob.glob('/home/nonroot/snapshots/**/*.csv', recursive=True))
print(f'Found {len(all_files)} CSV files total')

# Filter for submission-like files.
# NOTE(review): the match is against the full path, and the recorded run kept
# 236 of 236 files, so this filter currently removes nothing. The later schema
# check is what actually rejects non-submission CSVs. Confirm whether matching
# on the file name only was intended before tightening it.
submission_files = [
    f for f in all_files
    if any(tag in f.lower() for tag in ('submission', 'santa'))
]
print(f'Found {len(submission_files)} potential submission files')

Found 236 CSV files total
Found 236 potential submission files


In [13]:
# Build best VALID ensemble from all sources:
# for every N keep the lowest-scoring configuration that has no overlapping trees.
from tqdm import tqdm

best_valid = {n: {'score': 1e300, 'data': None, 'src': None} for n in range(1, 201)}

for fp in tqdm(submission_files, desc='Scanning'):
    try:
        df = pd.read_csv(fp)
    except Exception:
        # Best-effort scan: unreadable or malformed files are skipped.
        continue

    # An empty frame would make the iloc[0] probe below raise.
    if df.empty or not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
        continue

    # Only files whose values carry the 's' prefix are considered (probed on the first row).
    sample_x = str(df['x'].iloc[0])
    if not sample_x.startswith('s'):
        continue

    try:
        group_ids = df['id'].astype(str).str.split('_').str[0].astype(int)
    except ValueError:
        # ids that do not start with an integer: not a submission, skip the file
        # instead of aborting the whole scan.
        continue
    df = df.assign(N=group_ids)

    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue

        # The score divides by the number of rows, so a group whose row count
        # differs from N is not comparable with real N-tree configurations.
        if len(g) != n:
            continue

        try:
            xs = strip(g['x'].to_numpy())
            ys = strip(g['y'].to_numpy())
            ds = strip(g['deg'].to_numpy())
        except ValueError:
            # Unparsable coordinate in this group.
            continue

        # Cheap check first: the score is fast, the overlap test is slow.
        # `not (a < b)` rather than `a >= b` so a NaN score is rejected as before.
        sc = float(score_group(xs, ys, ds, tx, ty))
        if not (sc < best_valid[n]['score']):
            continue

        # Check for overlaps - only keep valid configs
        if has_overlap(xs, ys, ds, tx, ty):
            continue

        best_valid[n]['score'] = sc
        best_valid[n]['data'] = g.drop(columns=['N']).copy()
        best_valid[n]['src'] = fp

# Single backslash: the original '\\n' printed a literal "\n".
print('\nBest valid ensemble built!')

Scanning:   0%|          | 0/236 [00:00<?, ?it/s]

Scanning:   0%|          | 1/236 [00:01<06:31,  1.66s/it]

Scanning:   1%|          | 2/236 [00:03<06:15,  1.61s/it]

Scanning:   1%|▏         | 3/236 [00:04<06:09,  1.59s/it]

Scanning:   2%|▏         | 4/236 [00:06<05:48,  1.50s/it]

Scanning:   2%|▏         | 5/236 [00:06<04:39,  1.21s/it]

Scanning:   3%|▎         | 6/236 [00:07<03:55,  1.02s/it]

Scanning:   3%|▎         | 7/236 [00:08<03:26,  1.11it/s]

Scanning:   3%|▎         | 8/236 [00:09<04:11,  1.10s/it]

Scanning:   4%|▍         | 9/236 [00:10<03:41,  1.02it/s]

Scanning:   4%|▍         | 10/236 [00:11<03:18,  1.14it/s]

Scanning:   5%|▍         | 11/236 [00:12<04:04,  1.09s/it]

Scanning:   5%|▌         | 12/236 [00:13<03:33,  1.05it/s]

Scanning:   6%|▌         | 13/236 [00:14<04:11,  1.13s/it]

Scanning:   6%|▌         | 14/236 [00:16<04:29,  1.21s/it]

Scanning:   6%|▋         | 15/236 [00:16<03:50,  1.04s/it]

Scanning:   7%|▋         | 16/236 [00:17<03:23,  1.08it/s]

Scanning:   7%|▋         | 17/236 [00:18<03:04,  1.19it/s]

Scanning:   8%|▊         | 18/236 [00:19<03:40,  1.01s/it]

Scanning:   8%|▊         | 19/236 [00:20<03:16,  1.10it/s]

Scanning:   8%|▊         | 20/236 [00:20<02:59,  1.21it/s]

Scanning:   9%|▉         | 21/236 [00:21<02:46,  1.29it/s]

Scanning:   9%|▉         | 22/236 [00:22<02:37,  1.35it/s]

Scanning:  10%|▉         | 23/236 [00:23<03:31,  1.01it/s]

Scanning:  11%|█         | 26/236 [00:25<02:33,  1.37it/s]

Scanning:  11%|█▏        | 27/236 [00:26<03:10,  1.10it/s]

Scanning:  12%|█▏        | 28/236 [00:28<03:39,  1.06s/it]

Scanning:  12%|█▏        | 29/236 [00:29<03:57,  1.15s/it]

Scanning:  13%|█▎        | 30/236 [00:30<03:29,  1.02s/it]

Scanning:  13%|█▎        | 31/236 [00:31<03:08,  1.09it/s]

Scanning:  14%|█▎        | 32/236 [00:31<02:52,  1.18it/s]

Scanning:  14%|█▍        | 33/236 [00:32<02:47,  1.22it/s]

Scanning:  14%|█▍        | 34/236 [00:33<02:36,  1.29it/s]

Scanning:  15%|█▍        | 35/236 [00:33<02:28,  1.35it/s]

Scanning:  15%|█▌        | 36/236 [00:34<02:22,  1.40it/s]

Scanning:  16%|█▌        | 37/236 [00:35<02:23,  1.39it/s]

Scanning:  16%|█▌        | 38/236 [00:36<03:13,  1.02it/s]

Scanning:  17%|█▋        | 39/236 [00:38<03:34,  1.09s/it]

Scanning:  17%|█▋        | 40/236 [00:38<03:08,  1.04it/s]

Scanning:  17%|█▋        | 41/236 [00:39<02:49,  1.15it/s]

Scanning:  18%|█▊        | 42/236 [00:40<02:38,  1.22it/s]

Scanning:  18%|█▊        | 43/236 [00:41<03:09,  1.02it/s]

Scanning:  19%|█▊        | 44/236 [00:42<02:49,  1.13it/s]

Scanning:  19%|█▉        | 45/236 [00:42<02:35,  1.23it/s]

Scanning:  19%|█▉        | 46/236 [00:43<02:25,  1.31it/s]

Scanning:  20%|█▉        | 47/236 [00:44<02:21,  1.34it/s]

Scanning:  20%|██        | 48/236 [00:45<03:05,  1.01it/s]

Scanning:  21%|██        | 49/236 [00:47<03:28,  1.11s/it]

Scanning:  21%|██        | 50/236 [00:48<03:43,  1.20s/it]

Scanning:  22%|██▏       | 51/236 [00:50<04:03,  1.32s/it]

Scanning:  22%|██▏       | 52/236 [00:51<04:09,  1.35s/it]

Scanning:  22%|██▏       | 53/236 [00:53<04:12,  1.38s/it]

Scanning:  23%|██▎       | 54/236 [00:54<04:13,  1.39s/it]

Scanning:  23%|██▎       | 55/236 [00:56<04:19,  1.43s/it]

Scanning:  24%|██▎       | 56/236 [00:58<04:56,  1.65s/it]

Scanning:  24%|██▍       | 57/236 [01:00<05:14,  1.76s/it]

Scanning:  25%|██▍       | 58/236 [01:02<05:26,  1.84s/it]

Scanning:  25%|██▌       | 59/236 [01:04<05:35,  1.89s/it]

Scanning:  25%|██▌       | 60/236 [01:06<05:42,  1.95s/it]

Scanning:  26%|██▌       | 61/236 [01:08<05:44,  1.97s/it]

Scanning:  26%|██▋       | 62/236 [01:10<05:45,  1.99s/it]

Scanning:  27%|██▋       | 63/236 [01:12<05:45,  2.00s/it]

Scanning:  27%|██▋       | 64/236 [01:13<05:22,  1.87s/it]

Scanning:  28%|██▊       | 65/236 [01:15<04:54,  1.72s/it]

Scanning:  28%|██▊       | 66/236 [01:16<03:58,  1.40s/it]

Scanning:  28%|██▊       | 67/236 [01:16<03:19,  1.18s/it]

Scanning:  29%|██▉       | 68/236 [01:17<02:52,  1.03s/it]

Scanning:  29%|██▉       | 69/236 [01:18<02:36,  1.07it/s]

Scanning:  30%|██▉       | 70/236 [01:18<02:21,  1.17it/s]

Scanning:  30%|███       | 71/236 [01:19<02:12,  1.25it/s]

Scanning:  31%|███       | 72/236 [01:20<02:04,  1.31it/s]

Scanning:  31%|███       | 73/236 [01:20<02:02,  1.34it/s]

Scanning:  31%|███▏      | 74/236 [01:21<01:57,  1.38it/s]

Scanning:  32%|███▏      | 75/236 [01:22<01:53,  1.41it/s]

Scanning:  32%|███▏      | 76/236 [01:22<01:51,  1.43it/s]

Scanning:  33%|███▎      | 77/236 [01:23<01:49,  1.45it/s]

Scanning:  33%|███▎      | 78/236 [01:24<01:50,  1.43it/s]

Scanning:  33%|███▎      | 79/236 [01:24<01:48,  1.45it/s]

Scanning:  34%|███▍      | 80/236 [01:25<01:46,  1.46it/s]

Scanning:  34%|███▍      | 81/236 [01:26<01:45,  1.47it/s]

Scanning:  35%|███▍      | 82/236 [01:26<01:46,  1.45it/s]

Scanning:  35%|███▌      | 83/236 [01:27<01:44,  1.46it/s]

Scanning:  36%|███▌      | 84/236 [01:28<01:43,  1.47it/s]

Scanning:  36%|███▌      | 85/236 [01:28<01:42,  1.47it/s]

Scanning:  36%|███▋      | 86/236 [01:29<01:41,  1.48it/s]

Scanning:  37%|███▋      | 87/236 [01:30<01:43,  1.44it/s]

Scanning:  37%|███▋      | 88/236 [01:31<01:41,  1.45it/s]

Scanning:  38%|███▊      | 89/236 [01:31<01:40,  1.46it/s]

Scanning:  38%|███▊      | 90/236 [01:32<01:38,  1.48it/s]

Scanning:  39%|███▊      | 91/236 [01:33<01:39,  1.46it/s]

Scanning:  39%|███▉      | 92/236 [01:33<01:37,  1.48it/s]

Scanning:  39%|███▉      | 93/236 [01:34<01:35,  1.49it/s]

Scanning:  40%|███▉      | 94/236 [01:35<01:34,  1.50it/s]

Scanning:  40%|████      | 95/236 [01:35<01:35,  1.48it/s]

Scanning:  41%|████      | 96/236 [01:36<01:34,  1.49it/s]

Scanning:  41%|████      | 97/236 [01:37<01:32,  1.50it/s]

Scanning:  42%|████▏     | 98/236 [01:37<01:31,  1.51it/s]

Scanning:  42%|████▏     | 99/236 [01:38<01:30,  1.51it/s]

Scanning:  42%|████▏     | 100/236 [01:39<01:31,  1.49it/s]

Scanning:  43%|████▎     | 101/236 [01:40<02:05,  1.08it/s]

Scanning:  43%|████▎     | 102/236 [01:42<02:28,  1.11s/it]

Scanning:  44%|████▎     | 103/236 [01:43<02:44,  1.24s/it]

Scanning:  44%|████▍     | 104/236 [01:45<02:56,  1.34s/it]

Scanning:  44%|████▍     | 105/236 [01:46<03:02,  1.39s/it]

Scanning:  45%|████▍     | 106/236 [01:48<03:06,  1.43s/it]

Scanning:  45%|████▌     | 107/236 [01:49<03:08,  1.46s/it]

Scanning:  46%|████▌     | 108/236 [01:51<03:10,  1.49s/it]

Scanning:  46%|████▌     | 109/236 [01:52<03:12,  1.52s/it]

Scanning:  47%|████▋     | 110/236 [01:54<03:31,  1.68s/it]

Scanning:  47%|████▋     | 111/236 [01:57<03:42,  1.78s/it]

Scanning:  47%|████▋     | 112/236 [01:59<03:50,  1.86s/it]

Scanning:  48%|████▊     | 113/236 [02:01<03:56,  1.92s/it]

Scanning:  48%|████▊     | 114/236 [02:03<03:58,  1.95s/it]

Scanning:  49%|████▊     | 115/236 [02:05<03:59,  1.98s/it]

Scanning:  49%|████▉     | 116/236 [02:07<03:58,  1.99s/it]

Scanning:  50%|████▉     | 117/236 [02:09<03:58,  2.00s/it]

Scanning:  50%|█████     | 118/236 [02:11<03:58,  2.02s/it]

Scanning:  50%|█████     | 119/236 [02:13<03:56,  2.02s/it]

Scanning:  51%|█████     | 120/236 [02:15<03:54,  2.02s/it]

Scanning:  51%|█████▏    | 121/236 [02:17<03:52,  2.02s/it]

Scanning:  52%|█████▏    | 122/236 [02:19<03:51,  2.03s/it]

Scanning:  52%|█████▏    | 123/236 [02:21<03:49,  2.03s/it]

Scanning:  53%|█████▎    | 124/236 [02:23<03:48,  2.04s/it]

Scanning:  53%|█████▎    | 125/236 [02:25<03:46,  2.04s/it]

Scanning:  53%|█████▎    | 126/236 [02:27<03:45,  2.05s/it]

Scanning:  54%|█████▍    | 127/236 [02:29<03:45,  2.07s/it]

Scanning:  54%|█████▍    | 128/236 [02:31<03:43,  2.07s/it]

Scanning:  55%|█████▍    | 129/236 [02:33<03:40,  2.06s/it]

Scanning:  55%|█████▌    | 130/236 [02:35<03:38,  2.06s/it]

Scanning:  56%|█████▌    | 131/236 [02:37<03:37,  2.07s/it]

Scanning:  56%|█████▌    | 132/236 [02:40<03:35,  2.07s/it]

Scanning:  56%|█████▋    | 133/236 [02:42<03:32,  2.06s/it]

Scanning:  57%|█████▋    | 134/236 [02:44<03:29,  2.06s/it]

Scanning:  57%|█████▋    | 135/236 [02:46<03:29,  2.07s/it]

Scanning:  58%|█████▊    | 136/236 [02:48<03:27,  2.07s/it]

Scanning:  58%|█████▊    | 137/236 [02:50<03:24,  2.07s/it]

Scanning:  58%|█████▊    | 138/236 [02:52<03:21,  2.06s/it]

Scanning:  59%|█████▉    | 139/236 [02:54<03:19,  2.05s/it]

Scanning:  59%|█████▉    | 140/236 [02:56<03:17,  2.06s/it]

Scanning:  60%|█████▉    | 141/236 [02:58<03:14,  2.05s/it]

Scanning:  60%|██████    | 142/236 [03:00<03:13,  2.05s/it]

Scanning:  61%|██████    | 143/236 [03:02<03:10,  2.05s/it]

Scanning:  61%|██████    | 144/236 [03:04<03:10,  2.07s/it]

Scanning:  61%|██████▏   | 145/236 [03:06<03:07,  2.06s/it]

Scanning:  62%|██████▏   | 146/236 [03:08<03:04,  2.05s/it]

Scanning:  62%|██████▏   | 147/236 [03:10<02:48,  1.89s/it]

Scanning:  63%|██████▎   | 148/236 [03:12<02:50,  1.93s/it]

Scanning:  63%|██████▎   | 149/236 [03:14<02:52,  1.98s/it]

Scanning:  64%|██████▎   | 150/236 [03:16<02:51,  1.99s/it]

Scanning:  64%|██████▍   | 151/236 [03:18<02:50,  2.00s/it]

Scanning:  64%|██████▍   | 152/236 [03:20<02:48,  2.01s/it]

Scanning:  65%|██████▍   | 153/236 [03:22<02:48,  2.03s/it]

Scanning:  65%|██████▌   | 154/236 [03:24<02:46,  2.03s/it]

Scanning:  66%|██████▌   | 155/236 [03:26<02:44,  2.03s/it]

Scanning:  66%|██████▌   | 156/236 [03:28<02:42,  2.03s/it]

Scanning:  67%|██████▋   | 157/236 [03:30<02:40,  2.03s/it]

Scanning:  67%|██████▋   | 158/236 [03:32<02:39,  2.04s/it]

Scanning:  67%|██████▋   | 159/236 [03:34<02:37,  2.04s/it]

Scanning:  68%|██████▊   | 160/236 [03:36<02:35,  2.04s/it]

Scanning:  68%|██████▊   | 161/236 [03:38<02:33,  2.04s/it]

Scanning:  69%|██████▊   | 162/236 [03:41<02:32,  2.07s/it]

Scanning:  69%|██████▉   | 163/236 [03:43<02:30,  2.06s/it]

Scanning:  69%|██████▉   | 164/236 [03:45<02:28,  2.06s/it]

Scanning:  70%|██████▉   | 165/236 [03:47<02:26,  2.06s/it]

Scanning:  70%|███████   | 166/236 [03:49<02:25,  2.08s/it]

Scanning:  71%|███████   | 167/236 [03:51<02:22,  2.07s/it]

Scanning:  71%|███████   | 168/236 [03:53<02:20,  2.06s/it]

Scanning:  72%|███████▏  | 169/236 [03:55<02:17,  2.06s/it]

Scanning:  72%|███████▏  | 170/236 [03:57<02:15,  2.05s/it]

Scanning:  72%|███████▏  | 171/236 [03:59<02:14,  2.07s/it]

Scanning:  73%|███████▎  | 172/236 [04:01<02:12,  2.07s/it]

Scanning:  73%|███████▎  | 173/236 [04:03<02:10,  2.08s/it]

Scanning:  74%|███████▎  | 174/236 [04:05<02:08,  2.07s/it]

Scanning:  74%|███████▍  | 175/236 [04:07<02:07,  2.08s/it]

Scanning:  75%|███████▍  | 176/236 [04:10<02:04,  2.07s/it]

Scanning:  75%|███████▌  | 177/236 [04:11<01:52,  1.91s/it]

Scanning:  75%|███████▌  | 178/236 [04:12<01:29,  1.54s/it]

Scanning:  76%|███████▌  | 179/236 [04:13<01:27,  1.54s/it]

Scanning:  76%|███████▋  | 180/236 [04:15<01:29,  1.59s/it]

Scanning:  77%|███████▋  | 181/236 [04:17<01:28,  1.61s/it]

Scanning:  77%|███████▋  | 182/236 [04:17<01:11,  1.33s/it]

Scanning:  78%|███████▊  | 183/236 [04:18<01:00,  1.13s/it]

Scanning:  78%|███████▊  | 184/236 [04:19<00:52,  1.01s/it]

Scanning:  78%|███████▊  | 185/236 [04:19<00:46,  1.10it/s]

Scanning:  79%|███████▉  | 186/236 [04:20<00:41,  1.19it/s]

Scanning:  79%|███████▉  | 187/236 [04:21<00:38,  1.27it/s]

Scanning:  80%|███████▉  | 188/236 [04:21<00:36,  1.32it/s]

Scanning:  80%|████████  | 189/236 [04:22<00:34,  1.34it/s]

Scanning:  81%|████████  | 190/236 [04:23<00:33,  1.39it/s]

Scanning:  81%|████████  | 191/236 [04:23<00:31,  1.41it/s]

Scanning:  81%|████████▏ | 192/236 [04:24<00:30,  1.43it/s]

Scanning:  82%|████████▏ | 193/236 [04:25<00:30,  1.42it/s]

Scanning:  82%|████████▏ | 194/236 [04:26<00:29,  1.44it/s]

Scanning:  83%|████████▎ | 195/236 [04:26<00:28,  1.46it/s]

Scanning:  83%|████████▎ | 196/236 [04:27<00:27,  1.47it/s]

Scanning:  83%|████████▎ | 197/236 [04:28<00:26,  1.45it/s]

Scanning:  84%|████████▍ | 198/236 [04:28<00:26,  1.46it/s]

Scanning:  84%|████████▍ | 199/236 [04:29<00:25,  1.47it/s]

Scanning:  85%|████████▍ | 200/236 [04:30<00:24,  1.47it/s]

Scanning:  85%|████████▌ | 201/236 [04:30<00:23,  1.47it/s]

Scanning:  86%|████████▌ | 202/236 [04:31<00:23,  1.44it/s]

Scanning:  86%|████████▌ | 203/236 [04:32<00:22,  1.45it/s]

Scanning:  86%|████████▋ | 204/236 [04:32<00:21,  1.46it/s]

Scanning:  87%|████████▋ | 205/236 [04:33<00:21,  1.46it/s]

Scanning:  87%|████████▋ | 206/236 [04:34<00:20,  1.44it/s]

Scanning:  88%|████████▊ | 207/236 [04:34<00:19,  1.45it/s]

Scanning:  88%|████████▊ | 208/236 [04:35<00:19,  1.46it/s]

Scanning:  89%|████████▊ | 209/236 [04:36<00:18,  1.46it/s]

Scanning:  89%|████████▉ | 210/236 [04:36<00:17,  1.46it/s]

Scanning:  89%|████████▉ | 211/236 [04:37<00:17,  1.44it/s]

Scanning:  90%|████████▉ | 212/236 [04:38<00:16,  1.44it/s]

Scanning:  90%|█████████ | 213/236 [04:39<00:15,  1.45it/s]

Scanning:  91%|█████████ | 214/236 [04:39<00:15,  1.46it/s]

Scanning:  91%|█████████ | 215/236 [04:40<00:14,  1.43it/s]

Scanning:  92%|█████████▏| 216/236 [04:41<00:13,  1.44it/s]

Scanning:  92%|█████████▏| 217/236 [04:41<00:13,  1.45it/s]

Scanning:  92%|█████████▏| 218/236 [04:42<00:12,  1.45it/s]

Scanning:  93%|█████████▎| 219/236 [04:43<00:11,  1.46it/s]

Scanning:  93%|█████████▎| 220/236 [04:43<00:11,  1.43it/s]

Scanning:  94%|█████████▎| 221/236 [04:44<00:10,  1.45it/s]

Scanning:  94%|█████████▍| 222/236 [04:46<00:13,  1.02it/s]

Scanning:  94%|█████████▍| 223/236 [04:47<00:15,  1.19s/it]

Scanning:  95%|█████████▍| 224/236 [04:49<00:16,  1.35s/it]

Scanning:  95%|█████████▌| 225/236 [04:51<00:15,  1.45s/it]

Scanning:  96%|█████████▌| 226/236 [04:53<00:15,  1.51s/it]

Scanning:  96%|█████████▌| 227/236 [04:54<00:14,  1.56s/it]

Scanning:  97%|█████████▋| 228/236 [04:56<00:12,  1.61s/it]

Scanning:  97%|█████████▋| 229/236 [04:58<00:11,  1.62s/it]

Scanning:  97%|█████████▋| 230/236 [04:58<00:08,  1.34s/it]

Scanning:  98%|█████████▊| 231/236 [04:59<00:05,  1.14s/it]

Scanning:  98%|█████████▊| 232/236 [05:00<00:03,  1.00it/s]

Scanning:  99%|█████████▊| 233/236 [05:00<00:02,  1.09it/s]

Scanning:  99%|█████████▉| 234/236 [05:02<00:02,  1.14s/it]

Scanning: 100%|█████████▉| 235/236 [05:03<00:01,  1.00s/it]

Scanning: 100%|██████████| 236/236 [05:04<00:00,  1.16s/it]

Scanning: 100%|██████████| 236/236 [05:04<00:00,  1.29s/it]

\nBest valid ensemble built!





In [14]:
# Calculate the best valid ensemble score and compare it with baseline and target
TARGET_SCORE = 68.931058

# An N with no valid configuration still holds the 1e300 sentinel, which would
# silently dominate the total - report it explicitly.
missing_n = [n for n in range(1, 201) if best_valid[n]['data'] is None]
if missing_n:
    print(f'WARNING: no valid config found for N={missing_n}; the total below is not meaningful')

valid_ensemble_score = sum(best_valid[n]['score'] for n in range(1, 201))
baseline_total = df_analysis['score_base'].sum()
print(f'Best valid ensemble score: {valid_ensemble_score:.6f}')
print(f'Baseline score: {baseline_total:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap to target: {valid_ensemble_score - TARGET_SCORE:.6f}')

# Check how many N values improved on the baseline by more than 0.0001.
# Index the baseline scores by N once instead of filtering the frame per N.
baseline_by_n = df_analysis.set_index('n')['score_base']
improved_n = [
    n for n in range(1, 201)
    if best_valid[n]['score'] < baseline_by_n.loc[n] - 0.0001
]

# Single backslash: the original '\\n' printed a literal "\n".
print(f'\nN values with improvement: {len(improved_n)}')

Best valid ensemble score: 70.734327
Baseline score: 70.734327
Target: 68.931058
Gap to target: 1.803269
\nN values with improvement: 0


In [None]:
# Dump every overlapping pair for one N to understand what a repair has to deal with
test_n = 10  # Pick a small N to analyze

rows = df_ensemble[df_ensemble['N'] == test_n]
xs, ys, ds = (strip(rows[col].to_numpy()) for col in ('x', 'y', 'deg'))

pairs = find_overlapping_pairs(xs, ys, ds, tx, ty)
print(f'N={test_n}: {len(pairs)} overlapping pairs')
for i, j in pairs:
    print(f'  Trees {i} and {j} overlap')
    # Placement of both trees in the pair, first i then j.
    for k in (i, j):
        print(f'    Tree {k}: x={xs[k]:.4f}, y={ys[k]:.4f}, deg={ds[k]:.2f}')