# Evolver Loop 1 Analysis

## Current Status
- Best CV score: 70.734327 from exp_000 (001_baseline)
- Best LB score: N/A (no submissions yet)
- Target: 68.931058 | Gap: ~1.8 points (2.6%)

## Key Findings
1. bbox3 binary has GLIBC 2.34 compatibility issue (system has 2.31)
2. C++ tree_packer compiles and runs but didn't improve the baseline
3. fix_direction shows no improvement (baseline already well-optimized)
4. Top kernels use an ENSEMBLE approach: for each N they keep the best configuration found across multiple sources

In [1]:
import numpy as np
import pandas as pd
import math
from numba import njit
import glob
import os

# Tree polygon template
@njit
def make_polygon_template():
    """Return the tree outline as two float64 arrays ``(x, y)`` of 15 vertices.

    The vertices start at the tip, run down the right-hand side, across the
    trunk bottom, and back up the left-hand side.
    """
    trunk_w = 0.15
    trunk_h = 0.2
    base_w = 0.7
    mid_w = 0.4
    top_w = 0.25
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h
    xs = np.array([
        0.0, top_w / 2, top_w / 4, mid_w / 2, mid_w / 4, base_w / 2,
        trunk_w / 2, trunk_w / 2, -trunk_w / 2, -trunk_w / 2,
        -base_w / 2, -mid_w / 4, -mid_w / 2, -top_w / 4, -top_w / 2,
    ], np.float64)
    ys = np.array([
        tip_y, tier1_y, tier1_y, tier2_y, tier2_y, base_y,
        base_y, trunk_bottom_y, trunk_bottom_y, base_y,
        base_y, tier2_y, tier2_y, tier1_y, tier1_y,
    ], np.float64)
    return xs, ys

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of trees: squared bounding-square side per tree.

    Each tree i is the template (tx, ty) rotated by degs[i] degrees about the
    origin and then shifted by (xs[i], ys[i]). The result is the square of the
    longer side of the axis-aligned bounding box of all transformed vertices,
    divided by the number of trees (``xs.size``).
    """
    n_trees = xs.size
    n_verts = tx.size
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300
    for k in range(n_trees):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        cx = xs[k]
        cy = ys[k]
        for v in range(n_verts):
            px = cos_t * tx[v] - sin_t * ty[v] + cx
            py = sin_t * tx[v] + cos_t * ty[v] + cy
            if px < min_x:
                min_x = px
            if px > max_x:
                max_x = px
            if py < min_y:
                min_y = py
            if py > max_y:
                max_y = py
    width = max_x - min_x
    height = max_y - min_y
    side = max(width, height)
    return side * side / n_trees

def strip(a):
    """Convert submission values to a float64 array, dropping any 's' characters.

    Each element is rendered with ``str``, every 's' is removed, and the
    remainder is parsed with ``float``.
    """
    cleaned = []
    for raw in a:
        text = str(raw).replace('s', '')
        cleaned.append(float(text))
    return np.array(cleaned, np.float64)

# Build the tree vertex template once; the scoring calls in later cells pass
# tx/ty to score_group.
tx, ty = make_polygon_template()
print('Polygon template loaded')

Polygon template loaded


In [2]:
# Score the baseline submission, one group (N) at a time
baseline_path = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
df = pd.read_csv(baseline_path)
# N is the part of the id before the underscore.
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

# Per-N score
scores = {}
for n, g in df.groupby('N'):
    xs, ys, ds = (strip(g[col].to_numpy()) for col in ('x', 'y', 'deg'))
    scores[n] = score_group(xs, ys, ds, tx, ty)

total_score = sum(scores.values())
gap = total_score - 68.931058
print(f'Baseline total score: {total_score:.6f}')
print(f'Target: 68.931058')
print(f'Gap: {gap:.6f} ({gap / 68.931058 * 100:.2f}%)')

Baseline total score: 70.734327


Target: 68.931058
Gap: 1.803269 (2.62%)


In [3]:
# Rank the N values by how much each one contributes to the total score
import matplotlib.pyplot as plt

n_values = list(range(1, 201))
contributions = [scores.get(n, 0) for n in n_values]

# Largest contributors first
sorted_n = sorted(scores.items(), key=lambda x: x[1], reverse=True)
top_20 = sorted_n[:20]
print('Top 20 N values by score contribution:')
for n, s in top_20:
    share = s / total_score * 100
    print(f'  N={n:3d}: {s:.6f} ({share:.2f}%)')

top_20_total = sum(s for _, s in top_20)
print(f'\nTop 20 contribute: {top_20_total:.6f} ({top_20_total/total_score*100:.1f}%)')

Top 20 N values by score contribution:
  N=  1: 0.661250 (0.93%)
  N=  2: 0.450779 (0.64%)
  N=  3: 0.434745 (0.61%)
  N=  5: 0.416850 (0.59%)
  N=  4: 0.416545 (0.59%)
  N=  7: 0.399897 (0.57%)
  N=  6: 0.399610 (0.56%)
  N=  9: 0.387415 (0.55%)
  N=  8: 0.385407 (0.54%)
  N= 15: 0.379203 (0.54%)
  N= 10: 0.376630 (0.53%)
  N= 21: 0.376451 (0.53%)
  N= 20: 0.376057 (0.53%)
  N= 11: 0.375736 (0.53%)
  N= 22: 0.375258 (0.53%)
  N= 16: 0.374128 (0.53%)
  N= 26: 0.373997 (0.53%)
  N= 12: 0.372724 (0.53%)
  N= 13: 0.372323 (0.53%)
  N= 25: 0.372144 (0.53%)

Top 20 contribute: 8.077149 (11.4%)


In [4]:
# Collect candidate submission CSVs for the ensemble
submission_files = [
    os.path.join(root, name)
    for root, _dirs, names in os.walk('/home/nonroot/snapshots')
    for name in names
    if name.endswith('.csv') and 'submission' in name.lower()
]

print(f'Found {len(submission_files)} submission files')
for path in submission_files[:10]:
    print(f'  {path}')

Found 158 submission files
  /home/nonroot/snapshots/santa-2025/20992536951/code/submission.csv
  /home/nonroot/snapshots/santa-2025/20992536951/code/submission_v21.csv
  /home/nonroot/snapshots/santa-2025/20992536951/code/submission_bp.csv
  /home/nonroot/snapshots/santa-2025/20992536951/code/experiments/001_baseline/submission.csv
  /home/nonroot/snapshots/santa-2025/20992536951/code/experiments/003_local_search/submission.csv
  /home/nonroot/snapshots/santa-2025/20992536951/code/experiments/002_grid_placement/submission.csv
  /home/nonroot/snapshots/santa-2025/20992536951/code/experiments/004_strict_validation/submission.csv
  /home/nonroot/snapshots/santa-2025/20992536951/submission/submission.csv
  /home/nonroot/snapshots/santa-2025/20992536951/data/sample_submission.csv
  /home/nonroot/snapshots/santa-2025/20971964134/code/experiments/001_baseline/submission.csv


In [5]:
# Load all submissions and find the best-scoring config for each N.
# NOTE: only the bounding-box score is compared in this cell; nothing here
# checks whether the trees of a configuration overlap.
best = {n: {'score': 1e300, 'data': None, 'src': None} for n in range(1, 201)}
# (file, N or None, message) for every file/group that was skipped because of
# an error, so failures are visible instead of silently ignored.
load_errors = []

for fp in submission_files:
    try:
        df = pd.read_csv(fp)
        if df.empty or not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            continue

        # Check if values have 's' prefix (decided from the first row)
        has_s_prefix = str(df['x'].iloc[0]).startswith('s')
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
    except Exception as e:
        load_errors.append((fp, None, repr(e)))
        continue

    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue
        # score_group divides by the number of rows it is given, so a group
        # with the wrong number of trees would be scored as a different
        # problem size. Only accept groups with exactly n rows.
        if len(g) != n:
            load_errors.append((fp, n, f'expected {n} rows, found {len(g)}'))
            continue
        try:
            if has_s_prefix:
                xs = strip(g['x'].to_numpy())
                ys = strip(g['y'].to_numpy())
                ds = strip(g['deg'].to_numpy())
            else:
                xs = g['x'].to_numpy().astype(np.float64)
                ys = g['y'].to_numpy().astype(np.float64)
                ds = g['deg'].to_numpy().astype(np.float64)

            sc = score_group(xs, ys, ds, tx, ty)
            if sc < best[n]['score']:
                best[n]['score'] = float(sc)
                best[n]['data'] = g.drop(columns=['N']).copy()
                best[n]['src'] = fp
        except Exception as e:
            # Best-effort scan: record the problem and move on to the next group.
            load_errors.append((fp, n, repr(e)))

print('Best scores found for each N:')
ensemble_score = sum(best[n]['score'] for n in range(1, 201) if best[n]['data'] is not None)
print(f'Ensemble total score: {ensemble_score:.6f}')
print(f'Improvement over baseline: {total_score - ensemble_score:.6f}')
if load_errors:
    print(f'Skipped {len(load_errors)} files/groups that could not be scored (see load_errors)')

Best scores found for each N:
Ensemble total score: 67.772662
Improvement over baseline: 2.961665


In [6]:
# Tally how many N values each source file supplies to the ensemble
source_counts = {}
for n in range(1, 201):
    src = best[n]['src']
    if not src:
        continue
    source_counts[src] = source_counts.get(src, 0) + 1

print('Sources contributing to ensemble:')
# Stable descending sort by count (ties keep insertion order).
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)
for src, count in ranked_sources:
    print(f'  {count:3d} configs from {src}')

Sources contributing to ensemble:
  136 configs from /home/nonroot/snapshots/santa-2025/21108486172/code/experiments/001_baseline/submission.csv
   32 configs from /home/nonroot/snapshots/santa-2025/21105319338/code/experiments/001_baseline/submission.csv
   20 configs from /home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp20.csv
   10 configs from /home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp19.csv
    1 configs from /home/nonroot/snapshots/santa-2025/21105319338/code/datasets/smartmanoj_submission.csv
    1 configs from /home/nonroot/snapshots/santa-2025/21105319338/code/experiments/002_extended_optimization/submission_final.csv


In [7]:
# Assemble the ensemble submission from the per-N winners
def _ensure_s_prefix(v):
    """Return v unchanged if its text already starts with 's', else 's' + v."""
    return v if str(v).startswith('s') else f's{v}'

rows = [best[n]['data'] for n in range(1, 201) if best[n]['data'] is not None]

if not rows:
    print('No ensemble data available')
else:
    ensemble_df = pd.concat(rows, ignore_index=True)
    # Ensure proper format with 's' prefix
    for col in ['x', 'y', 'deg']:
        ensemble_df[col] = ensemble_df[col].apply(_ensure_s_prefix)

    # Order rows by group number, then by index within the group
    id_parts = ensemble_df['id'].str.split('_')
    ensemble_df = (
        ensemble_df
        .assign(sn=id_parts.str[0].astype(int), si=id_parts.str[1].astype(int))
        .sort_values(['sn', 'si'])
        .drop(columns=['sn', 'si'])
    )
    ensemble_df = ensemble_df[['id', 'x', 'y', 'deg']]

    # Save
    ensemble_df.to_csv('/home/code/experiments/001_baseline/ensemble_submission.csv', index=False)
    print(f'Saved ensemble submission with score: {ensemble_score:.6f}')
    print(f'Rows: {len(ensemble_df)}')

Saved ensemble submission with score: 67.772662
Rows: 20100


In [8]:
# Re-read the saved ensemble from disk and recompute its score
ensemble_path = '/home/code/experiments/001_baseline/ensemble_submission.csv'
df = pd.read_csv(ensemble_path)
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

# Per-N score
verify_scores = {}
for n, g in df.groupby('N'):
    xs, ys, ds = (strip(g[col].to_numpy()) for col in ('x', 'y', 'deg'))
    verify_scores[n] = score_group(xs, ys, ds, tx, ty)

verify_total = sum(verify_scores.values())
margin = 68.931058 - verify_total
print(f'Verified ensemble score: {verify_total:.6f}')
print(f'Target: 68.931058')
print(f'Beats target by: {margin:.6f}')

# Copy to submission folder
import shutil
shutil.copy(ensemble_path, '/home/submission/submission.csv')
print(f'\nCopied to /home/submission/submission.csv')

Verified ensemble score: 67.772662
Target: 68.931058
Beats target by: 1.158396

Copied to /home/submission/submission.csv


In [10]:
# Check for overlaps in the ensemble submission
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from shapely import affinity
from decimal import Decimal

class ChristmasTree:
    """A tree outline rotated by ``angle`` degrees and placed at (center_x, center_y).

    The constructor arguments are stored as ``Decimal`` values; the resulting
    shapely geometry is available as ``self.polygon``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        two = Decimal('2')
        four = Decimal('4')

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Tip plus the right-hand side, from the top down to the trunk bottom.
        right_side = [
            (Decimal('0.0'), tip_y),
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand side mirrors the right (tip excluded), walked bottom-up.
        left_side = [(-x, y) for x, y in reversed(right_side[1:])]

        outline = [(float(x), float(y)) for x, y in right_side + left_side]
        initial_polygon = Polygon(outline)

        # Rotate about the origin first, then move to the requested centre.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def has_overlap(trees):
    """Return True if any two trees intersect without merely touching.

    Fewer than two trees can never overlap. Candidate pairs come from an
    STRtree query over the trees' ``polygon`` attributes.
    """
    if len(trees) < 2:
        return False
    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)
    for pos, shape in enumerate(shapes):
        for hit in spatial_index.query(shape):
            if hit == pos:
                # A polygon always matches itself in the index.
                continue
            other = shapes[hit]
            if shape.intersects(other) and not shape.touches(other):
                return True
    return False

def load_configuration_from_df(n, df):
    """Build the ChristmasTree list for group ``n`` from a submission frame.

    Rows are selected by the zero-padded id prefix ``f'{n:03d}_'``. A leading
    's' is removed from x, y and deg; rows where any of the three is then
    empty are skipped.
    """
    def _unprefixed(value):
        text = str(value)
        return text[1:] if text.startswith('s') else text

    in_group = df['id'].str.startswith(f'{n:03d}_')
    trees = []
    for _, row in df[in_group].iterrows():
        x = _unprefixed(row['x'])
        y = _unprefixed(row['y'])
        deg = _unprefixed(row['deg'])
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees

# Check every group of the submission file for overlapping trees
df = pd.read_csv('/home/submission/submission.csv')
overlap_n = []
for n in range(1, 201):
    trees = load_configuration_from_df(n, df)
    if has_overlap(trees):
        overlap_n.append(n)

if overlap_n:
    print(f'WARNING: Overlaps found in N values: {overlap_n}')
else:
    # Score the file that was actually checked instead of printing a
    # hard-coded number, so the message cannot disagree with the data.
    group_ids = df['id'].astype(str).str.split('_').str[0].astype(int)
    checked_total = sum(
        score_group(
            strip(g['x'].to_numpy()),
            strip(g['y'].to_numpy()),
            strip(g['deg'].to_numpy()),
            tx,
            ty,
        )
        for _, g in df.groupby(group_ids)
    )
    print('No overlaps detected - submission is valid!')
    print(f'Final score: {checked_total:.6f}')
    print(f'Target: 68.931058')
    if checked_total < 68.931058:
        print(f'BEATS TARGET BY: {68.931058 - checked_total:.6f}')
    else:
        print(f'Gap to target: {checked_total - 68.931058:.6f}')



In [11]:
# Repair overlaps by replacing with baseline configurations
import csv

def load_groups(filename):
    """Read a submission CSV and bucket its rows by group id.

    Returns ``(header, groups)`` where ``header`` is the first CSV row and
    ``groups`` maps the text before the first underscore of column 0 to the
    list of raw rows of that group, in file order.
    """
    groups = {}
    with open(filename, newline='', encoding='utf-8') as f:
        rows = csv.reader(f)
        header = next(rows)
        for row in rows:
            key = row[0].split('_')[0]
            if key not in groups:
                groups[key] = []
            groups[key].append(row)
    return header, groups

def replace_group(target_file, donor_file, group_id, output_file=None):
    """Swap one group of ``target_file`` for the donor's version of that group.

    The result is written to ``output_file``, or back over ``target_file``
    when no output file is given. Groups are written in ascending numeric
    order of their id.

    Raises:
        ValueError: if ``donor_file`` has no rows for ``group_id``.
    """
    destination = target_file if output_file is None else output_file
    header_t, groups_t = load_groups(target_file)
    _header_d, groups_d = load_groups(donor_file)
    if group_id not in groups_d:
        raise ValueError(f'Donor file missing group {group_id}')
    groups_t[group_id] = groups_d[group_id]
    with open(destination, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header_t)
        for key in sorted(groups_t, key=int):
            writer.writerows(groups_t[key])

# Use the baseline as donor for overlap repair: every group that failed the
# overlap check is replaced in place by the baseline's version of that group.
baseline_path = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
ensemble_path = '/home/submission/submission.csv'

print(f'Repairing {len(overlap_n)} overlapping configurations...')
for n in overlap_n:
    # Group ids in the CSV are zero-padded to three digits.
    replace_group(ensemble_path, baseline_path, f'{n:03d}')
    print(f'  Replaced N={n}')

# Fixed: the escaped backslash used to print a literal "\n" instead of a blank line.
print('\nRepair complete. Re-validating...')

Repairing 30 overlapping configurations...
  Replaced N=3
  Replaced N=4


  Replaced N=5
  Replaced N=6
  Replaced N=7


  Replaced N=8
  Replaced N=9
  Replaced N=10


  Replaced N=11
  Replaced N=12
  Replaced N=13


  Replaced N=14
  Replaced N=15
  Replaced N=16


  Replaced N=17
  Replaced N=18
  Replaced N=19


  Replaced N=20
  Replaced N=21
  Replaced N=22


  Replaced N=23
  Replaced N=24
  Replaced N=25


  Replaced N=27
  Replaced N=32
  Replaced N=33


  Replaced N=39
  Replaced N=43
  Replaced N=48


  Replaced N=60
\nRepair complete. Re-validating...


In [13]:
# Re-validate the repaired submission: overlaps first, then the score
df = pd.read_csv('/home/submission/submission.csv')
overlap_n_after = [
    n for n in range(1, 201)
    if has_overlap(load_configuration_from_df(n, df))
]

if not overlap_n_after:
    print('No overlaps detected after repair!')
else:
    print(f'Still have overlaps in N values: {overlap_n_after}')

# Calculate final score
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
final_scores = {}
for n, g in df.groupby('N'):
    xs, ys, ds = (strip(g[col].to_numpy()) for col in ('x', 'y', 'deg'))
    final_scores[n] = score_group(xs, ys, ds, tx, ty)

final_total = sum(final_scores.values())
beats_target = final_total < 68.931058
print(f'\nFinal score after repair: {final_total:.6f}')
print(f'Target: 68.931058')
if beats_target:
    print(f'BEATS TARGET BY: {68.931058 - final_total:.6f}')
else:
    print(f'Gap to target: {final_total - 68.931058:.6f}')

No overlaps detected after repair!

Final score after repair: 70.734327
Target: 68.931058
Gap to target: 1.803269


In [14]:
# Better approach: only accept configurations that pass the overlap check.
# Re-scan all submissions and validate each candidate before accepting it.

best_valid = {n: {'score': 1e300, 'data': None, 'src': None} for n in range(1, 201)}
# (file, N or None, message) for every file/group that was skipped, so
# failures are visible instead of silently ignored.
valid_scan_errors = []

for fp in submission_files:
    try:
        df = pd.read_csv(fp)
        if df.empty or not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            continue

        # Check if values have 's' prefix (decided from the first row)
        has_s_prefix = str(df['x'].iloc[0]).startswith('s')
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
    except Exception as e:
        valid_scan_errors.append((fp, None, repr(e)))
        continue

    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue
        # score_group divides by the number of rows it is given, so only a
        # group with exactly n rows is a real N-tree configuration.
        if len(g) != n:
            valid_scan_errors.append((fp, n, f'expected {n} rows, found {len(g)}'))
            continue
        try:
            if has_s_prefix:
                xs = strip(g['x'].to_numpy())
                ys = strip(g['y'].to_numpy())
                ds = strip(g['deg'].to_numpy())
            else:
                xs = g['x'].to_numpy().astype(np.float64)
                ys = g['y'].to_numpy().astype(np.float64)
                ds = g['deg'].to_numpy().astype(np.float64)

            sc = score_group(xs, ys, ds, tx, ty)

            # Only consider if better than current best
            if sc < best_valid[n]['score']:
                # Validate for overlaps
                trees = load_configuration_from_df(n, df)
                # load_configuration_from_df selects rows by the zero-padded
                # 'NNN_' id prefix and skips rows with empty values. If it did
                # not return all n trees, the overlap check would run on a
                # different (possibly empty) set than the one just scored.
                if len(trees) != n:
                    valid_scan_errors.append(
                        (fp, n, f'overlap check saw {len(trees)} trees, expected {n}')
                    )
                    continue
                if not has_overlap(trees):
                    best_valid[n]['score'] = float(sc)
                    best_valid[n]['data'] = g.drop(columns=['N']).copy()
                    best_valid[n]['src'] = fp
        except Exception as e:
            # Best-effort scan: record the problem and move on to the next group.
            valid_scan_errors.append((fp, n, repr(e)))

print('Best VALID scores found for each N:')
valid_ensemble_score = sum(best_valid[n]['score'] for n in range(1, 201) if best_valid[n]['data'] is not None)
print(f'Valid ensemble total score: {valid_ensemble_score:.6f}')
if valid_scan_errors:
    print(f'Skipped {len(valid_scan_errors)} files/groups during the scan (see valid_scan_errors)')

Best VALID scores found for each N:
Valid ensemble total score: 70.734327


In [16]:
# Tally how many N values each source file supplies to the valid ensemble
valid_source_counts = {}
for n in range(1, 201):
    src = best_valid[n]['src']
    if not src:
        continue
    valid_source_counts[src] = valid_source_counts.get(src, 0) + 1

print('Sources contributing to valid ensemble:')
# Stable descending sort by count (ties keep insertion order).
ranked_valid_sources = sorted(valid_source_counts.items(), key=lambda item: item[1], reverse=True)
for src, count in ranked_valid_sources:
    print(f'  {count:3d} configs from {src}')

# How much lower the unvalidated best scores are than the validated ones
print('\nComparing best (with overlaps) vs valid (no overlaps):')
for n in range(3, 11):
    best_score = best[n]['score']
    valid_score = best_valid[n]['score']
    diff = valid_score - best_score
    print(f'  N={n}: best={best_score:.6f}, valid={valid_score:.6f}, diff={diff:.6f}')

Sources contributing to valid ensemble:
  159 configs from /home/nonroot/snapshots/santa-2025/21108486172/code/experiments/001_baseline/submission.csv
   34 configs from /home/nonroot/snapshots/santa-2025/21108486172/submission/submission.csv
    7 configs from /home/nonroot/snapshots/santa-2025/21105319338/code/experiments/001_baseline/submission.csv

Comparing best (with overlaps) vs valid (no overlaps):
  N=3: best=0.305312, valid=0.434745, diff=0.129433
  N=4: best=0.228621, valid=0.416545, diff=0.187924
  N=5: best=0.212694, valid=0.416850, diff=0.204155
  N=6: best=0.173625, valid=0.399610, diff=0.225985
  N=7: best=0.162010, valid=0.399897, diff=0.237887
  N=8: best=0.187564, valid=0.385407, diff=0.197844
  N=9: best=0.178013, valid=0.387415, diff=0.209402
  N=10: best=0.166604, valid=0.376630, diff=0.210026


In [17]:
# Score and overlap-check the ensemble_submission stored in the best snapshot
ensemble_snap_path = '/home/nonroot/snapshots/santa-2025/21108486172/code/experiments/001_baseline/ensemble_submission.csv'
df_snap = pd.read_csv(ensemble_snap_path)
df_snap['N'] = df_snap['id'].astype(str).str.split('_').str[0].astype(int)

# Per-N score
snap_scores = {}
for n, g in df_snap.groupby('N'):
    xs, ys, ds = (strip(g[col].to_numpy()) for col in ('x', 'y', 'deg'))
    snap_scores[n] = score_group(xs, ys, ds, tx, ty)

snap_total = sum(snap_scores.values())
print(f'Snapshot ensemble score: {snap_total:.6f}')

# Groups whose trees overlap
snap_overlap_n = [
    n for n in range(1, 201)
    if has_overlap(load_configuration_from_df(n, df_snap))
]

if not snap_overlap_n:
    print('No overlaps in snapshot ensemble!')
else:
    print(f'Overlaps in snapshot ensemble: {snap_overlap_n[:20]}...')

Snapshot ensemble score: 67.772662


Overlaps in snapshot ensemble: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]...


In [18]:
# Score and overlap-check the file that the snapshot actually submitted
submitted_path = '/home/nonroot/snapshots/santa-2025/21108486172/submission/submission.csv'
df_sub = pd.read_csv(submitted_path)
df_sub['N'] = df_sub['id'].astype(str).str.split('_').str[0].astype(int)

# Per-N score
sub_scores = {}
for n, g in df_sub.groupby('N'):
    xs, ys, ds = (strip(g[col].to_numpy()) for col in ('x', 'y', 'deg'))
    sub_scores[n] = score_group(xs, ys, ds, tx, ty)

sub_total = sum(sub_scores.values())
print(f'Submitted file score: {sub_total:.6f}')

# Groups whose trees overlap
sub_overlap_n = [
    n for n in range(1, 201)
    if has_overlap(load_configuration_from_df(n, df_sub))
]

if not sub_overlap_n:
    print('No overlaps in submitted file!')
else:
    print(f'Overlaps in submitted file: {sub_overlap_n[:20]}...')

Submitted file score: 70.734327


No overlaps in submitted file!


In [19]:
# Score and overlap-check the cpp_optimizer submission
cpp_path = '/home/nonroot/snapshots/santa-2025/21108486172/code/experiments/002_cpp_optimizer/submission_v21.csv'
df_cpp = pd.read_csv(cpp_path)
df_cpp['N'] = df_cpp['id'].astype(str).str.split('_').str[0].astype(int)

# Per-N score
cpp_scores = {}
for n, g in df_cpp.groupby('N'):
    xs, ys, ds = (strip(g[col].to_numpy()) for col in ('x', 'y', 'deg'))
    cpp_scores[n] = score_group(xs, ys, ds, tx, ty)

cpp_total = sum(cpp_scores.values())
print(f'C++ optimizer score: {cpp_total:.6f}')

# Groups whose trees overlap
cpp_overlap_n = [
    n for n in range(1, 201)
    if has_overlap(load_configuration_from_df(n, df_cpp))
]

if not cpp_overlap_n:
    print('No overlaps in C++ optimizer!')
else:
    print(f'Overlaps in C++ optimizer: {cpp_overlap_n[:20]}...')

C++ optimizer score: 70.734327


No overlaps in C++ optimizer!


In [21]:
# List which file supplied each of the low-scoring (overlapping) configs
print('Sources of best (with overlaps) configurations for N=3-10:')
for n in range(3, 11):
    entry = best[n]
    print(f'  N={n}: score={entry["score"]:.6f}, src={entry["src"]}')

Sources of best (with overlaps) configurations for N=3-10:
  N=3: score=0.305312, src=/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp19.csv
  N=4: score=0.228621, src=/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp20.csv
  N=5: score=0.212694, src=/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp19.csv
  N=6: score=0.173625, src=/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp19.csv
  N=7: score=0.162010, src=/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp19.csv
  N=8: score=0.187564, src=/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp20.csv
  N=9: score=0.178013, src=/home/nonroot/snapshots/santa-2025/21090949260/code/experiments/009_cpp_parallel_sa/submission_cpp20.csv
  N=10: score=0.1