# Evolver Loop 12 Analysis

## Key Questions:
1. Does the C++ optimizer output have overlaps?
2. Can we run the C++ optimizer for longer to get more improvements?
3. What does the gap analysis show?

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json

getcontext().prec = 30
SCALE_FACTOR = Decimal('1e18')

print('Setup complete')

Setup complete


In [2]:
# Tree shape vertices as (x, y) pairs in polygon order. The right-hand side is
# listed first (from the top vertex down to the base), followed by its mirror
# image on the left-hand side.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
# Parallel coordinate lists (same order as the outline above).
TX = [vx for vx, _vy in _TREE_OUTLINE]
TY = [vy for _vx, vy in _TREE_OUTLINE]

def create_tree_polygon(x, y, angle):
    """Return the tree outline rotated about the origin and then moved to (x, y).

    Args:
        x, y: Translation offsets; anything ``float()`` accepts (numbers or
            numeric strings).
        angle: Rotation passed to ``affinity.rotate`` (degrees unless shapely
            is told otherwise); anything ``float()`` accepts.

    Returns:
        The transformed shapely ``Polygon`` built from the ``TX``/``TY`` vertices.
    """
    dx = float(x)
    dy = float(y)
    theta = float(angle)
    outline = Polygon(list(zip(TX, TY)))
    # Rotate first (about the untranslated origin), then shift into place.
    rotated = affinity.rotate(outline, theta, origin=(0, 0))
    return affinity.translate(rotated, dx, dy)

def create_high_precision_tree(x, y, angle):
    """Build the tree polygon in coordinates scaled up by ``SCALE_FACTOR``.

    The inputs are parsed as ``Decimal`` (via ``str``), the outline vertices and
    the translation offsets are multiplied by the scale factor, and the results
    are converted to ``float`` for shapely.

    Args:
        x, y: Tree position; converted with ``Decimal(str(...))``.
        angle: Rotation handed to ``affinity.rotate`` as a float.

    Returns:
        A shapely ``Polygon`` in scaled coordinates, rotated about the origin and
        then translated by the scaled (x, y).
    """
    dx = Decimal(str(x))
    dy = Decimal(str(y))
    theta = Decimal(str(angle))
    sf = SCALE_FACTOR

    # Outline as decimal literals, in polygon order.
    outline = [
        ('0.0', '0.8'),
        ('0.125', '0.5'),
        ('0.0625', '0.5'),
        ('0.2', '0.25'),
        ('0.1', '0.25'),
        ('0.35', '0.0'),
        ('0.075', '0.0'),
        ('0.075', '-0.2'),
        ('-0.075', '-0.2'),
        ('-0.075', '0.0'),
        ('-0.35', '0.0'),
        ('-0.1', '0.25'),
        ('-0.2', '0.25'),
        ('-0.0625', '0.5'),
        ('-0.125', '0.5'),
    ]
    vertices = [
        (float(Decimal(px) * sf), float(Decimal(py) * sf))
        for px, py in outline
    ]

    scaled_tree = Polygon(vertices)
    rotated = affinity.rotate(scaled_tree, float(theta), origin=(0, 0))
    return affinity.translate(rotated, xoff=float(dx * sf), yoff=float(dy * sf))

def validate_no_overlap_strict(trees_data):
    """Check every pair of trees for a genuine overlap.

    A pair counts as overlapping when the two polygons intersect but do not
    merely touch.

    Args:
        trees_data: Sequence of dicts with ``'x'``, ``'y'`` and ``'deg'`` entries.

    Returns:
        ``(is_valid, overlaps)`` where ``overlaps`` is a list of ``(i, j)`` index
        pairs with ``i < j`` and ``is_valid`` is True when that list is empty.
    """
    # Zero or one tree cannot overlap anything.
    if len(trees_data) <= 1:
        return True, []

    polygons = [
        create_high_precision_tree(t['x'], t['y'], t['deg'])
        for t in trees_data
    ]
    count = len(polygons)
    overlaps = [
        (a, b)
        for a, first in enumerate(polygons)
        for b in range(a + 1, count)
        if first.intersects(polygons[b]) and not first.touches(polygons[b])
    ]
    return not overlaps, overlaps

def get_bbox_side(trees):
    """Return the longer side of the axis-aligned bounding box around all trees.

    Args:
        trees: Sequence of dicts with ``'x'``, ``'y'`` and ``'deg'`` entries.

    Returns:
        ``0`` for an empty sequence, otherwise the larger of the width and height
        of the bounds of the union of all tree polygons.
    """
    if len(trees) == 0:
        return 0
    merged = unary_union(
        [create_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    )
    min_x, min_y, max_x, max_y = merged.bounds
    width = max_x - min_x
    height = max_y - min_y
    return max(width, height)

def get_score(trees, n):
    """Score one configuration: squared bounding-box side divided by ``n``."""
    bbox_side = get_bbox_side(trees)
    squared_side = bbox_side ** 2
    return squared_side / n

print('Core functions defined')

Core functions defined


In [3]:
# Load C++ optimizer output
print('Loading C++ optimizer output...')
# dtype=str keeps every cell exactly as written in the file. Without it, a
# coordinate column that lacks the 's' prefix would be parsed as float and the
# digits handed to the strict (Decimal-based) overlap check could be rounded.
cpp_df = pd.read_csv('/home/code/submission_v18.csv', dtype=str)
# The group size N is the part of the id before the underscore.
cpp_df['N'] = cpp_df['id'].astype(str).str.split('_').str[0].astype(int)

cpp_trees = {}   # N -> list of {'x', 'y', 'deg'} dicts (values kept as strings)
cpp_scores = {}  # N -> score of that configuration

for n, g in cpp_df.groupby('N'):
    # Strip the 's' marker from each value while keeping the numeric text intact.
    trees = [
        {
            'x': str(x).replace('s', ''),
            'y': str(y).replace('s', ''),
            'deg': str(deg).replace('s', ''),
        }
        for x, y, deg in zip(g['x'], g['y'], g['deg'])
    ]
    cpp_trees[n] = trees
    cpp_scores[n] = get_score(trees, n)

cpp_total = sum(cpp_scores.values())
print(f'C++ optimizer score: {cpp_total:.6f}')

Loading C++ optimizer output...


C++ optimizer score: 70.340889


In [4]:
# Check for overlaps in C++ output
print('\nChecking for overlaps in C++ output...')
overlap_ns = []  # every N whose C++ configuration fails the strict check
for n in range(1, 201):
    is_clean, bad_pairs = validate_no_overlap_strict(cpp_trees[n])
    if is_clean:
        continue
    overlap_ns.append(n)
    # Only report details for the first ten offending N values.
    if len(overlap_ns) <= 10:
        print(f'  N={n}: {len(bad_pairs)} overlapping pairs')

print(f'\nTotal N values with overlaps: {len(overlap_ns)}')
if overlap_ns:
    print(f'Overlap N values: {overlap_ns[:20]}...')


Checking for overlaps in C++ output...
  N=7: 1 overlapping pairs
  N=8: 2 overlapping pairs
  N=11: 2 overlapping pairs
  N=13: 2 overlapping pairs
  N=16: 1 overlapping pairs
  N=17: 1 overlapping pairs
  N=18: 1 overlapping pairs
  N=19: 1 overlapping pairs
  N=29: 2 overlapping pairs
  N=30: 1 overlapping pairs



Total N values with overlaps: 62
Overlap N values: [7, 8, 11, 13, 16, 17, 18, 19, 29, 30, 40, 42, 48, 56, 60, 61, 62, 66, 71, 79]...


In [5]:
# Load baseline (exp_012) for comparison
print('\nLoading baseline (exp_012)...')
# dtype=str keeps every cell exactly as written in the file. Without it, a
# coordinate column that lacks the 's' prefix would be parsed as float and the
# digits handed to the strict (Decimal-based) overlap check could be rounded.
baseline_df = pd.read_csv('/home/submission/submission.csv', dtype=str)
# The group size N is the part of the id before the underscore.
baseline_df['N'] = baseline_df['id'].astype(str).str.split('_').str[0].astype(int)

baseline_trees = {}   # N -> list of {'x', 'y', 'deg'} dicts (values kept as strings)
baseline_scores = {}  # N -> score of that configuration

for n, g in baseline_df.groupby('N'):
    # Strip the 's' marker from each value while keeping the numeric text intact.
    trees = [
        {
            'x': str(x).replace('s', ''),
            'y': str(y).replace('s', ''),
            'deg': str(deg).replace('s', ''),
        }
        for x, y, deg in zip(g['x'], g['y'], g['deg'])
    ]
    baseline_trees[n] = trees
    baseline_scores[n] = get_score(trees, n)

baseline_total = sum(baseline_scores.values())
print(f'Baseline score: {baseline_total:.6f}')
print(f'C++ optimizer score: {cpp_total:.6f}')
# Lower is better, so a positive difference means the C++ output improved.
print(f'Improvement: {baseline_total - cpp_total:.6f}')


Loading baseline (exp_012)...


Baseline score: 70.340917
C++ optimizer score: 70.340889
Improvement: 0.000029


In [6]:
# Compare per-N scores
print('\nPer-N comparison (C++ vs baseline):')
# Keep (N, gain) for every N where the C++ score beats the baseline by more
# than the 1e-9 tolerance.
improvements = [
    (n, baseline_scores[n] - cpp_scores[n])
    for n in range(1, 201)
    if baseline_scores[n] - cpp_scores[n] > 1e-9
]

print(f'N values improved by C++: {len(improvements)}')
if improvements:
    print('\nTop 20 improvements:')
    ranked = sorted(improvements, key=lambda item: item[1], reverse=True)
    for n, gain in ranked[:20]:
        print(f'  N={n:3d}: +{gain:.6f}')


Per-N comparison (C++ vs baseline):
N values improved by C++: 10

Top 20 improvements:
  N= 35: +0.000008
  N= 88: +0.000007
  N= 26: +0.000004
  N=124: +0.000003
  N= 36: +0.000003
  N=170: +0.000001
  N=134: +0.000001
  N=164: +0.000001
  N= 76: +0.000000
  N= 53: +0.000000


In [7]:
# Create ensemble: use C++ where it's better AND has no overlaps
rule = '=' * 60
print('\n' + rule)
print('CREATING ENSEMBLE')
print(rule)

ensemble_trees = {}
ensemble_scores = {}
ensemble_sources = {}

for n in range(1, 201):
    # Decide where this N comes from. The strict overlap check only runs when
    # the C++ score actually beats the baseline by more than the tolerance.
    cpp_is_better = cpp_scores[n] < baseline_scores[n] - 1e-9
    if not cpp_is_better:
        source = 'baseline'
    elif validate_no_overlap_strict(cpp_trees[n])[0]:
        source = 'cpp'
    else:
        source = 'baseline (cpp had overlaps)'

    from_cpp = source == 'cpp'
    ensemble_trees[n] = cpp_trees[n] if from_cpp else baseline_trees[n]
    ensemble_scores[n] = cpp_scores[n] if from_cpp else baseline_scores[n]
    ensemble_sources[n] = source

ensemble_total = sum(ensemble_scores.values())
print(f'\nBaseline score: {baseline_total:.6f}')
print(f'C++ optimizer score: {cpp_total:.6f}')
print(f'Ensemble score: {ensemble_total:.6f}')
print(f'Improvement over baseline: {baseline_total - ensemble_total:.6f}')

# Count sources
source_labels = list(ensemble_sources.values())
cpp_count = source_labels.count('cpp')
baseline_count = source_labels.count('baseline')
overlap_fallback = sum('overlaps' in label for label in source_labels)
print(f'\nSources: {cpp_count} from C++, {baseline_count} from baseline, {overlap_fallback} fallback due to overlaps')


CREATING ENSEMBLE



Baseline score: 70.340917
C++ optimizer score: 70.340889
Ensemble score: 70.340889
Improvement over baseline: 0.000029

Sources: 10 from C++, 190 from baseline, 0 fallback due to overlaps


In [8]:
# Final validation
divider = '=' * 60
print('\n' + divider)
print('FINAL VALIDATION')
print(divider)

# Every N whose chosen configuration fails the strict overlap check.
final_overlaps = [
    n
    for n in range(1, 201)
    if not validate_no_overlap_strict(ensemble_trees[n])[0]
]

if not final_overlaps:
    print('All N values pass strict validation!')
else:
    print(f'WARNING: {len(final_overlaps)} N values have overlaps: {final_overlaps[:10]}...')


FINAL VALIDATION


All N values pass strict validation!


In [9]:
# Save ensemble submission
# NOTE(review): '/home/submission/submission.csv' is also the file the baseline
# cell reads, so re-running the notebook after this cell compares against the
# ensemble rather than the original baseline — confirm this is intended.
banner = '=' * 60
print('\n' + banner)
print('SAVING ENSEMBLE SUBMISSION')
print(banner)

# One row per tree: id is "<N zero-padded to 3>_<index>", and every value is
# re-emitted with a single leading 's'.
rows = [
    {
        'id': f'{n:03d}_{idx}',
        **{col: 's' + str(tree[col]).replace('s', '') for col in ('x', 'y', 'deg')},
    }
    for n in range(1, 201)
    for idx, tree in enumerate(ensemble_trees[n])
]

submission_df = pd.DataFrame(rows)
print(f'Submission shape: {submission_df.shape}')

for out_path in (
    '/home/code/experiments/013_cpp_ensemble/submission.csv',
    '/home/submission/submission.csv',
):
    submission_df.to_csv(out_path, index=False)
print('Submission saved!')

print(f'\nFinal score: {ensemble_total:.6f}')
print(f'Target: 68.879235')
print(f'Gap: {ensemble_total - 68.879235:.6f}')


SAVING ENSEMBLE SUBMISSION
Submission shape: (20100, 4)
Submission saved!

Final score: 70.340889
Target: 68.879235
Gap: 1.461654


In [10]:
# Load longer C++ run output
# NOTE(review): this reads the same path as the earlier C++ load cell, yet the
# recorded score differs — presumably the file was overwritten by the longer
# run in between. Confirm, and consider writing that run to its own filename so
# the notebook survives Restart & Run All.
print('Loading longer C++ run output...')
# dtype=str keeps every cell exactly as written in the file. Without it, a
# coordinate column that lacks the 's' prefix would be parsed as float and the
# digits handed to the strict (Decimal-based) overlap check could be rounded.
cpp_df2 = pd.read_csv('/home/code/submission_v18.csv', dtype=str)
# The group size N is the part of the id before the underscore.
cpp_df2['N'] = cpp_df2['id'].astype(str).str.split('_').str[0].astype(int)

cpp_trees2 = {}   # N -> list of {'x', 'y', 'deg'} dicts (values kept as strings)
cpp_scores2 = {}  # N -> score of that configuration

for n, g in cpp_df2.groupby('N'):
    # Strip the 's' marker from each value while keeping the numeric text intact.
    trees = [
        {
            'x': str(x).replace('s', ''),
            'y': str(y).replace('s', ''),
            'deg': str(deg).replace('s', ''),
        }
        for x, y, deg in zip(g['x'], g['y'], g['deg'])
    ]
    cpp_trees2[n] = trees
    cpp_scores2[n] = get_score(trees, n)

cpp_total2 = sum(cpp_scores2.values())
print(f'Longer C++ optimizer score: {cpp_total2:.6f}')

# Check for overlaps
print('\nChecking for overlaps...')
overlap_ns2 = []
for n in range(1, 201):
    valid, overlaps = validate_no_overlap_strict(cpp_trees2[n])
    if not valid:
        overlap_ns2.append(n)

print(f'Total N values with overlaps: {len(overlap_ns2)}')

# Create ensemble
print('\nCreating ensemble...')
# The scan above already validated every N, so reuse its result instead of
# running the pairwise overlap check a second time for each improved N.
overlapping_ns2 = set(overlap_ns2)
ensemble_trees2 = {}
ensemble_scores2 = {}
cpp_used = 0
for n in range(1, 201):
    cpp_is_better = cpp_scores2[n] < baseline_scores[n] - 1e-9
    if cpp_is_better and n not in overlapping_ns2:
        # C++ wins only when it is strictly better AND overlap-free.
        ensemble_trees2[n] = cpp_trees2[n]
        ensemble_scores2[n] = cpp_scores2[n]
        cpp_used += 1
    else:
        ensemble_trees2[n] = baseline_trees[n]
        ensemble_scores2[n] = baseline_scores[n]

ensemble_total2 = sum(ensemble_scores2.values())
print(f'\nBaseline score: {baseline_total:.6f}')
print(f'Longer C++ score: {cpp_total2:.6f}')
print(f'Ensemble score: {ensemble_total2:.6f}')
print(f'Improvement: {baseline_total - ensemble_total2:.6f}')
print(f'N values from C++: {cpp_used}')

Loading longer C++ run output...


Longer C++ optimizer score: 70.340793

Checking for overlaps...


Total N values with overlaps: 63

Creating ensemble...



Baseline score: 70.340917
Longer C++ score: 70.340793
Ensemble score: 70.340793
Improvement: 0.000125
N values from C++: 12


In [12]:
# Save the ensemble with C++ improvements
TARGET_SCORE = 68.879235  # score to beat; used for the gap report and metrics

# Run the strict overlap check on the exact configurations about to be
# written. Only the C++-sourced N values were validated when this ensemble was
# built, so baseline-sourced entries would otherwise be saved unchecked.
invalid_ns = [
    n
    for n in range(1, 201)
    if not validate_no_overlap_strict(ensemble_trees2[n])[0]
]
if invalid_ns:
    raise ValueError(
        f'Refusing to save: {len(invalid_ns)} N values have overlaps: {invalid_ns[:10]}'
    )

print('Saving ensemble submission...')

# One row per tree: id is "<N zero-padded to 3>_<index>", and every value is
# re-emitted with a single leading 's'.
rows = [
    {
        'id': f'{n:03d}_{i}',
        'x': 's' + str(t['x']).replace('s', ''),
        'y': 's' + str(t['y']).replace('s', ''),
        'deg': 's' + str(t['deg']).replace('s', ''),
    }
    for n in range(1, 201)
    for i, t in enumerate(ensemble_trees2[n])
]

submission_df = pd.DataFrame(rows)
# NOTE(review): the second path is also the file the baseline cell reads, so a
# later re-run of the notebook will treat this ensemble as the baseline.
submission_df.to_csv('/home/code/experiments/013_cpp_ensemble/submission.csv', index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)

print(f'Submission saved!')
print(f'Final score: {ensemble_total2:.6f}')
print(f'Target: {TARGET_SCORE}')
print(f'Gap: {ensemble_total2 - TARGET_SCORE:.6f}')

# Save metrics
metrics = {
    'cv_score': ensemble_total2,
    'baseline_score': baseline_total,
    'improvement': baseline_total - ensemble_total2,
    'cpp_n_improved': cpp_used,
    'cpp_overlaps': len(overlap_ns2),
    'target': TARGET_SCORE,
    'gap': ensemble_total2 - TARGET_SCORE
}

with open('/home/code/experiments/013_cpp_ensemble/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print('\nMetrics saved!')
print(json.dumps(metrics, indent=2))

Saving ensemble submission...
Submission saved!
Final score: 70.340793
Target: 68.879235
Gap: 1.461558

Metrics saved!
{
  "cv_score": 70.34079283179824,
  "baseline_score": 70.34091744807057,
  "improvement": 0.0001246162723305133,
  "cpp_n_improved": 12,
  "cpp_overlaps": 63,
  "target": 68.879235,
  "gap": 1.4615578317982454
}
