# Evolver Loop 3 Analysis

## Situation Analysis

Current best: 70.676102
Target: 68.919154
Gap: 1.756948 (2.49% of the current best)

## Key Questions:
1. What is the per-N breakdown of the gap?
2. Which N values have the most room for improvement?
3. Are there better solutions in the pre-optimized CSVs that we haven't used?

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import glob
import os

# All Decimal arithmetic in this notebook runs at 25 significant digits.
getcontext().prec = 25
# Coordinates are multiplied by this factor before polygons are built.
scale_factor = Decimal('1e15')

# Tree geometry: the outline as (x, y) vertex pairs, split below into the
# parallel TX / TY lists that the rest of the notebook zips back together.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = [vx for vx, _vy in _TREE_OUTLINE]
TY = [vy for _vx, vy in _TREE_OUTLINE]

print('Setup complete')

Setup complete


In [2]:
class ChristmasTree:
    """One placed tree: the TX/TY outline, rotated about the origin and then shifted.

    Parameters are passed through ``str()`` and parsed as ``Decimal``, so
    strings, ints and floats are all accepted.

    Attributes:
        center_x, center_y: translation applied after rotation (Decimal).
        angle: rotation handed to ``shapely.affinity.rotate`` (Decimal);
            shapely interprets it as degrees by default.
        polygon: the resulting shapely Polygon, in coordinates multiplied
            by ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Scale the template outline in Decimal, then drop to float for shapely.
        outline = [
            (float(Decimal(str(vx)) * scale_factor),
             float(Decimal(str(vy)) * scale_factor))
            for vx, vy in zip(TX, TY)
        ]
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)

        # Rotate first (about the outline's own origin), translate second.
        turned = affinity.rotate(Polygon(outline), float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

def parse_value(val):
    """Return ``val`` as a string, dropping one leading 's' when ``val`` is a str that has it.

    Non-string values are simply converted with ``str()``.
    """
    has_s_prefix = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_s_prefix else str(val)

def load_trees_for_n(df, n):
    """Build the ChristmasTree objects for the configuration with ``n`` trees.

    Rows are selected by their ``id`` starting with the zero-padded prefix
    ``f"{n:03d}_"``; each row's ``x``, ``y`` and ``deg`` values go through
    ``parse_value`` before being handed to ``ChristmasTree``.

    Returns a list of trees in row order (empty if no id matches).
    """
    id_prefix = f"{n:03d}_"
    matching = df[df['id'].str.startswith(id_prefix)]
    return [
        ChristmasTree(
            parse_value(record['x']),
            parse_value(record['y']),
            parse_value(record['deg']),
        )
        for _, record in matching.iterrows()
    ]

def calculate_side(trees):
    """Side length of the axis-aligned bounding square around all trees.

    Args:
        trees: iterable-with-len of objects exposing a ``polygon`` attribute
            whose ``bounds`` is ``(minx, miny, maxx, maxy)`` in coordinates
            multiplied by ``scale_factor``.

    Returns:
        Decimal: ``max(width, height)`` of the combined bounding box, divided
        back down by ``scale_factor``; ``Decimal('0')`` for an empty input.
    """
    if not trees:
        return Decimal('0')

    # The bounding box of a union is the envelope of the individual bounding
    # boxes, so take min/max over per-polygon bounds instead of building the
    # full geometric union (costly, and it can raise on invalid geometry).
    boxes = [t.polygon.bounds for t in trees]
    minx = Decimal(min(b[0] for b in boxes)) / scale_factor
    miny = Decimal(min(b[1] for b in boxes)) / scale_factor
    maxx = Decimal(max(b[2] for b in boxes)) / scale_factor
    maxy = Decimal(max(b[3] for b in boxes)) / scale_factor
    return max(maxx - minx, maxy - miny)

def calculate_score_for_n(trees, n):
    """Score of one configuration: bounding-square area divided by ``n``, as a float."""
    edge = calculate_side(trees)
    area = edge * edge
    return float(area / n)

print('Functions defined')

Functions defined


In [3]:
# Load baseline
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv/santa-2025.csv'
baseline_df = pd.read_csv(baseline_path)
print(f'Loaded baseline: {len(baseline_df)} rows')

# Calculate per-N scores: {n: score} for every configuration size 1..200
baseline_scores = {
    n: calculate_score_for_n(load_trees_for_n(baseline_df, n), n)
    for n in range(1, 201)
}

# The overall score is the sum of the per-N scores.
total_baseline = sum(baseline_scores.values())
gap_to_target = total_baseline - 68.919154
print(f'\nTotal baseline score: {total_baseline:.6f}')
print('Target: 68.919154')
print(f'Gap: {gap_to_target:.6f}')

Loaded baseline: 20100 rows



Total baseline score: 70.676102
Target: 68.919154
Gap: 1.756948


In [None]:
# Load all available pre-optimized CSVs and find best per-N
# sorted(): glob returns matches in arbitrary order, and later tie-breaking
# between equal scores follows the scan order, so fix it for reproducibility.
csv_paths = sorted(glob.glob('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/**/*.csv', recursive=True))
print(f'Found {len(csv_paths)} CSV files')

# Load each CSV and calculate scores
all_scores = {}  # {path: {n: score}}
skipped_csvs = {}  # {path: reason the file was left out}
required_columns = {'id', 'x', 'y', 'deg'}  # everything load_trees_for_n reads

# NOTE(review): scores are computed from bounding boxes only; nothing here
# checks that the trees in a file do not overlap. Confirm validity before
# adopting a "better" solution found by this scan.
for path in csv_paths:
    try:
        df = pd.read_csv(path)
        missing = required_columns - set(df.columns)
        if missing:
            skipped_csvs[path] = f'missing columns {sorted(missing)}'
            continue
        scores = {}
        for n in range(1, 201):
            trees = load_trees_for_n(df, n)
            # Only accept a configuration that has exactly n trees.
            if len(trees) == n:
                scores[n] = calculate_score_for_n(trees, n)
        if len(scores) == 200:
            all_scores[path] = scores
            total = sum(scores.values())
            print(f'{os.path.basename(path)}: {total:.6f}')
        else:
            skipped_csvs[path] = f'only {len(scores)}/200 N values are complete'
    except Exception as e:
        # Best-effort scan: one unreadable file must not stop the others,
        # but keep the reason instead of discarding it silently.
        skipped_csvs[path] = f'{type(e).__name__}: {e}'

print(f'\nLoaded {len(all_scores)} valid CSV files')
if skipped_csvs:
    print(f'Skipped {len(skipped_csvs)} CSV files:')
    for skipped_path, reason in skipped_csvs.items():
        print(f'  {os.path.basename(skipped_path)}: {reason}')

In [None]:
# Find best score per N across all CSVs
best_per_n = {}
best_source_per_n = {}
no_result = float('inf')  # sentinel score when no CSV provides this N

for n in range(1, 201):
    # Only scores strictly below the sentinel can ever replace it.
    contenders = [
        (path, scores[n])
        for path, scores in all_scores.items()
        if n in scores and scores[n] < no_result
    ]
    # min() returns the first of several equal minima, so ties go to the
    # CSV that comes first in all_scores.
    best_source, best_score = min(
        contenders, key=lambda entry: entry[1], default=(None, no_result)
    )
    best_per_n[n] = best_score
    best_source_per_n[n] = best_source

# Calculate best possible ensemble score
best_ensemble_score = sum(best_per_n.values())
gap_to_target = best_ensemble_score - 68.919154
ensemble_gain = total_baseline - best_ensemble_score
print(f'Best possible ensemble score: {best_ensemble_score:.6f}')
print('Target: 68.919154')
print(f'Gap to target: {gap_to_target:.6f}')
print(f'\nBaseline score: {total_baseline:.6f}')
print(f'Improvement from ensemble: {ensemble_gain:.6f}')

In [None]:
# Find N values where baseline is NOT the best
# Each entry: (n, improvement, baseline score, best score, source path).
# The 1e-9 margin ignores differences that are only floating-point noise.
improvable_ns = [
    (
        n,
        baseline_scores[n] - best_per_n[n],
        baseline_scores[n],
        best_per_n[n],
        best_source_per_n[n],
    )
    for n in range(1, 201)
    if best_per_n[n] < baseline_scores[n] - 1e-9
]

if not improvable_ns:
    print('Baseline is already the best for all N values!')
else:
    print(f'Found {len(improvable_ns)} N values where better solutions exist:')
    improvable_ns.sort(key=lambda entry: -entry[1])  # Largest improvement first
    # Show at most the top 20.
    for n, gain, base_score, top_score, src in improvable_ns[:20]:
        print(f'  N={n}: baseline={base_score:.6f}, best={top_score:.6f}, improvement={gain:.9f}, source={os.path.basename(src)}')

In [None]:
# Analyze score contribution by N range
# Each entry: (first N, last N inclusive, label)
ranges = [
    (1, 10, 'Small (1-10)'),
    (11, 50, 'Medium (11-50)'),
    (51, 100, 'Large (51-100)'),
    (101, 200, 'Very Large (101-200)'),
]

print('Score contribution by N range:')
print('=' * 60)
for first_n, last_n, label in ranges:
    span = range(first_n, last_n + 1)
    baseline_sum = sum(baseline_scores[n] for n in span)
    best_sum = sum(best_per_n[n] for n in span)
    # Share of the whole baseline score that this range accounts for.
    share = baseline_sum / total_baseline * 100
    print(f'{label}:')
    print(f'  Baseline: {baseline_sum:.6f} ({share:.1f}% of total)')
    print(f'  Best available: {best_sum:.6f}')
    print(f'  Potential improvement: {baseline_sum - best_sum:.6f}')
    print()

In [None]:
# Summary
banner = '=' * 60
print(banner)
print('SUMMARY')
print(banner)

baseline_gap = total_baseline - 68.919154
ensemble_gap = best_ensemble_score - 68.919154
print(f'Current baseline: {total_baseline:.6f}')
print(f'Best ensemble from available CSVs: {best_ensemble_score:.6f}')
print('Target: 68.919154')
print('\nGap analysis:')
print(f'  Baseline to target: {baseline_gap:.6f}')
print(f'  Best ensemble to target: {ensemble_gap:.6f}')
print('\nConclusion:')
# Differences of 0.001 or less are treated as "no real improvement".
ensemble_is_better = best_ensemble_score < total_baseline - 0.001
if ensemble_is_better:
    print(f'  There are better solutions available! Ensemble could improve by {total_baseline - best_ensemble_score:.6f}')
else:
    print('  Baseline is already the best available from pre-optimized CSVs.')
    print('  Need to run longer optimization or try fundamentally different approaches.')