# Evolver Loop 2 Analysis

## Goal: Understand the gap to target and plan next experiment

**Target Score:** 68.922808
**Current Best:** 70.676099 (gap: 1.75 points)

## Key Insights from Evaluator:
1. The bbox3 optimizer's perturbations are too coarse (0.01-0.1 units)
2. Need **fractional translation** with micro-adjustments (0.001-0.00001 units)
3. Need **ensemble approach** - best config for each N from multiple sources
4. Need **per-N analysis** to identify worst-performing configurations

In [1]:
import pandas as pd
import numpy as np
import os
import glob

# Candidate submission CSVs to compare; each is reported with its row count.
print("=== Available Submissions ===")
submission_files = [
    '/home/code/preoptimized/santa-2025.csv',
    '/home/code/preoptimized/ensemble.csv',
    '/home/code/preoptimized/bucket-of-chump/submission.csv',
    '/home/code/preoptimized/telegram/71.97.csv',
    '/home/code/preoptimized/telegram/72.49.csv',
]

for csv_path in submission_files:
    # Report missing files explicitly instead of failing on read.
    if not os.path.exists(csv_path):
        print(f"{csv_path}: NOT FOUND")
        continue
    row_count = len(pd.read_csv(csv_path))
    print(f"{csv_path}: {row_count} rows")

=== Available Submissions ===
/home/code/preoptimized/santa-2025.csv: 20100 rows
/home/code/preoptimized/ensemble.csv: 20100 rows
/home/code/preoptimized/bucket-of-chump/submission.csv: 20100 rows
/home/code/preoptimized/telegram/71.97.csv: 20100 rows
/home/code/preoptimized/telegram/72.49.csv: 20100 rows


In [2]:
# Scoring functions
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import math

# Set the current Decimal context to 30 significant digits; ChristmasTree
# parses its coordinates and builds its outline with Decimal arithmetic.
getcontext().prec = 30

class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    The outline is defined around the origin (tip at y = 0.8, trunk bottom
    at y = -0.2, widest tier 0.7 across), rotated about (0, 0) by ``angle``
    and then translated by ``(center_x, center_y)``.

    Attributes:
        center_x, center_y, angle: the constructor arguments as ``Decimal``.
        polygon: the placed shapely ``Polygon``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Parse the placement and build the placed polygon.

        Args:
            center_x: x offset, anything ``Decimal()`` accepts.
            center_y: y offset, anything ``Decimal()`` accepts.
            angle: rotation passed to ``affinity.rotate`` (degrees under
                shapely's default settings).
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights at which the outline changes width.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -Decimal('0.2')

        # Right-hand half of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (top_w / 2, tier_1_y),
            (top_w / 4, tier_1_y),
            (mid_w / 2, tier_2_y),
            (mid_w / 4, tier_2_y),
            (base_w / 2, base_y),
            (trunk_w / 2, base_y),
            (trunk_w / 2, trunk_bottom_y),
        ]
        # The left half is the mirror image, walked back up towards the tip.
        left_side = [(-px, py) for px, py in reversed(right_side)]

        outline = [(Decimal('0'), tip_y), *right_side, *left_side]
        initial_polygon = Polygon([(float(px), float(py)) for px, py in outline])

        # Rotate about the tree's own origin first, then move it into place.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def load_trees_for_n(df, n):
    """Build the trees of the ``n``-tree configuration stored in ``df``.

    Rows are selected by an ``id`` that starts with ``n`` zero-padded to
    three digits followed by an underscore (e.g. ``007_``). Leading ``s``
    characters are stripped from the ``x``, ``y`` and ``deg`` values before
    they are handed to ``ChristmasTree``.

    Returns:
        A list of ``ChristmasTree`` objects, one per matching row, in row order.
    """
    matching = df[df['id'].str.startswith(f"{n:03d}_")]

    def _clean(value):
        # Values may carry a leading 's'; drop it and hand over plain text.
        return str(value).lstrip('s')

    return [
        ChristmasTree(_clean(x), _clean(y), _clean(deg))
        for x, y, deg in zip(matching['x'], matching['y'], matching['deg'])
    ]

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Args:
        trees: sequence of objects exposing ``polygon.exterior.coords``.

    Returns:
        ``0`` for an empty sequence, otherwise the larger of the x-extent
        and y-extent over every exterior vertex.
    """
    if not trees:
        return 0

    # Stack every exterior vertex of every tree into one (num_points, 2) array.
    vertices = np.vstack([np.array(t.polygon.exterior.coords) for t in trees])
    xs = vertices[:, 0]
    ys = vertices[:, 1]

    width = xs.max() - xs.min()
    height = ys.max() - ys.min()
    return max(width, height)

def score_n(df, n):
    """Score the ``n``-tree configuration in ``df`` as ``side**2 / n``.

    Returns:
        The score, or ``None`` when ``df`` does not hold exactly ``n`` trees
        for this ``n``.
    """
    trees = load_trees_for_n(df, n)
    if len(trees) == n:
        return (get_bounding_box_side(trees) ** 2) / n
    # Incomplete or over-full configuration: signal "no score" to the caller.
    return None

# Confirmation that this cell ran and the scoring helpers above are in scope.
print("Scoring functions defined")

Scoring functions defined


In [3]:
# Score all submissions and compare per-N
print("Scoring all submissions (this takes a few minutes)...")

# Load every submission that exists on disk, keyed by a short unique label.
submissions = {}
for f in submission_files:
    if not os.path.exists(f):
        continue
    name = os.path.splitext(os.path.basename(f))[0]
    if name in submissions:
        # Two sources share a basename (e.g. several "submission.csv").
        # Fall back to the full path so the earlier one is not overwritten.
        name = f
    submissions[name] = pd.read_csv(f)
    print(f"Loaded {name}")

# scores_per_n[label][n] -> score of the n-tree configuration in that source.
scores_per_n = {name: {} for name in submissions}

for name, df in submissions.items():
    print(f"\nScoring {name}...")
    total = 0
    missing = []  # N values for which score_n() returned None
    for n in range(1, 201):
        sc = score_n(df, n)
        if sc is None:
            missing.append(n)
            continue
        scores_per_n[name][n] = sc
        total += sc
    print(f"  Total: {total:.6f}")
    if missing:
        # A partial total looks better than it is; make that visible.
        print(
            f"  WARNING: {len(missing)} N value(s) could not be scored "
            f"(first few: {missing[:5]}); the total above is partial"
        )

Scoring all submissions (this takes a few minutes)...
Loaded santa-2025
Loaded ensemble
Loaded submission
Loaded 71.97
Loaded 72.49

Scoring santa-2025...


  Total: 70.676102

Scoring ensemble...


  Total: 70.676102

Scoring submission...


  Total: 70.676501

Scoring 71.97...


  Total: 71.972027

Scoring 72.49...


  Total: 72.495739


In [4]:
# Create ensemble - for every N keep the lowest-scoring configuration
print("\n=== Creating Ensemble ===")

best_per_n = {}
best_source_per_n = {}

for n in range(1, 201):
    # Strict '<' below means the first source listed wins a tie.
    winning_source, winning_score = None, float('inf')
    for source_name, per_n_scores in scores_per_n.items():
        if n in per_n_scores and per_n_scores[n] < winning_score:
            winning_source, winning_score = source_name, per_n_scores[n]
    best_per_n[n] = winning_score
    best_source_per_n[n] = winning_source

ensemble_total = sum(best_per_n.values())
print(f"\nEnsemble total score: {ensemble_total:.6f}")
print(f"Gap to target (68.922808): {ensemble_total - 68.922808:.6f}")

# How many N values each source contributes to the ensemble
from collections import Counter
source_counts = Counter(best_source_per_n.values())
print("\nSource distribution:")
for label, n_configs in source_counts.most_common():
    print(f"  {label}: {n_configs} configs")


=== Creating Ensemble ===

Ensemble total score: 70.676102
Gap to target (68.922808): 1.753294

Source distribution:
  santa-2025: 200 configs


In [5]:
# Identify worst-performing N values (highest score contribution)
print("\n=== Worst-Performing N Values (Highest Score Contribution) ===")

# Rank (N, score) pairs from the largest per-N score to the smallest.
sorted_n = sorted(best_per_n.items(), key=lambda pair: pair[1], reverse=True)

print("\nTop 20 worst N values:")
print("{:>5} {:>12} {:>10} {:>20}".format('N', 'Score', 'Side', 'Source'))
print("-" * 50)
row_template = "{:>5} {:>12.6f} {:>10.4f} {:>20}"
for n, score in sorted_n[:20]:
    # Recover the bounding-box side from score = side**2 / n.
    side = np.sqrt(score * n)
    print(row_template.format(n, score, side, best_source_per_n[n]))


=== Worst-Performing N Values (Highest Score Contribution) ===

Top 20 worst N values:
    N        Score       Side               Source
--------------------------------------------------
    1     0.661250     0.8132           santa-2025
    2     0.450779     0.9495           santa-2025
    3     0.434745     1.1420           santa-2025
    5     0.416850     1.4437           santa-2025
    4     0.416545     1.2908           santa-2025
    7     0.399897     1.6731           santa-2025
    6     0.399610     1.5484           santa-2025
    9     0.387415     1.8673           santa-2025
    8     0.385407     1.7559           santa-2025
   15     0.379203     2.3850           santa-2025
   10     0.376630     1.9407           santa-2025
   21     0.376451     2.8117           santa-2025
   20     0.376057     2.7425           santa-2025
   11     0.375736     2.0330           santa-2025
   22     0.375258     2.8733           santa-2025
   16     0.374128     2.4466           santa-2025

In [6]:
# Calculate potential improvement from fractional translation
print("\n=== Potential Improvement Analysis ===")


def potential_side_reduction_gain(per_n_scores, reduction):
    """Total score drop if every bounding-box side shrank by ``reduction``.

    Args:
        per_n_scores: mapping ``n -> score`` where ``score = side**2 / n``.
        reduction: amount subtracted from each side.

    Returns:
        Sum over all N of ``score - (side - reduction)**2 / n``. Entries whose
        reduced side would not stay positive are left out of the sum.
    """
    gain = 0
    for n, current_score in per_n_scores.items():
        # Invert score = side**2 / n to get the current side length.
        current_side = np.sqrt(current_score * n)
        new_side = current_side - reduction
        if new_side > 0:
            gain += current_score - (new_side ** 2) / n
    return gain


# Two what-if scenarios: a micro-adjustment (0.001) and a more aggressive
# reduction (0.01) applied to the side of every configuration.
for position, test_reduction in enumerate((0.001, 0.01)):
    total_potential = potential_side_reduction_gain(best_per_n, test_reduction)
    lead = "" if position == 0 else "\n"  # blank line between scenarios
    print(f"{lead}If we reduce each side by {test_reduction}:")
    print(f"  Total potential improvement: {total_potential:.6f}")
    print(f"  New ensemble score: {ensemble_total - total_potential:.6f}")


=== Potential Improvement Analysis ===
If we reduce each side by 0.001:
  Total potential improvement: 0.032764
  New ensemble score: 70.643338

If we reduce each side by 0.01:
  Total potential improvement: 0.327115
  New ensemble score: 70.348988


In [7]:
# Build the ensemble submission
print("\n=== Building Ensemble Submission ===")

ensemble_rows = []

for n in range(1, 201):
    best_source = best_source_per_n[n]
    if best_source is None:
        # No submission had a valid configuration for this N; fail with a
        # clear message rather than an opaque KeyError on the lookup below.
        raise ValueError(f"No source provides a valid configuration for N={n}")
    source_df = submissions[best_source]
    prefix = f"{n:03d}_"
    ensemble_rows.append(source_df[source_df['id'].str.startswith(prefix)])

ensemble_df = pd.concat(ensemble_rows, ignore_index=True)
print(f"Ensemble shape: {ensemble_df.shape}")

# Save ensemble
ensemble_df.to_csv('/home/code/preoptimized/best_ensemble.csv', index=False)
print("Saved to /home/code/preoptimized/best_ensemble.csv")

# Verify score: re-score the assembled frame and compare it with the total
# computed when the per-N winners were selected.
verify_total = 0
unscored = []  # N values for which score_n() returned None
for n in range(1, 201):
    sc = score_n(ensemble_df, n)
    if sc is None:
        unscored.append(n)
    else:
        verify_total += sc
print(f"Verified ensemble score: {verify_total:.6f}")

assert not unscored, f"Ensemble has no valid configuration for N in {unscored[:10]}"
assert abs(verify_total - ensemble_total) < 1e-9, (
    f"Verified score {verify_total!r} does not match expected ensemble total {ensemble_total!r}"
)


=== Building Ensemble Submission ===


Ensemble shape: (20100, 4)
Saved to /home/code/preoptimized/best_ensemble.csv


Verified ensemble score: 70.676102
