# Evolver Loop 2 - LB Feedback Analysis

**LB Score: 117.2815** (CV: 117.2815, gap: 0.0000)

The CV-LB gap is essentially zero, which is expected for this optimization problem (no train/test split).

## Key Evaluator Feedback:
1. **ENSEMBLE APPROACH NOT IMPLEMENTED** - This is the #1 technique in top kernels
2. **No external solution datasets used** - Pre-computed solutions may already score better than our current submission
3. **bbox3 binary not used** - Primary optimizer in winning solutions
4. **Limited optimization time** - Top kernels use 3-hour budgets

## Analysis Goals:
1. Scan all available snapshot CSVs and score them
2. Build an ensemble baseline from best configurations
3. Quantify the gap between the ensemble baseline score and the target score (68.931058)

In [1]:
import glob
import os
import warnings
from collections import Counter
from decimal import Decimal, getcontext

import numpy as np
import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from tqdm import tqdm

# Precision (significant digits) of the decimal context used for Decimal arithmetic below.
getcontext().prec = 25
# Multiplier applied to tree coordinates before they are handed to Shapely
# (used by ChristmasTree when building and translating its polygon).
scale_factor = Decimal("1e18")

print("Libraries loaded")

Libraries loaded


In [2]:
# ChristmasTree class
class ChristmasTree:
    """A single tree outline placed at a given position and rotation.

    Attributes:
        center_x, center_y: Decimal position of the tree's reference point
            (the middle of the line where the trunk meets the lowest tier).
        angle: Decimal rotation, converted to float and passed to
            ``shapely.affinity.rotate`` (which interprets it as degrees by default).
        polygon: the Shapely polygon of the tree, rotated about the origin and
            then translated; all coordinates are multiplied by the module-level
            ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        # Inputs go through str() first so floats/ints/strings all become Decimals.
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Tree dimensions (unscaled).
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the tip, the tier boundaries and the trunk bottom.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Half / quarter widths used for the outer and inner corner of each tier.
        half_top = top_w / Decimal('2')
        quarter_top = top_w / Decimal('4')
        half_mid = mid_w / Decimal('2')
        quarter_mid = mid_w / Decimal('4')
        half_base = base_w / Decimal('2')
        half_trunk = trunk_w / Decimal('2')

        # Scaled heights.
        y_tip = tip_y * scale_factor
        y_tier_1 = tier_1_y * scale_factor
        y_tier_2 = tier_2_y * scale_factor
        y_base = base_y * scale_factor
        y_trunk_bottom = trunk_bottom_y * scale_factor

        # Outline starts at the tip, walks down the right-hand side, crosses the
        # trunk bottom, and climbs back up the left-hand side.
        outline = [
            (Decimal('0.0') * scale_factor, y_tip),
            (half_top * scale_factor, y_tier_1),
            (quarter_top * scale_factor, y_tier_1),
            (half_mid * scale_factor, y_tier_2),
            (quarter_mid * scale_factor, y_tier_2),
            (half_base * scale_factor, y_base),
            (half_trunk * scale_factor, y_base),
            (half_trunk * scale_factor, y_trunk_bottom),
            (-half_trunk * scale_factor, y_trunk_bottom),
            (-half_trunk * scale_factor, y_base),
            (-half_base * scale_factor, y_base),
            (-quarter_mid * scale_factor, y_tier_2),
            (-half_mid * scale_factor, y_tier_2),
            (-quarter_top * scale_factor, y_tier_1),
            (-half_top * scale_factor, y_tier_1),
        ]
        initial_polygon = Polygon(outline)

        # Rotate about the origin first, then move to the (scaled) centre.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(rotated, xoff=shift_x, yoff=shift_y)

print("ChristmasTree class defined")

ChristmasTree class defined


In [3]:
# Fast scoring using numpy (no Shapely for speed)
def fast_score_group(xs, ys, degs, n):
    """Return side**2 / n for the axis-aligned bounding square of a group of trees.

    Each tree is the fixed 15-vertex template rotated by ``degs[i]`` degrees about
    its own origin and shifted to ``(xs[i], ys[i])``. Only vertex coordinates are
    used; no polygons are built and no overlap check is performed.

    Args:
        xs, ys: per-tree translations (indexable, same length).
        degs: per-tree rotation in degrees.
        n: divisor applied to the squared side length.

    Returns:
        The squared side of the bounding square divided by ``n``.

    Raises:
        ValueError: if ``xs`` is empty (reduction over a zero-size array).
    """
    # Tree template vertices
    template_x = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
                           -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
    template_y = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
                           -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

    tree_count = len(xs)
    placed_x = np.empty((tree_count, template_x.size))
    placed_y = np.empty((tree_count, template_y.size))

    for k in range(tree_count):
        theta = np.radians(degs[k])
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        # Rotate the template, then translate it to the tree's position.
        placed_x[k] = cos_t * template_x - sin_t * template_y + xs[k]
        placed_y[k] = sin_t * template_x + cos_t * template_y + ys[k]

    width = placed_x.max() - placed_x.min()
    height = placed_y.max() - placed_y.min()
    side = max(width, height)
    return side**2 / n

def strip_s(val):
    """Convert a value to float, dropping a single leading 's' if present."""
    text = str(val)
    if text.startswith('s'):
        text = text[1:]
    return float(text)

def score_csv(filepath, max_n=200):
    """Score a submission CSV file.

    The file must have ``id``, ``x``, ``y`` and ``deg`` columns. The part of ``id``
    before the first underscore is taken as the group size N; each group with
    1 <= N <= ``max_n`` and exactly N rows is scored with ``fast_score_group``.
    Groups outside that range or with the wrong row count are skipped.

    Args:
        filepath: path of the CSV to read.
        max_n: largest group size to score (default 200).

    Returns:
        ``(total_score, scores_per_n)`` where ``scores_per_n`` maps N to its score
        and ``total_score`` is the sum over the groups that were scored, or
        ``(None, None)`` when the required columns are missing or the file could
        not be read/parsed.

    Warns:
        UserWarning: when reading/parsing fails (instead of failing silently), and
        when fewer than ``max_n`` groups were scored - in that case the total is
        only a partial sum and is not comparable with totals of complete files.
    """
    try:
        df = pd.read_csv(filepath)
        if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            return None, None

        # Group size N is encoded as the id prefix before the first underscore.
        group_n = df['id'].astype(str).str.split('_').str[0].astype(int)

        total_score = 0.0
        scores_per_n = {}

        for n, group in df.groupby(group_n):
            if n < 1 or n > max_n:
                continue
            xs = np.array([strip_s(v) for v in group['x']])
            ys = np.array([strip_s(v) for v in group['y']])
            degs = np.array([strip_s(v) for v in group['deg']])

            # A group for N must contain exactly N trees; otherwise it is ignored.
            if len(xs) != n:
                continue

            score = fast_score_group(xs, ys, degs, n)
            scores_per_n[n] = score
            total_score += score
    except Exception as e:
        # Best-effort scan over arbitrary CSVs: report the failure, do not abort.
        warnings.warn(f"Could not score {filepath}: {type(e).__name__}: {e}")
        return None, None

    if len(scores_per_n) < max_n:
        warnings.warn(
            f"{filepath}: only {len(scores_per_n)} of {max_n} groups were scored; "
            "total_score is a partial sum"
        )
    return total_score, scores_per_n

print("Scoring functions defined")

Scoring functions defined


In [4]:
# Find all available CSV files
csv_patterns = [
    # Our current candidates
    '/home/code/submission_candidates/*.csv',
    # Snapshot files
    '/home/nonroot/snapshots/santa-2025/*/code/submission_candidates/*.csv',
    '/home/nonroot/snapshots/santa-2025/*/code/*.csv',
    '/home/nonroot/snapshots/santa-2025/*/submission/*.csv',
    '/home/nonroot/snapshots/santa-2025/*/code/experiments/*/*.csv',
]

# Remove duplicates. The result is sorted because plain set iteration order for
# strings can change between kernel restarts, which would make downstream
# tie-breaking (ranking, ensemble source selection) non-reproducible.
csv_files = sorted({path for pattern in csv_patterns for path in glob.glob(pattern)})
print(f"Found {len(csv_files)} CSV files to analyze")

Found 81 CSV files to analyze


In [5]:
# Score all CSV files
results = []

for filepath in tqdm(csv_files, desc="Scoring CSVs"):
    total_score, scores_per_n = score_csv(filepath)
    # score_csv returns (None, None) for files it could not score.
    if total_score is not None:
        results.append({
            'filepath': filepath,
            'total_score': total_score,
            'scores_per_n': scores_per_n
        })

# Sort by score (lower is better); ties are broken by path so the order is reproducible.
results.sort(key=lambda x: (x['total_score'], x['filepath']))

print(f"\nSuccessfully scored {len(results)} files")
print("\nTop 10 best scores:")
for rank, r in enumerate(results[:10], start=1):
    # Print the full path: many snapshots share the same file name, so the base
    # name alone cannot tell the entries apart. The group count shows whether the
    # total covers every N or is only a partial sum.
    print(f"{rank}. {r['total_score']:.6f} - {r['filepath']} "
          f"({len(r['scores_per_n'])} groups scored)")

Scoring CSVs:   0%|          | 0/81 [00:00<?, ?it/s]

Scoring CSVs:   1%|          | 1/81 [00:00<00:20,  3.90it/s]

Scoring CSVs:   2%|▏         | 2/81 [00:00<00:20,  3.86it/s]

Scoring CSVs:   4%|▎         | 3/81 [00:00<00:20,  3.71it/s]

Scoring CSVs:   5%|▍         | 4/81 [00:01<00:20,  3.78it/s]

Scoring CSVs:   6%|▌         | 5/81 [00:01<00:19,  3.85it/s]

Scoring CSVs:   7%|▋         | 6/81 [00:01<00:19,  3.86it/s]

Scoring CSVs:   9%|▊         | 7/81 [00:01<00:19,  3.76it/s]

Scoring CSVs:  10%|▉         | 8/81 [00:02<00:19,  3.81it/s]

Scoring CSVs:  11%|█         | 9/81 [00:02<00:18,  3.86it/s]

Scoring CSVs:  12%|█▏        | 10/81 [00:02<00:18,  3.90it/s]

Scoring CSVs:  14%|█▎        | 11/81 [00:02<00:18,  3.82it/s]

Scoring CSVs:  15%|█▍        | 12/81 [00:03<00:18,  3.76it/s]

Scoring CSVs:  16%|█▌        | 13/81 [00:03<00:18,  3.74it/s]

Scoring CSVs:  17%|█▋        | 14/81 [00:03<00:17,  3.81it/s]

Scoring CSVs:  19%|█▊        | 15/81 [00:03<00:17,  3.76it/s]

Scoring CSVs:  20%|█▉        | 16/81 [00:04<00:16,  3.84it/s]

Scoring CSVs:  21%|██        | 17/81 [00:04<00:16,  3.78it/s]

Scoring CSVs:  22%|██▏       | 18/81 [00:04<00:16,  3.75it/s]

Scoring CSVs:  23%|██▎       | 19/81 [00:04<00:16,  3.82it/s]

Scoring CSVs:  25%|██▍       | 20/81 [00:05<00:15,  3.87it/s]

Scoring CSVs:  26%|██▌       | 21/81 [00:05<00:15,  3.80it/s]

Scoring CSVs:  27%|██▋       | 22/81 [00:05<00:15,  3.86it/s]

Scoring CSVs:  28%|██▊       | 23/81 [00:06<00:14,  3.87it/s]

Scoring CSVs:  30%|██▉       | 24/81 [00:06<00:14,  3.90it/s]

Scoring CSVs:  31%|███       | 25/81 [00:06<00:14,  3.93it/s]

Scoring CSVs:  32%|███▏      | 26/81 [00:06<00:14,  3.84it/s]

Scoring CSVs:  33%|███▎      | 27/81 [00:07<00:14,  3.77it/s]

Scoring CSVs:  35%|███▍      | 28/81 [00:07<00:13,  3.84it/s]

Scoring CSVs:  36%|███▌      | 29/81 [00:07<00:13,  3.79it/s]

Scoring CSVs:  37%|███▋      | 30/81 [00:07<00:13,  3.75it/s]

Scoring CSVs:  38%|███▊      | 31/81 [00:08<00:13,  3.73it/s]

Scoring CSVs:  40%|███▉      | 32/81 [00:08<00:12,  3.81it/s]

Scoring CSVs:  41%|████      | 33/81 [00:08<00:12,  3.85it/s]

Scoring CSVs:  42%|████▏     | 34/81 [00:08<00:12,  3.86it/s]

Scoring CSVs:  43%|████▎     | 35/81 [00:09<00:12,  3.79it/s]

Scoring CSVs:  44%|████▍     | 36/81 [00:09<00:11,  3.76it/s]

Scoring CSVs:  46%|████▌     | 37/81 [00:09<00:11,  3.82it/s]

Scoring CSVs:  47%|████▋     | 38/81 [00:09<00:11,  3.87it/s]

Scoring CSVs:  48%|████▊     | 39/81 [00:10<00:11,  3.79it/s]

Scoring CSVs:  49%|████▉     | 40/81 [00:10<00:10,  3.81it/s]

Scoring CSVs:  51%|█████     | 41/81 [00:10<00:10,  3.87it/s]

Scoring CSVs:  52%|█████▏    | 42/81 [00:10<00:09,  3.91it/s]

Scoring CSVs:  53%|█████▎    | 43/81 [00:11<00:09,  3.81it/s]

Scoring CSVs:  54%|█████▍    | 44/81 [00:11<00:09,  3.72it/s]

Scoring CSVs:  56%|█████▌    | 45/81 [00:11<00:09,  3.65it/s]

Scoring CSVs:  57%|█████▋    | 46/81 [00:12<00:09,  3.65it/s]

Scoring CSVs:  58%|█████▊    | 47/81 [00:12<00:09,  3.65it/s]

Scoring CSVs:  59%|█████▉    | 48/81 [00:12<00:08,  3.75it/s]

Scoring CSVs:  60%|██████    | 49/81 [00:12<00:08,  3.62it/s]

Scoring CSVs:  62%|██████▏   | 50/81 [00:13<00:08,  3.72it/s]

Scoring CSVs:  63%|██████▎   | 51/81 [00:13<00:07,  3.80it/s]

Scoring CSVs:  64%|██████▍   | 52/81 [00:13<00:07,  3.86it/s]

Scoring CSVs:  65%|██████▌   | 53/81 [00:13<00:07,  3.76it/s]

Scoring CSVs:  67%|██████▋   | 54/81 [00:14<00:07,  3.82it/s]

Scoring CSVs:  68%|██████▊   | 55/81 [00:14<00:06,  3.87it/s]

Scoring CSVs:  69%|██████▉   | 56/81 [00:14<00:06,  3.81it/s]

Scoring CSVs:  70%|███████   | 57/81 [00:15<00:06,  3.70it/s]

Scoring CSVs:  72%|███████▏  | 58/81 [00:15<00:06,  3.58it/s]

Scoring CSVs:  73%|███████▎  | 59/81 [00:15<00:06,  3.55it/s]

Scoring CSVs:  74%|███████▍  | 60/81 [00:15<00:05,  3.59it/s]

Scoring CSVs:  75%|███████▌  | 61/81 [00:16<00:05,  3.70it/s]

Scoring CSVs:  77%|███████▋  | 62/81 [00:16<00:05,  3.67it/s]

Scoring CSVs:  78%|███████▊  | 63/81 [00:16<00:04,  3.77it/s]

Scoring CSVs:  79%|███████▉  | 64/81 [00:16<00:04,  3.77it/s]

Scoring CSVs:  80%|████████  | 65/81 [00:17<00:04,  3.81it/s]

Scoring CSVs:  81%|████████▏ | 66/81 [00:17<00:03,  3.86it/s]

Scoring CSVs:  83%|████████▎ | 67/81 [00:17<00:03,  3.79it/s]

Scoring CSVs:  84%|████████▍ | 68/81 [00:17<00:03,  3.84it/s]

Scoring CSVs:  85%|████████▌ | 69/81 [00:18<00:03,  3.89it/s]

Scoring CSVs:  86%|████████▋ | 70/81 [00:18<00:02,  3.77it/s]

Scoring CSVs:  88%|████████▊ | 71/81 [00:18<00:02,  3.73it/s]

Scoring CSVs:  89%|████████▉ | 72/81 [00:19<00:02,  3.69it/s]

Scoring CSVs:  90%|█████████ | 73/81 [00:19<00:02,  3.78it/s]

Scoring CSVs:  91%|█████████▏| 74/81 [00:19<00:01,  3.85it/s]

Scoring CSVs:  93%|█████████▎| 75/81 [00:19<00:01,  3.89it/s]

Scoring CSVs:  94%|█████████▍| 76/81 [00:20<00:01,  3.82it/s]

Scoring CSVs:  95%|█████████▌| 77/81 [00:20<00:01,  3.86it/s]

Scoring CSVs:  96%|█████████▋| 78/81 [00:20<00:00,  3.91it/s]

Scoring CSVs:  98%|█████████▊| 79/81 [00:20<00:00,  3.83it/s]

Scoring CSVs:  99%|█████████▉| 80/81 [00:21<00:00,  3.88it/s]

Scoring CSVs: 100%|██████████| 81/81 [00:21<00:00,  3.76it/s]

Scoring CSVs: 100%|██████████| 81/81 [00:21<00:00,  3.79it/s]


Successfully scored 81 files

Top 10 best scores:
1. 87.364112 - submission.csv
2. 87.364112 - candidate_002.csv
3. 87.364112 - submission.csv
4. 87.364112 - submission.csv
5. 87.364112 - candidate_002.csv
6. 87.364112 - candidate_002.csv
7. 87.364112 - candidate_002.csv
8. 87.364112 - candidate_002.csv
9. 87.364112 - submission.csv
10. 87.364112 - submission.csv





In [6]:
# Build ensemble: for each N, pick the best configuration across all sources
print("\n" + "="*60)
print("BUILDING ENSEMBLE BASELINE")
print("="*60)

target_score = 68.931058

# NOTE(review): fast_score_group only measures the bounding square of the tree
# vertices and does not check whether trees overlap, so the configurations picked
# here should be validated for overlaps before being submitted - confirm.
best_per_n = {n: {'score': float('inf'), 'source': None, 'data': None} for n in range(1, 201)}

for r in results:
    for n, score in r['scores_per_n'].items():
        # Strict '<' keeps the first file seen when several files tie for an N.
        if score < best_per_n[n]['score']:
            best_per_n[n]['score'] = score
            best_per_n[n]['source'] = r['filepath']

# N values for which no file provided a scored configuration.
missing_n = [n for n in range(1, 201) if best_per_n[n]['source'] is None]
if missing_n:
    print(f"\nWARNING: no configuration found for N in {missing_n}; ensemble score is inf")

# Calculate ensemble score
ensemble_score = sum(best_per_n[n]['score'] for n in range(1, 201))
print(f"\nEnsemble baseline score: {ensemble_score:.6f}")
print(f"Target score: {target_score:.6f}")
print(f"Gap to target: {ensemble_score - target_score:.6f}")

# Show source distribution. Full paths are counted because different snapshots
# contain files with identical base names; uncovered N values are left out.
sources = Counter(
    best_per_n[n]['source'] for n in range(1, 201) if best_per_n[n]['source'] is not None
)
print(f"\nSources used in ensemble:")
for src, count in sources.most_common(10):
    print(f"  {src}: {count} configurations")


BUILDING ENSEMBLE BASELINE

Ensemble baseline score: 85.649334
Target score: 68.931058
Gap to target: 16.718276

Sources used in ensemble:
  submission.csv: 200 configurations


In [7]:
# Compare our best vs ensemble
cmp_rule = "=" * 60
print("\n" + cmp_rule)
print("COMPARISON")
print(cmp_rule)

our_best = 117.281454
# Absolute distance of the ensemble score from the target, reused for the relative gap.
cmp_gap = ensemble_score - 68.931058
cmp_gain = our_best - ensemble_score

print(f"Our best score: {our_best:.6f}")
print(f"Ensemble score: {ensemble_score:.6f}")
print(f"Improvement from ensemble: {cmp_gain:.6f}")
print(f"\nTarget: 68.931058")
print(f"Gap from ensemble to target: {cmp_gap:.6f} ({cmp_gap/68.931058*100:.1f}%)")


COMPARISON
Our best score: 117.281454
Ensemble score: 85.649334
Improvement from ensemble: 31.632120

Target: 68.931058
Gap from ensemble to target: 16.718276 (24.3%)


In [None]:
# Analyze which N values have the most room for improvement
print("\n" + "="*60)
print("N VALUES WITH LARGEST GAPS")
print("="*60)

# Reference value for N=1: the best bounding square of a single tree over all
# rotations, found by scanning angles with the same scorer used for the CSVs.
# An unrotated tree already fits a 1.0 x 1.0 square (score 1.0); at 45 degrees
# both extents shrink to 1.15 / sqrt(2), giving about 0.66125.
# Rotating by 90 degrees only swaps width and height, so scanning 0..90 suffices.
scan_angles = np.linspace(0.0, 90.0, 901)
scan_origin = np.zeros(1)
theoretical_n1 = min(
    fast_score_group(scan_origin, scan_origin, np.array([angle]), 1)
    for angle in scan_angles
)

print(f"\nTheoretical minimum for N=1: ~{theoretical_n1:.4f}")
print(f"Current best for N=1: {best_per_n[1]['score']:.6f}")

# Show worst N values (largest per-N score first)
worst_n = sorted([(n, best_per_n[n]['score']) for n in range(1, 201)], key=lambda x: -x[1])[:20]
print(f"\nTop 20 worst N values (highest score contribution):")
for n, score in worst_n:
    print(f"  N={n}: {score:.6f}")