# Evolver Loop 3 Analysis

## Current Status
- Best CV: 70.682741 (exp_002)
- Target: 68.922808
- Gap: 1.76 points (2.5%)
- No LB submissions yet!

## Key Questions
1. Can we improve by ensembling more sources?
2. What's the best approach to close the 1.76 point gap?
3. Should we compile and run the C++ SA optimizer from jonathanchan kernel?

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import os
import glob

getcontext().prec = 30
scale_factor = Decimal('1e15')

print('Libraries loaded')

Libraries loaded


In [2]:
# Define scoring functions
class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

def get_tree_list_side_length(tree_list):
    all_polygons = [t.polygon for t in tree_list]
    bounds = unary_union(all_polygons).bounds
    return Decimal(max(bounds[2] - bounds[0], bounds[3] - bounds[1])) / scale_factor

def get_score(trees, n):
    if not trees:
        return 0.0
    side = get_tree_list_side_length(trees)
    return float(side ** 2 / Decimal(n))

def has_overlap(trees):
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx == i:
                continue
            if poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                return True
    return False

def load_configuration_from_df(n, df):
    group_data = df[df['id'].str.startswith(f'{n:03d}_')]
    trees = []
    for _, row in group_data.iterrows():
        x = str(row['x'])[1:] if str(row['x']).startswith('s') else str(row['x'])
        y = str(row['y'])[1:] if str(row['y']).startswith('s') else str(row['y'])
        deg = str(row['deg'])[1:] if str(row['deg']).startswith('s') else str(row['deg'])
        if x and y and deg:
            trees.append(ChristmasTree(x, y, deg))
    return trees

print('Scoring functions defined')

Scoring functions defined


In [3]:
# List all available pre-optimized CSVs
print('Available pre-optimized CSVs:')
print('=' * 60)

csv_sources = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov/submission_best.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv',
    '/home/code/experiments/003_preoptimized/repaired_baseline.csv',
]

for path in csv_sources:
    if os.path.exists(path):
        print(f'  {os.path.basename(path)}: {path}')
    else:
        print(f'  MISSING: {path}')

Available pre-optimized CSVs:
  santa-2025.csv: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv
  submission.csv: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv
  ensemble.csv: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
  submission_best.csv: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov/submission_best.csv
  71.97.csv: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv
  72.49.csv: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv
  submission.csv: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv
  repaired_baseline.csv: /home/code/experiments/003_preoptimized/repaired_baseline.csv


In [4]:
# Score each source and count valid configurations
print('\nScoring each source...')
print('=' * 60)

source_stats = {}

for path in csv_sources:
    if not os.path.exists(path):
        continue
    
    try:
        df = pd.read_csv(path)
        total_score = 0.0
        valid_count = 0
        overlap_count = 0
        
        for n in range(1, 201):
            trees = load_configuration_from_df(n, df)
            if trees:
                if has_overlap(trees):
                    overlap_count += 1
                else:
                    score = get_score(trees, n)
                    total_score += score
                    valid_count += 1
        
        source_stats[path] = {
            'total_score': total_score,
            'valid_count': valid_count,
            'overlap_count': overlap_count
        }
        
        name = os.path.basename(path)
        print(f'{name}:')
        print(f'  Total score: {total_score:.6f}')
        print(f'  Valid N: {valid_count}, Overlaps: {overlap_count}')
        print()
    except Exception as e:
        print(f'{os.path.basename(path)}: Error - {e}')


Scoring each source...


santa-2025.csv:
  Total score: 66.470080
  Valid N: 188, Overlaps: 12



submission.csv:
  Total score: 66.737056
  Valid N: 189, Overlaps: 11



ensemble.csv:
  Total score: 66.053231
  Valid N: 187, Overlaps: 13



submission_best.csv:
  Total score: 70.926150
  Valid N: 200, Overlaps: 0



71.97.csv:
  Total score: 71.972027
  Valid N: 200, Overlaps: 0



72.49.csv:
  Total score: 72.495739
  Valid N: 200, Overlaps: 0



submission.csv:
  Total score: 66.737056
  Valid N: 189, Overlaps: 11



repaired_baseline.csv:
  Total score: 70.682741
  Valid N: 200, Overlaps: 0



In [5]:
# Create ensemble - for each N, take the best configuration from all sources
print('Creating ensemble from all sources...')
print('=' * 60)

best_configs = {}
best_scores = {}
best_sources = {}

for path in csv_sources:
    if not os.path.exists(path):
        continue
    
    try:
        df = pd.read_csv(path)
        name = os.path.basename(path)
        
        for n in range(1, 201):
            trees = load_configuration_from_df(n, df)
            if trees and not has_overlap(trees):
                score = get_score(trees, n)
                if n not in best_scores or score < best_scores[n]:
                    best_scores[n] = score
                    best_configs[n] = trees
                    best_sources[n] = name
    except Exception as e:
        print(f'Error loading {path}: {e}')

# Calculate ensemble total
ensemble_total = sum(best_scores.values())
print(f'\nEnsemble total score: {ensemble_total:.6f}')
print(f'Gap to target (68.92): {ensemble_total - 68.922808:.6f} points')
print(f'Valid N configurations: {len(best_configs)}')

# Show source distribution
from collections import Counter
source_counts = Counter(best_sources.values())
print('\nSource distribution:')
for src, count in source_counts.most_common():
    print(f'  {src}: {count} N values')

Creating ensemble from all sources...



Ensemble total score: 70.679449
Gap to target (68.92): 1.756641 points
Valid N configurations: 200

Source distribution:
  santa-2025.csv: 188 N values
  submission.csv: 10 N values
  submission_best.csv: 2 N values


In [6]:
# Analyze which N values have the worst scores (most room for improvement)
print('N values with highest scores (most room for improvement):')
print('=' * 60)

# Sort by score descending
sorted_scores = sorted(best_scores.items(), key=lambda x: x[1], reverse=True)

print('\nTop 20 worst-performing N values:')
for n, score in sorted_scores[:20]:
    print(f'  N={n:3d}: score={score:.6f}, source={best_sources[n]}')

N values with highest scores (most room for improvement):

Top 20 worst-performing N values:
  N=  1: score=0.661250, source=santa-2025.csv
  N=  2: score=0.450779, source=santa-2025.csv
  N=  3: score=0.434745, source=santa-2025.csv
  N=  5: score=0.416850, source=santa-2025.csv
  N=  4: score=0.416545, source=santa-2025.csv
  N=  7: score=0.399897, source=santa-2025.csv
  N=  6: score=0.399610, source=santa-2025.csv
  N=  9: score=0.387415, source=santa-2025.csv
  N=  8: score=0.385407, source=santa-2025.csv
  N= 15: score=0.379203, source=santa-2025.csv
  N= 10: score=0.376630, source=santa-2025.csv
  N= 21: score=0.376451, source=santa-2025.csv
  N= 20: score=0.376057, source=santa-2025.csv
  N= 11: score=0.375736, source=santa-2025.csv
  N= 22: score=0.375258, source=santa-2025.csv
  N= 16: score=0.374128, source=santa-2025.csv
  N= 26: score=0.373997, source=santa-2025.csv
  N= 12: score=0.372724, source=santa-2025.csv
  N= 13: score=0.372323, source=santa-2025.csv
  N= 25: score

In [7]:
# Check N=1 optimal configuration
# For N=1, the optimal is a single tree at (0,0) with 45 degree rotation
# This minimizes the bounding box side length

print('Checking N=1 configuration:')
print('=' * 60)

# Current N=1
current_n1 = best_configs[1]
print(f'Current N=1 score: {best_scores[1]:.6f}')
for tree in current_n1:
    print(f'  x={tree.center_x}, y={tree.center_y}, angle={tree.angle}')

# Try optimal N=1 (45 degree rotation)
optimal_n1 = [ChristmasTree('0', '0', '45')]
optimal_score = get_score(optimal_n1, 1)
print(f'\nOptimal N=1 (45 deg rotation) score: {optimal_score:.6f}')

# Try 0 degree rotation
zero_n1 = [ChristmasTree('0', '0', '0')]
zero_score = get_score(zero_n1, 1)
print(f'N=1 with 0 deg rotation score: {zero_score:.6f}')

if optimal_score < best_scores[1]:
    improvement = best_scores[1] - optimal_score
    print(f'\nPotential improvement for N=1: {improvement:.6f} points!')
    best_configs[1] = optimal_n1
    best_scores[1] = optimal_score
    best_sources[1] = 'optimal'

Checking N=1 configuration:
Current N=1 score: 0.661250
  x=-48.196086194214246, y=58.770984615214225, angle=45.0

Optimal N=1 (45 deg rotation) score: 0.661250
N=1 with 0 deg rotation score: 1.000000


In [8]:
# Save the ensemble submission
print('Saving ensemble submission...')
print('=' * 60)

# Calculate final score
final_score = sum(best_scores.values())
print(f'Final ensemble score: {final_score:.6f}')
print(f'Gap to target (68.92): {final_score - 68.922808:.6f} points')

# Save to CSV
index = [f'{n:03d}_{t}' for n in range(1, 201) for t in range(n)]

tree_data = []
for n in range(1, 201):
    for tree in best_configs[n]:
        tree_data.append([float(tree.center_x), float(tree.center_y), float(tree.angle)])

cols = ['x', 'y', 'deg']
submission = pd.DataFrame(index=index, columns=cols, data=tree_data).rename_axis('id')

for col in cols:
    submission[col] = submission[col].astype(float).round(decimals=6)

for col in submission.columns:
    submission[col] = 's' + submission[col].astype('string')

submission.to_csv('/home/code/experiments/003_preoptimized/ensemble_submission.csv')
print('Saved to /home/code/experiments/003_preoptimized/ensemble_submission.csv')

Saving ensemble submission...
Final ensemble score: 70.679449
Gap to target (68.92): 1.756641 points
Saved to /home/code/experiments/003_preoptimized/ensemble_submission.csv
