# Evolver Loop 1 Analysis

Analyze available pre-optimized solutions and understand the score gap.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal
from shapely.geometry import Polygon
from shapely import affinity
import os
import glob

print('Libraries loaded')

Libraries loaded


In [2]:
# Define the ChristmasTree class
class ChristmasTree:
    """A single tree placed at (center_x, center_y) and rotated by `angle`.

    The three constructor arguments are converted with ``Decimal(...)`` and
    stored as ``center_x``, ``center_y`` and ``angle``. The placed outline is
    stored as a shapely polygon in ``self.polygon``: the base outline is first
    rotated about the origin by ``float(angle)`` (shapely's default unit is
    degrees) and then translated by ``(float(center_x), float(center_y))``.
    """

    # 15-vertex outline of the unrotated tree, listed from the tip down the
    # right-hand side, across the trunk, and back up the left-hand side.
    _OUTLINE = (
        (0.0, 0.8),      # Tip
        (0.125, 0.5),    # Right top tier
        (0.0625, 0.5),
        (0.2, 0.25),     # Right mid tier
        (0.1, 0.25),
        (0.35, 0.0),     # Right base
        (0.075, 0.0),    # Right trunk
        (0.075, -0.2),
        (-0.075, -0.2),  # Left trunk
        (-0.075, 0.0),
        (-0.35, 0.0),    # Left base
        (-0.1, 0.25),    # Left mid tier
        (-0.2, 0.25),
        (-0.0625, 0.5),  # Left top tier
        (-0.125, 0.5),
    )

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Rotate first (about the origin), then move the result into place.
        base_shape = Polygon(self._OUTLINE)
        turned = affinity.rotate(base_shape, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

print('ChristmasTree class defined')

ChristmasTree class defined


In [3]:
def parse_value(val):
    """Return `val` as a string, dropping one leading 's' from string inputs.

    Only ``str`` inputs are checked for the 's' prefix; any other value is
    simply passed through ``str()``.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_prefix else str(val)

def load_trees_for_n(df, n):
    """Build one ChristmasTree per row of `df` that belongs to group `n`.

    A row belongs to the group when its ``id`` starts with the zero-padded,
    three-digit value of `n` followed by an underscore (e.g. ``'007_'``).
    The ``x``, ``y`` and ``deg`` cells are passed through ``parse_value``
    before being handed to ``ChristmasTree``.

    Returns a list of trees in row order (empty when nothing matches).
    """
    group_rows = df[df['id'].str.startswith(f'{n:03d}_')]
    return [
        ChristmasTree(
            parse_value(record['x']),
            parse_value(record['y']),
            parse_value(record['deg']),
        )
        for record in group_rows.to_dict('records')
    ]

def get_score_for_n(df, n):
    """Score the group of `n` trees stored in `df`.

    The score is ``side**2 / n``, where ``side`` is the larger of the x and y
    extents taken over every exterior vertex of every tree in the group.

    Returns None when the number of trees loaded for the group is not `n`.
    """
    trees = load_trees_for_n(df, n)
    if len(trees) != n:
        return None

    # Pool the outline vertices of the whole group into one (k, 2) array.
    vertices = np.array([
        point
        for tree in trees
        for point in tree.polygon.exterior.coords
    ])
    x_extent = vertices[:, 0].max() - vertices[:, 0].min()
    y_extent = vertices[:, 1].max() - vertices[:, 1].min()
    return max(x_extent, y_extent)**2 / n

def get_total_score(df, max_n=200):
    """Sum the per-group scores of a solution frame.

    Args:
        df: solution frame understood by ``get_score_for_n``.
        max_n: largest group size to score; groups 1..max_n (inclusive) are
            visited. Defaults to 200, which reproduces the previous
            hard-coded range.

    Returns:
        A ``(total, scores_by_n)`` tuple. ``scores_by_n`` maps each scored
        group size to its score; groups for which ``get_score_for_n`` returns
        None are skipped and contribute nothing to ``total``.
    """
    total = 0
    scores_by_n = {}
    for n in range(1, max_n + 1):
        score_n = get_score_for_n(df, n)
        if score_n is None:
            # Incomplete or missing group: leave it out of the total.
            continue
        total += score_n
        scores_by_n[n] = score_n
    return total, scores_by_n

print('Helper functions defined')

Helper functions defined


In [4]:
# Check available pre-optimized solutions
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'
csv_files = [
    f'{preopt_dir}/santa-2025.csv',
    f'{preopt_dir}/best_ensemble.csv',
    f'{preopt_dir}/ensemble.csv',
    f'{preopt_dir}/submission.csv',
]

# Score every listed file that exists on disk; missing files are skipped.
# Loop-local names are used so this cell does not rebind the notebook-wide
# `df` / `total` variables.
results = []
for csv_file in csv_files:
    if not os.path.exists(csv_file):
        continue
    file_total, _ = get_total_score(pd.read_csv(csv_file))
    results.append({'file': os.path.basename(csv_file), 'score': file_total})
    print(f'{os.path.basename(csv_file)}: {file_total:.6f}')

print('\nTarget: 68.919154')
if results:
    best_known = min(r['score'] for r in results)
    print(f'Gap from best: {best_known - 68.919154:.6f}')
else:
    # min() over an empty sequence raises ValueError, so report it instead.
    print('Gap from best: n/a (none of the listed CSV files were found)')

santa-2025.csv: 70.676102


best_ensemble.csv: 70.676102


ensemble.csv: 70.676102


submission.csv: 70.676501

Target: 68.919154
Gap from best: 1.756948


In [5]:
# Load the best solution and analyze per-N scores
best_file = f'{preopt_dir}/santa-2025.csv'
df = pd.read_csv(best_file)
total, scores_by_n = get_total_score(df)

# Rank the group sizes from the largest per-N score to the smallest
scores_sorted = sorted(scores_by_n.items(), key=lambda item: item[1], reverse=True)
print('Top 20 worst N values (highest score contribution):')
for n, score in scores_sorted[:20]:
    # Re-derive the bounding-square side from the group's outline vertices.
    outline = np.array([
        point
        for tree in load_trees_for_n(df, n)
        for point in tree.polygon.exterior.coords
    ])
    x_extent = outline[:, 0].max() - outline[:, 0].min()
    y_extent = outline[:, 1].max() - outline[:, 1].min()
    side = max(x_extent, y_extent)
    print(f'  N={n:3d}: side={side:.6f}, score_contribution={score:.6f}')

Top 20 worst N values (highest score contribution):
  N=  1: side=0.813173, score_contribution=0.661250
  N=  2: side=0.949504, score_contribution=0.450779
  N=  3: side=1.142031, score_contribution=0.434745
  N=  5: side=1.443692, score_contribution=0.416850
  N=  4: side=1.290806, score_contribution=0.416545
  N=  7: side=1.673104, score_contribution=0.399897
  N=  6: side=1.548438, score_contribution=0.399610
  N=  9: side=1.867280, score_contribution=0.387415
  N=  8: side=1.755921, score_contribution=0.385407
  N= 15: side=2.384962, score_contribution=0.379203
  N= 10: side=1.940696, score_contribution=0.376630
  N= 21: side=2.811667, score_contribution=0.376451
  N= 20: side=2.742469, score_contribution=0.376057
  N= 11: side=2.033002, score_contribution=0.375736
  N= 22: side=2.873270, score_contribution=0.375258
  N= 16: side=2.446640, score_contribution=0.374128
  N= 26: side=3.118320, score_contribution=0.373997
  N= 12: side=2.114873, score_contribution=0.372724
  N= 13: sid

In [6]:
# Analyze score distribution by N ranges
# Each pair is an inclusive (first N, last N) bucket.
ranges = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]
for start, end in ranges:
    bucket_scores = [scores_by_n[n] for n in range(start, end+1)]
    range_score = sum(bucket_scores)
    print(f'N={start:3d}-{end:3d}: {range_score:.6f} ({range_score/total*100:.1f}%)')

print(f'\nTotal: {total:.6f}')

N=  1- 10: 4.329128 (6.1%)
N= 11- 20: 3.728167 (5.3%)
N= 21- 50: 10.984878 (15.5%)
N= 51-100: 17.641148 (25.0%)
N=101-150: 17.144118 (24.3%)
N=151-200: 16.848664 (23.8%)

Total: 70.676102


In [7]:
# Brute-force the best rotation for a single tree (N=1)
# The unrotated outline spans x in [-0.35, 0.35] and y in [-0.2, 0.8], so its
# axis-aligned bounding box is 0.7 wide and 1.0 tall.
# The bounding-square side at each angle is measured directly from the rotated
# polygon; it is NOT derived from the box diagonal printed below.

import math

# Tree dimensions
tree_width = 0.7  # base width
tree_height = 1.0  # total height

# Diagonal of the unrotated 0.7 x 1.0 bounding box (reference value only)
diag = math.sqrt(tree_width**2 + tree_height**2)
print(f'Tree diagonal: {diag:.6f}')


def _square_side(tree):
    """Return the larger of the x and y extents of `tree`'s outline."""
    outline = np.array(list(tree.polygon.exterior.coords))
    extent = outline.max(axis=0) - outline.min(axis=0)
    return max(extent[0], extent[1])


# For N=1 at 45 degrees:
n1_tree = ChristmasTree('0', '0', '45')
side_n1 = _square_side(n1_tree)
print(f'N=1 at 45 degrees: side={side_n1:.6f}, score={side_n1**2:.6f}')

# Try different angles for N=1 (whole degrees); the first angle that reaches
# the smallest side wins, matching a strict "<" comparison in a running scan.
sides_by_angle = {
    angle: _square_side(ChristmasTree('0', '0', str(angle)))
    for angle in range(0, 360)
}
best_angle = min(sides_by_angle, key=sides_by_angle.get)
best_side = sides_by_angle[best_angle]

print(f'\nBest angle for N=1: {best_angle} degrees, side={best_side:.6f}, score={best_side**2:.6f}')

Tree diagonal: 1.220656
N=1 at 45 degrees: side=0.813173, score=0.661250

Best angle for N=1: 45 degrees, side=0.813173, score=0.661250


In [8]:
# More precise search for N=1 optimal angle
# Same scan as the whole-degree search, but on a 0.1 degree grid.
fine_angles = np.arange(0, 360, 0.1)
best_angle, best_side = 0, float('inf')
for angle in fine_angles:
    outline = np.array(list(ChristmasTree('0', '0', str(angle)).polygon.exterior.coords))
    x_extent = outline[:, 0].max() - outline[:, 0].min()
    y_extent = outline[:, 1].max() - outline[:, 1].min()
    side = max(x_extent, y_extent)
    # Strict comparison: ties keep the earliest angle.
    if side < best_side:
        best_angle, best_side = angle, side

print(f'Best angle for N=1 (0.1 degree precision): {best_angle:.1f} degrees')
print(f'Side: {best_side:.6f}, Score: {best_side**2:.6f}')
print(f'Current N=1 score: {scores_by_n[1]:.6f}')
print(f'Potential improvement: {scores_by_n[1] - best_side**2:.6f}')

Best angle for N=1 (0.1 degree precision): 45.0 degrees
Side: 0.813173, Score: 0.661250
Current N=1 score: 0.661250
Potential improvement: -0.000000


In [9]:
# Check if there are multiple pre-optimized solutions we can ensemble
# Collect every *.csv below the preoptimized directory, in os.walk order
all_csvs = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(preopt_dir)
    for name in names
    if name.endswith('.csv')
]

print(f'Found {len(all_csvs)} CSV files:')
for csv_path in all_csvs:
    print(f'  {csv_path}')

Found 30 CSV files:
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_JKoT4.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/New_Tree_144_196.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimi

In [10]:
# Load all solutions and find best per-N
# Loop-local names (solution_*) are used on purpose: rebinding the
# notebook-wide `df` / `total` here would leave `total` holding the score of
# whichever file happened to be loaded last, and the summary cell prints
# `total` as the current best score.
all_solutions = {}
for csv_file in all_csvs:
    try:
        solution_df = pd.read_csv(csv_file)
        if 'id' not in solution_df.columns:
            # Not a submission-style file; nothing to score.
            continue
        solution_total, solution_scores = get_total_score(solution_df)
        all_solutions[csv_file] = {
            'df': solution_df,
            'total': solution_total,
            'scores': solution_scores,
        }
        print(f'{os.path.basename(csv_file)}: {solution_total:.6f}')
    except Exception as e:
        # Best effort: report the unreadable/malformed file and keep going.
        print(f'Error loading {csv_file}: {e}')

ensemble.csv: 70.676102


submission.csv: 70.676501


santa-2025.csv: 70.676102


best_ensemble.csv: 70.676102


72.49.csv: 72.495739


71.97.csv: 71.972027


72.49.csv: 72.495739


71.97.csv: 71.972027


submission_JKoT4.csv: 72.489504


New_Tree_144_196.csv: 72.927920


submission_JKoT3.csv: 72.489488


santa2025_ver2_v61.csv: 72.951925


submission_JKoT2.csv: 72.489348


santa2025_ver2_v67.csv: 72.938567


santa2025_ver2_v76.csv: 72.826444


submission_70_936673758122.csv: 70.936674


santa2025_ver2_v65.csv: 72.935294


submission_70_926149550346.csv: 70.926150


santa2025_ver2_v66.csv: 72.938599


santa2025_ver2_v63.csv: 72.947427


santa2025_ver2_v69.csv: 72.850110


submission_JKoT1.csv: 72.489483


submission_opt1.csv: 70.990692


santa2025_ver2_v68.csv: 72.939233


santa-2025.csv: 70.676102


submission.csv: 70.676501


submission (77).csv: 72.135010


submission.csv: 72.935294


submission_sa.csv: 72.935294


submission_best.csv: 70.926150


In [11]:
# Find best solution for each N across all files
# For every N keep the lowest score seen and the file it came from; on ties
# the file encountered first is kept. An N that no file covers stays at
# score=inf with source=None.
best_per_n = {}
for n in range(1, 201):
    winner = {'score': float('inf'), 'source': None}
    for path, solution in all_solutions.items():
        candidate = solution['scores'].get(n)
        if candidate is not None and candidate < winner['score']:
            winner = {'score': candidate, 'source': path}
    best_per_n[n] = winner

# Calculate ensemble score
ensemble_score = sum(entry['score'] for entry in best_per_n.values())
print(f'\nEnsemble score (best per N): {ensemble_score:.6f}')
print(f'Target: 68.919154')
print(f'Gap: {ensemble_score - 68.919154:.6f}')


Ensemble score (best per N): 70.676102
Target: 68.919154
Gap: 1.756948


In [None]:
# Show which N values have different best sources
from collections import Counter
source_counts = Counter(best_per_n[n]['source'] for n in range(1, 201))
print('\nSource distribution:')
for source, count in source_counts.most_common():
    if source is None:
        # No loaded solution covered these N values.
        label = '(no solution found)'
    else:
        # Several files share a basename (e.g. submission.csv lives in more
        # than one sub-directory), so show the path relative to the
        # preoptimized root to keep the sources distinguishable.
        label = os.path.relpath(source, preopt_dir)
    print(f'  {label}: {count} N values')

In [None]:
# Check the bucket-of-chump directory for additional solutions
bucket_dir = f'{preopt_dir}/bucket-of-chump'
bucket_present = os.path.exists(bucket_dir)
if bucket_present:
    # Recursive search; only the first ten hits are listed.
    csv_pattern = f'{bucket_dir}/**/*.csv'
    bucket_csvs = glob.glob(csv_pattern, recursive=True)
    print(f'Found {len(bucket_csvs)} CSV files in bucket-of-chump')
    for listed_path in bucket_csvs[:10]:
        print(f'  {listed_path}')

In [None]:
# Summary
rule = '='*60
gap = total - 68.919154  # distance between the current score and the target

print(rule)
print('SUMMARY')
print(rule)
print(f'Current best score: {total:.6f}')
print(f'Target score: 68.919154')
print(f'Gap to close: {gap:.6f} ({gap/68.919154*100:.2f}%)')
print()
for insight_line in (
    'Key insights:',
    '1. N=1 contributes 0.66 to score - highest single contribution',
    '2. Small N (1-10) contribute disproportionately',
    '3. Standard optimization is exhausted (7850 rounds, 0 improvements)',
    '4. Need fundamentally different approaches:',
    '   - Lattice/tessellation for large N',
    '   - Exhaustive search for small N',
    '   - Ensemble from multiple sources',
):
    print(insight_line)