# Evolver Loop 1 Analysis

## Goal: Find the best ensemble from all available pre-optimized CSVs

The evaluator correctly pointed out that we have multiple pre-optimized solutions that have not yet been compared against each other.
For each N value (1–200), let's score every source's configuration (bounding-square side² / N) and keep the best one.

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import os
import glob
import warnings
warnings.filterwarnings('ignore')

# Tree outline: 15 vertices, stored as parallel x / y lists and then paired up.
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]
# (x, y) tuples in vertex order; this is what the polygon is built from.
TREE_COORDS = [(vx, vy) for vx, vy in zip(TX, TY)]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` and placed at ``(x, y)``.

    The polygon is created from ``TREE_COORDS``, rotated about the origin
    (0, 0) first, and only then shifted by ``x`` / ``y``.

    Args:
        x: Offset applied along the x-axis after rotation.
        y: Offset applied along the y-axis after rotation.
        deg: Rotation angle handed to shapely's ``rotate``.

    Returns:
        The transformed shapely ``Polygon``.
    """
    base_outline = Polygon(TREE_COORDS)
    turned = rotate(base_outline, deg, origin=(0, 0))
    return translate(turned, x, y)

def parse_value(val):
    """Convert ``val`` to ``float``, dropping one leading ``'s'`` from strings.

    Args:
        val: A string such as ``'s0.25'`` or ``'0.25'``, or any other value
            accepted by ``float()``.

    Returns:
        The numeric value as a ``float``.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_prefix else val)

def load_submission(path, expected_rows=20100):
    """Load a submission CSV and return it with parsed numeric columns.

    Adds ``x_val``, ``y_val`` and ``deg_val`` (the ``x``, ``y`` and ``deg``
    columns run through ``parse_value``) and ``n`` (the integer before the
    first ``'_'`` in ``id``).

    Args:
        path: Path of the CSV file to read.
        expected_rows: Required number of rows. The default, 20100, is
            1 + 2 + ... + 200, i.e. a complete submission for N = 1..200.
            Pass ``None`` to skip the check.

    Returns:
        The augmented DataFrame, or ``None`` if the file cannot be read or
        parsed, or has the wrong number of rows. The reason is printed in
        both cases.
    """
    try:
        df = pd.read_csv(path)
        if expected_rows is not None and len(df) != expected_rows:
            # Say why the file is rejected instead of returning None silently.
            print(f"Skipping {path}: expected {expected_rows} rows, found {len(df)}")
            return None
        for col in ('x', 'y', 'deg'):
            df[f'{col}_val'] = df[col].apply(parse_value)
        df['n'] = df['id'].apply(lambda x: int(x.split('_')[0]))
        return df
    except Exception as e:
        # Best-effort loader: one bad file must not abort the whole comparison.
        print(f"Error loading {path}: {e}")
        return None

def calculate_score_for_n(group):
    """Return the side length of the square bounding box around all trees in ``group``.

    Each row is turned into a tree polygon from its ``x_val``, ``y_val`` and
    ``deg_val`` columns. The result is the larger of the width and height of
    the axis-aligned box that contains every polygon.

    Args:
        group: DataFrame rows for a single N, with ``x_val``, ``y_val`` and
            ``deg_val`` columns.

    Returns:
        The bounding-box side length (not yet squared or divided by N).

    Raises:
        ValueError: If ``group`` has no rows.
    """
    if len(group) == 0:
        raise ValueError("cannot compute a bounding box for an empty group")

    # One (minx, miny, maxx, maxy) row per tree; the extremes over all rows
    # give the same box as stacking every exterior vertex.
    bounds = np.array([
        create_tree_polygon(x, y, deg).bounds
        for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ])
    min_x, min_y = bounds[:, :2].min(axis=0)
    max_x, max_y = bounds[:, 2:].max(axis=0)
    return max(max_x - min_x, max_y - min_y)

# Marker output confirming that this cell ran and the helpers above are defined.
print("Functions defined.")

Functions defined.


In [2]:
# Find all CSV files in preoptimized directory
base_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

# Candidate files expected directly inside base_path (kept in this order).
main_files = ['santa-2025.csv', 'best_ensemble.csv', 'ensemble.csv', 'submission.csv']
# Subdirectories whose *.csv files are all treated as candidates.
subdirs = ['bucket-of-chump', 'santa25-public', 'telegram', 'chistyakov', 'blended', 'santa-2025-try3']

csv_files = []

# Main directory files
for f in main_files:
    path = os.path.join(base_path, f)
    if os.path.exists(path):
        csv_files.append(path)

# Subdirectories
for subdir in subdirs:
    subdir_path = os.path.join(base_path, subdir)
    # isdir (not exists): os.listdir would raise if the name were a plain file.
    if not os.path.isdir(subdir_path):
        continue
    # os.listdir returns entries in arbitrary order; sorting keeps the candidate
    # order (and so which source wins a score tie later) reproducible across runs.
    for f in sorted(os.listdir(subdir_path)):
        if f.endswith('.csv'):
            csv_files.append(os.path.join(subdir_path, f))

print(f"Found {len(csv_files)} CSV files:")
for f in csv_files:
    print(f"  - {os.path.relpath(f, base_path)}")

Found 27 CSV files:
  - santa-2025.csv
  - best_ensemble.csv
  - ensemble.csv
  - submission.csv
  - bucket-of-chump/submission.csv
  - santa25-public/submission_JKoT4.csv
  - santa25-public/New_Tree_144_196.csv
  - santa25-public/submission_JKoT3.csv
  - santa25-public/santa2025_ver2_v61.csv
  - santa25-public/submission_JKoT2.csv
  - santa25-public/santa2025_ver2_v67.csv
  - santa25-public/santa2025_ver2_v76.csv
  - santa25-public/submission_70_936673758122.csv
  - santa25-public/santa2025_ver2_v65.csv
  - santa25-public/submission_70_926149550346.csv
  - santa25-public/santa2025_ver2_v66.csv
  - santa25-public/santa2025_ver2_v63.csv
  - santa25-public/santa2025_ver2_v69.csv
  - santa25-public/submission_JKoT1.csv
  - santa25-public/submission_opt1.csv
  - santa25-public/santa2025_ver2_v68.csv
  - telegram/72.49.csv
  - telegram/71.97.csv
  - chistyakov/submission_best.csv
  - blended/submission (77).csv
  - santa-2025-try3/submission.csv
  - santa-2025-try3/submission_sa.csv


In [3]:
# Load every candidate CSV and score it per N (score = side**2 / n)
submissions = {}
scores_by_source = {}

for path in csv_files:
    name = os.path.relpath(path, base_path)
    df = load_submission(path)
    if df is None:
        print(f"{name}: FAILED TO LOAD")
        continue

    submissions[name] = df
    scores = {}
    for n in range(1, 201):
        group = df[df['n'] == n]
        if len(group) != n:
            # A group with the wrong number of trees can never be selected.
            scores[n] = float('inf')
            continue
        side = calculate_score_for_n(group)
        scores[n] = side ** 2 / n

    scores_by_source[name] = scores
    total = sum(scores.values())
    print(f"{name}: Total score = {total:.6f}")

print(f"\nSuccessfully loaded {len(submissions)} submissions.")

santa-2025.csv: Total score = 70.676102


best_ensemble.csv: Total score = 70.676102


ensemble.csv: Total score = 70.676102


submission.csv: Total score = 70.676501


bucket-of-chump/submission.csv: Total score = 70.676501


santa25-public/submission_JKoT4.csv: Total score = 72.489504


santa25-public/New_Tree_144_196.csv: Total score = 72.927920


santa25-public/submission_JKoT3.csv: Total score = 72.489488


santa25-public/santa2025_ver2_v61.csv: Total score = 72.951925


santa25-public/submission_JKoT2.csv: Total score = 72.489348


santa25-public/santa2025_ver2_v67.csv: Total score = 72.938567


santa25-public/santa2025_ver2_v76.csv: Total score = 72.826444


santa25-public/submission_70_936673758122.csv: Total score = 70.936674


santa25-public/santa2025_ver2_v65.csv: Total score = 72.935294


santa25-public/submission_70_926149550346.csv: Total score = 70.926150


santa25-public/santa2025_ver2_v66.csv: Total score = 72.938599


santa25-public/santa2025_ver2_v63.csv: Total score = 72.947427


santa25-public/santa2025_ver2_v69.csv: Total score = 72.850110


santa25-public/submission_JKoT1.csv: Total score = 72.489483


santa25-public/submission_opt1.csv: Total score = 70.990692


santa25-public/santa2025_ver2_v68.csv: Total score = 72.939233


telegram/72.49.csv: Total score = 72.495739


telegram/71.97.csv: Total score = 71.972027


chistyakov/submission_best.csv: Total score = 70.926150


blended/submission (77).csv: Total score = 72.135010


santa-2025-try3/submission.csv: Total score = 72.935294


santa-2025-try3/submission_sa.csv: Total score = 72.935294

Successfully loaded 27 submissions.


In [4]:
# Find the best source for each N
best_for_n = {}
best_source_for_n = {}

for n in range(1, 201):
    best_score = float('inf')
    best_source = None
    # Strict '<' means the first source in iteration order wins ties.
    for source, scores in scores_by_source.items():
        if scores[n] < best_score:
            best_score = scores[n]
            best_source = source
    best_for_n[n] = best_score
    best_source_for_n[n] = best_source

# Calculate ensemble score
ensemble_score = sum(best_for_n.values())

# Derive the baseline from the loaded data rather than a rounded constant, so
# the reported improvement is exact. Fall back to the previously recorded total
# only if the baseline file was not loaded.
BASELINE_SOURCE = 'santa-2025.csv'
if BASELINE_SOURCE in scores_by_source:
    baseline_score = sum(scores_by_source[BASELINE_SOURCE].values())
else:
    baseline_score = 70.676102

print(f"\n{'='*60}")
print(f"ENSEMBLE SCORE (best for each N): {ensemble_score:.6f}")
print(f"Baseline (santa-2025.csv): {baseline_score:.6f}")
print(f"Improvement: {baseline_score - ensemble_score:.6f}")
print(f"{'='*60}")


ENSEMBLE SCORE (best for each N): 70.676102
Baseline (santa-2025.csv): 70.676102
Improvement: -0.000000


In [5]:
# Tally how many N values each source wins in the ensemble
from collections import Counter

source_counts = Counter()
source_counts.update(best_source_for_n.values())

print("\nSources contributing to ensemble:")
ranked_sources = source_counts.most_common()
for source, count in ranked_sources:
    print(f"  {source}: {count} N values")


Sources contributing to ensemble:
  santa-2025.csv: 200 N values


In [6]:
# List the N values where the ensemble is better than santa-2025.csv
print("\nN values where ensemble beats santa-2025.csv:")
santa_scores = scores_by_source.get('santa-2025.csv', {})

# (n, improvement, winning source) for every N that beats the baseline by
# more than a 1e-9 tolerance.
improvements = [
    (n, santa_scores.get(n, float('inf')) - best_for_n[n], best_source_for_n[n])
    for n in range(1, 201)
    if best_for_n[n] < santa_scores.get(n, float('inf')) - 1e-9
]

if not improvements:
    print("No improvements found - santa-2025.csv dominates all N values.")
else:
    # Largest improvement first; only the top 20 are printed.
    improvements.sort(key=lambda item: item[1], reverse=True)
    print(f"Found {len(improvements)} N values with improvements:")
    for n, imp, source in improvements[:20]:
        print(f"  N={n}: improvement={imp:.6f} from {source}")


N values where ensemble beats santa-2025.csv:
No improvements found - santa-2025.csv dominates all N values.


In [7]:
# Create the ensemble submission
# Compare against the baseline total computed from the loaded data, not a
# rounded constant. Fall back to the recorded value only if the baseline file
# is missing.
baseline_scores = scores_by_source.get('santa-2025.csv')
baseline_total = sum(baseline_scores.values()) if baseline_scores else 70.676102

# Same 1e-9 tolerance as the per-N comparison, so float noise never triggers a
# rewrite of the submission.
if baseline_total - ensemble_score > 1e-9:
    print("Creating ensemble submission...")
    # Take each N's rows from its winning source, keeping the original
    # (unparsed) id/x/y/deg values, and concatenate once.
    parts = []
    for n in range(1, 201):
        source_df = submissions[best_source_for_n[n]]
        parts.append(source_df.loc[source_df['n'] == n, ['id', 'x', 'y', 'deg']])

    ensemble_df = pd.concat(parts, ignore_index=True)
    ensemble_path = '/home/submission/submission.csv'
    # Make sure the target directory exists before writing.
    os.makedirs(os.path.dirname(ensemble_path), exist_ok=True)
    ensemble_df.to_csv(ensemble_path, index=False)
    print(f"Ensemble saved to {ensemble_path}")
    print(f"Rows: {len(ensemble_df)}")
else:
    print("No improvement from ensemble - keeping santa-2025.csv")

No improvement from ensemble - keeping santa-2025.csv


In [8]:
# Rank N values by their best score, highest (worst) first
print("\nPer-N score analysis (worst 20 N values):")
n_scores = sorted(
    ((n, best_for_n[n]) for n in range(1, 201)),
    key=lambda pair: pair[1],
    reverse=True,
)
print("N\tScore\t\tSource")
for n, score in n_scores[:20]:
    print(f"{n}\t{score:.6f}\t{best_source_for_n[n]}")


Per-N score analysis (worst 20 N values):
N	Score		Source
1	0.661250	santa-2025.csv
2	0.450779	santa-2025.csv
3	0.434745	santa-2025.csv
5	0.416850	santa-2025.csv
4	0.416545	santa-2025.csv
7	0.399897	santa-2025.csv
6	0.399610	santa-2025.csv
9	0.387415	santa-2025.csv
8	0.385407	santa-2025.csv
15	0.379203	santa-2025.csv
10	0.376630	santa-2025.csv
21	0.376451	santa-2025.csv
20	0.376057	santa-2025.csv
11	0.375736	santa-2025.csv
22	0.375258	santa-2025.csv
16	0.374128	santa-2025.csv
26	0.373997	santa-2025.csv
12	0.372724	santa-2025.csv
13	0.372323	santa-2025.csv
25	0.372144	santa-2025.csv


In [9]:
# N=1 check: compare the 45-degree placement against a scan of whole-degree
# rotations of a single tree at the origin.
import math


def _single_tree_box(deg):
    """Return ``(side, side ** 2)`` of the bounding square for one tree rotated by ``deg``."""
    outline = np.array(create_tree_polygon(0, 0, deg).exterior.coords)
    lo_x, lo_y = outline.min(axis=0)
    hi_x, hi_y = outline.max(axis=0)
    edge = max(hi_x - lo_x, hi_y - lo_y)
    return edge, edge ** 2


# Reference: tree at 45 degrees
side_45, score_45 = _single_tree_box(45)

print(f"\nN=1 theoretical analysis:")
print(f"  At 45 degrees: side={side_45:.6f}, score={score_45:.6f}")

# Scan 0..359 in 1-degree steps; an angle only replaces the current best
# when it is strictly better.
best_angle, best_score = 45, score_45
for angle in range(360):
    _, score = _single_tree_box(angle)
    if score < best_score:
        best_angle, best_score = angle, score

print(f"  Best angle found: {best_angle} degrees, score={best_score:.6f}")
print(f"  Current N=1 score: {best_for_n[1]:.6f}")
print(f"  Potential improvement: {best_for_n[1] - best_score:.6f}")


N=1 theoretical analysis:
  At 45 degrees: side=0.813173, score=0.661250
  Best angle found: 45 degrees, score=0.661250
  Current N=1 score: 0.661250
  Potential improvement: -0.000000


In [10]:
# Calculate the gap to target and what improvement is needed per N
# (This cell was previously stored as one line with literal "\n" escapes
# behind the leading '#', so it was a single comment and executed nothing.)
target = 68.894234
current = 70.676102
gap = current - target
print(f"Current score: {current:.6f}")
print(f"Target score: {target:.6f}")
print(f"Gap: {gap:.6f} ({100*gap/current:.2f}%)")
print(f"\nAverage improvement needed per N: {gap/200:.6f}")

# Show how much each N would need to improve to close the gap proportionally
print("\nIf we improve each N proportionally:")
for n in [1, 2, 3, 5, 10, 50, 100, 200]:
    current_n = best_for_n[n]
    # Each N is scaled by the same relative gap (gap / current).
    needed_improvement = current_n * (gap / current)
    new_score = current_n - needed_improvement
    print(f"  N={n}: current={current_n:.6f}, need={new_score:.6f} (improve by {needed_improvement:.6f})")