# Evolver Loop 2 Analysis

Goal: Find the best pre-optimized submission across ALL snapshots and understand what is needed to reach the target score of 68.919154 (lower is better).

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os
from glob import glob
import warnings
warnings.filterwarnings('ignore')

# Tree geometry
# TX and TY hold the x and y coordinates of the 15 outline vertices, paired
# by index. The list starts at the tip (0, 0.8), runs down the right-hand
# side to the trunk (bottom edge at y = -0.2), then back up the left-hand
# side; the left half mirrors the right half across x = 0.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
# Unrotated, untranslated outline built from the vertex pairs above.
BASE_TREE = Polygon(zip(TX, TY))

def parse_value(s):
    """Convert a submission field to ``float``.

    A string that begins with the letter ``s`` has that single leading
    character dropped before conversion; any other value is passed to
    ``float`` as-is.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    number_text = s[1:] if has_marker else s
    return float(number_text)

def create_tree_polygon(x, y, deg):
    """Return a copy of BASE_TREE rotated by ``deg`` about (0, 0), then moved by (x, y).

    The rotation is applied first, so (x, y) is where the outline's own
    origin ends up after placement.
    """
    rotated = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def calculate_score(df):
    """Score a whole submission as the sum over n = 1..200 of side**2 / n.

    For each n, the rows whose ``id`` starts with the zero-padded prefix
    ``"NNN_"`` are taken as that group's trees. ``side`` is the longer edge
    of the axis-aligned box around all exterior vertices of those trees.

    Returns ``None`` as soon as some n does not have exactly n rows.
    """
    running = 0
    for n in range(1, 201):
        group = df[df['id'].str.startswith(f"{n:03d}_")]
        if len(group) != n:
            return None  # Invalid submission

        # One vertex array per placed tree, stacked into a single point set.
        outlines = [
            np.array(
                create_tree_polygon(
                    parse_value(px), parse_value(py), parse_value(pdeg)
                ).exterior.coords
            )
            for px, py, pdeg in zip(group['x'], group['y'], group['deg'])
        ]
        cloud = np.vstack(outlines)

        width = cloud[:, 0].max() - cloud[:, 0].min()
        height = cloud[:, 1].max() - cloud[:, 1].min()
        side = max(width, height)
        running += side**2 / n

    return running

# Progress marker: appears in the cell output once the definitions above have run.
print("Functions defined")

In [None]:
# Find ALL CSV files with 20101 lines (valid submissions)
# (presumably 1 header line + 20100 tree rows, since 1 + 2 + ... + 200 = 20100
# -- confirm against the submission format)
import subprocess

# Candidate pre-filter: only *.csv files matching `-size +1M` are considered,
# so small CSVs never reach the per-file line count below.
result = subprocess.run(
    ['find', '/home/nonroot/snapshots/santa-2025', '-name', '*.csv', '-size', '+1M'],
    capture_output=True, text=True
)
if result.returncode != 0:
    # find still lists what it could reach; surface the problem instead of
    # silently working with a partial (or empty) listing.
    print(f"find exited with status {result.returncode}: {result.stderr.strip()}")

# Dropping empty entries makes an empty listing [] rather than [''], so the
# count printed below is 0 when nothing was found.
all_csvs = [path for path in result.stdout.strip().split('\n') if path]
print(f"Found {len(all_csvs)} CSV files")

# Filter to valid submissions (20101 lines)
valid_csvs = []
for csv_path in all_csvs:
    try:
        result = subprocess.run(['wc', '-l', csv_path], capture_output=True, text=True)
        lines = int(result.stdout.split()[0])
    except (OSError, ValueError, IndexError):
        # Best effort: wc could not be run or printed no parsable count
        # (for example the file disappeared); skip this candidate.
        continue
    if lines == 20101:
        valid_csvs.append(csv_path)

print(f"Found {len(valid_csvs)} valid submissions (20101 lines)")

In [None]:
# Score a sample of submissions to find the best ones
import random

# Start with the two submissions already known to be good.
known_files = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv',
    '/home/nonroot/snapshots/santa-2025/21164519357/code/exploration/santa-2025.csv',
]

# Maps file path -> total score; later cells keep adding to this dict.
scores = {}
for f in known_files:
    try:
        df = pd.read_csv(f)
        score = calculate_score(df)
    except Exception as e:
        print(f"Error with {f}: {e}")
    else:
        # calculate_score returns None for an invalid submission; those
        # are left out of the table.
        if score:
            scores[f] = score
            print(f"{os.path.basename(f)}: {score:.6f}")

In [None]:
# Check more files - the first 50 entries of valid_csvs
print("\nChecking more files...")
skipped_count = 0
for f in valid_csvs[:50]:  # Check first 50
    if f in scores:
        continue
    try:
        df = pd.read_csv(f)
        score = calculate_score(df)
    except Exception as e:
        # Keep scanning when a file cannot be read or scored, but report it
        # rather than hiding the failure.
        skipped_count += 1
        print(f"Skipping {f}: {e}")
        continue
    # None marks an invalid submission; compare explicitly so the check does
    # not depend on the truthiness of a numeric score.
    if score is not None and score < 75:  # Only keep good scores
        scores[f] = score
        if score < 70.7:
            print(f"{score:.6f}: {f}")

if skipped_count:
    print(f"Files skipped due to errors: {skipped_count}")
print(f"\nTotal files scored: {len(scores)}")

In [None]:
# Find the best score
# Lower is better, so the best entry is the one with the smallest score.
best_file, best_score = min(scores.items(), key=lambda entry: entry[1])

# Distance from the target, absolute and as a percentage of the target.
gap = best_score - 68.919154
gap_pct = gap / 68.919154 * 100

print(f"\nBest score found: {best_score:.6f}")
print(f"File: {best_file}")
print(f"\nTarget: 68.919154")
print(f"Gap: {gap:.6f} ({gap_pct:.2f}%)")

In [None]:
# Check all files with scores below 70.7
print("\nAll files with score < 70.7:")
# Rank every scored file from best (lowest) to worst, then keep the ones
# under the threshold.
ranked = sorted(scores.items(), key=lambda item: item[1])
below_threshold = [(path, value) for path, value in ranked if value < 70.7]
for path, value in below_threshold:
    print(f"{value:.6f}: {os.path.basename(path)}")

In [None]:
# Analyze the best file - which N values have room for improvement?
best_df = pd.read_csv(best_file)


def _bounding_side(rows):
    """Longer edge of the axis-aligned box around every tree in ``rows``."""
    outlines = [
        np.array(
            create_tree_polygon(
                parse_value(r['x']), parse_value(r['y']), parse_value(r['deg'])
            ).exterior.coords
        )
        for _, r in rows.iterrows()
    ]
    pts = np.vstack(outlines)
    x_span = pts[:, 0].max() - pts[:, 0].min()
    y_span = pts[:, 1].max() - pts[:, 1].min()
    return max(x_span, y_span)


print("Score breakdown by N (top 20 contributors):")
# One (n, side, score) tuple per group, in increasing n.
n_scores = []
for n in range(1, 201):
    side = _bounding_side(best_df[best_df['id'].str.startswith(f"{n:03d}_")])
    n_scores.append((n, side, side**2 / n))

# Largest per-N contribution first
n_scores_sorted = sorted(n_scores, key=lambda entry: entry[2], reverse=True)
for n, side, score in n_scores_sorted[:20]:
    print(f"N={n:3d}: side={side:.6f}, score={score:.6f}")

In [None]:
# Calculate theoretical lower bound
# Area of a single tree outline, read directly from the polygon.
tree_area = BASE_TREE.area
print(f"Tree area: {tree_area:.6f}")

# Area argument: if the bounding square must have area >= n * tree_area,
# then side >= sqrt(n * tree_area), and so side**2 / n >= tree_area for
# every n.

print("\nTheoretical lower bounds (assuming perfect packing):")
# Per-n floor, written out as sqrt(n * area)**2 / n to mirror the score formula.
per_n_floor = [np.sqrt(n * tree_area) ** 2 / n for n in range(1, 201)]
theoretical_total = 0
for floor_score in per_n_floor:
    theoretical_total += floor_score

# Ratio of the area-only floor to a given total, as a percentage.
needed_efficiency = theoretical_total / 68.919154 * 100
current_efficiency = theoretical_total / best_score * 100

print(f"Theoretical minimum (perfect packing): {theoretical_total:.6f}")
print(f"Current best: {best_score:.6f}")
print(f"Target: 68.919154")
print(f"\nPacking efficiency needed for target: {needed_efficiency:.1f}%")
print(f"Current packing efficiency: {current_efficiency:.1f}%")

In [None]:
# Check which N values have the most room for improvement
print("\nN values with most room for improvement (vs theoretical):")
# The area-only floor is the same for every n: sqrt(n * tree_area)**2 / n.
min_score = tree_area
# Each record: (n, side, score, floor, headroom above floor, efficiency %).
improvements = [
    (n, side, score, min_score, score - min_score, min_score / score * 100)
    for n, side, score in n_scores
]

# Largest headroom first
improvements_sorted = sorted(improvements, key=lambda rec: rec[4], reverse=True)
print("\nTop 20 N values with most room for improvement:")
for n, side, score, min_score, room, eff in improvements_sorted[:20]:
    print(f"N={n:3d}: score={score:.4f}, theoretical={min_score:.4f}, room={room:.4f}, eff={eff:.1f}%")