# Evolver Loop 1 Analysis

Analyzing the baseline and identifying improvement opportunities.

In [1]:
import pandas as pd
import numpy as np
import os
import json

# Tree outline in local coordinates: 15 vertices, listed from the tip, down the
# right-hand side to the trunk bottom, then back up the left-hand side.
TX = np.array([
    0,                 # tip
    0.125, 0.0625,     # top tier, right
    0.2, 0.1,          # middle tier, right
    0.35,              # base, right
    0.075, 0.075,      # trunk, right
    -0.075, -0.075,    # trunk, left
    -0.35,             # base, left
    -0.1, -0.2,        # middle tier, left
    -0.0625, -0.125,   # top tier, left
])
TY = np.array([
    0.8,
    0.5, 0.5,
    0.25, 0.25,
    0,
    0, -0.2,
    -0.2, 0,
    0,
    0.25, 0.25,
    0.5, 0.5,
])

def get_tree_vertices(x, y, deg):
    """Return the outline of one tree placed at (x, y) and rotated by ``deg`` degrees.

    The template outline (TX, TY) is rotated about its local origin and then
    translated by (x, y).

    Returns:
        A pair ``(xs, ys)`` of arrays holding the 15 transformed vertex coordinates.
    """
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)
    return TX * c - TY * s + x, TX * s + TY * c + y

def _parse_submission_value(value):
    """Convert one submission field to float, dropping a leading 's' marker if present."""
    text = str(value)
    return float(text[1:]) if text.startswith('s') else float(value)

def get_bounding_box(trees_df):
    """Calculate the bounding box side length for a set of trees.

    Args:
        trees_df: DataFrame with 'x', 'y' and 'deg' columns, one row per tree.
            Values may be plain numbers or strings carrying a leading 's'.

    Returns:
        The side of the smallest axis-aligned square covering every vertex of
        every tree, i.e. the larger of the overall x-extent and y-extent.

    Raises:
        ValueError: if ``trees_df`` has no rows (there is nothing to bound).
    """
    if len(trees_df) == 0:
        raise ValueError('get_bounding_box requires at least one tree')

    xs, ys = [], []
    # Column-wise iteration avoids building a Series per row as iterrows() does.
    for raw_x, raw_y, raw_deg in zip(trees_df['x'], trees_df['y'], trees_df['deg']):
        vx, vy = get_tree_vertices(
            _parse_submission_value(raw_x),
            _parse_submission_value(raw_y),
            _parse_submission_value(raw_deg),
        )
        xs.append(vx)
        ys.append(vy)

    all_x = np.concatenate(xs)
    all_y = np.concatenate(ys)
    width = all_x.max() - all_x.min()
    height = all_y.max() - all_y.min()
    return max(width, height)

def calculate_score(df, max_n=200):
    """Calculate the total score for a submission.

    For every group size n in 1..max_n the rows whose id starts with the
    zero-padded prefix ``f"{n:03d}_"`` are collected; the group contributes
    ``side**2 / n`` where ``side`` is its bounding-box side length.

    Args:
        df: submission DataFrame with an 'id' string column plus the columns
            required by ``get_bounding_box``.
        max_n: largest group size to score (defaults to 200).

    Returns:
        ``(total_score, scores_df)`` where ``scores_df`` has the columns
        'n', 'side' and 'contribution', one row per scored group.

    Notes:
        Groups that do not contain exactly n rows are left out of the total.
        A warning is emitted when that happens, because the resulting total is
        then not comparable with a score computed over all groups.
    """
    import warnings

    columns = ['n', 'side', 'contribution']
    ids = df['id'].str
    scores = []
    skipped = []
    for n in range(1, max_n + 1):
        prefix = f"{n:03d}_"
        trees = df[ids.startswith(prefix)]
        if len(trees) != n:
            skipped.append(n)
            continue
        side = get_bounding_box(trees)
        contribution = (side ** 2) / n
        scores.append({'n': n, 'side': side, 'contribution': contribution})

    if skipped:
        warnings.warn(
            f'calculate_score: skipped {len(skipped)} of {max_n} group sizes whose '
            f'row count did not match n (first few: {skipped[:5]})',
            stacklevel=2,
        )

    # Passing the columns explicitly keeps 'contribution' present even when no
    # group was scored, so the sum below yields 0 instead of raising KeyError.
    scores_df = pd.DataFrame(scores, columns=columns)
    total_score = scores_df['contribution'].sum()
    return total_score, scores_df

print('Functions defined')

Functions defined


In [2]:
# Check N=1 configuration in the baseline
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df = pd.read_csv(baseline_path)

# Get N=1 configuration
n1_trees = df[df['id'].str.startswith('001_')]
print('N=1 configuration:')
print(n1_trees)

# Parse the values. The leading 's' is stripped only when it is actually there:
# slicing off the first character unconditionally would silently drop the sign
# or a digit from a value that carries no prefix.
first_tree = n1_trees.iloc[0]
x, y, deg = (
    float(str(first_tree[col])[1:]) if str(first_tree[col]).startswith('s') else float(first_tree[col])
    for col in ('x', 'y', 'deg')
)
print(f'\nPosition: ({x:.6f}, {y:.6f}), Angle: {deg:.2f} degrees')

N=1 configuration:
      id                     x                    y    deg
0  001_0  s-48.196086194214246  s58.770984615214225  s45.0

Position: (-48.196086, 58.770985), Angle: 45.00 degrees


In [3]:
# Calculate bounding box for N=1 at different angles
def get_single_tree_bbox(deg):
    """Return the bounding-square side of one tree at the origin rotated by ``deg`` degrees.

    The side is the larger of the outline's x-extent and y-extent.
    """
    xs, ys = get_tree_vertices(0, 0, deg)
    x_extent = xs.max() - xs.min()
    y_extent = ys.max() - ys.min()
    return max(x_extent, y_extent)

# Sweep whole-degree angles from 0 to 90 inclusive and keep the smallest box.
angles = np.linspace(0, 90, 91)
bbox_sizes = [get_single_tree_bbox(a) for a in angles]

best_index = int(np.argmin(bbox_sizes))
best_angle = angles[best_index]
best_bbox = bbox_sizes[best_index]

# `deg` is the N=1 angle parsed from the baseline in the previous cell.
current_bbox = get_single_tree_bbox(deg)

print(f'Current N=1 angle: {deg:.2f} degrees, bbox: {current_bbox:.6f}')
print(f'Best angle: {best_angle:.2f} degrees, bbox: {best_bbox:.6f}')
print(f'Improvement potential: {current_bbox - best_bbox:.6f}')

Current N=1 angle: 45.00 degrees, bbox: 0.813173
Best angle: 45.00 degrees, bbox: 0.813173
Improvement potential: 0.000000


In [4]:
# Compare multiple pre-optimized solutions
import glob

preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/'
csv_files = [
    preopt_dir + 'santa-2025.csv',
    preopt_dir + 'bucket-of-chump/submission.csv',
    preopt_dir + 'best_ensemble.csv',
    preopt_dir + 'ensemble.csv',
]

# One {'file', 'score'} record per successfully scored submission.
# NOTE(review): records are labelled by base name only, so files with the same
# name in different folders are indistinguishable in the ranking.
results = []
for csv_file in csv_files:
    label = os.path.basename(csv_file)
    if not os.path.exists(csv_file):
        # Say so explicitly; otherwise a missing candidate just vanishes from the ranking.
        print(f'{label}: Skipped - file not found')
        continue
    try:
        df = pd.read_csv(csv_file)
        score, _ = calculate_score(df)
        results.append({'file': label, 'score': score})
        print(f'{label}: {score:.6f}')
    except Exception as e:
        # Best effort: one unreadable or malformed file must not abort the comparison.
        print(f'{label}: Error - {e}')

santa-2025.csv: 70.676102


submission.csv: 70.676501


best_ensemble.csv: 70.676102


ensemble.csv: 70.676102


In [5]:
# Score every CSV in the santa25-public folder and add it to the shared results list.
public_dir = preopt_dir + 'santa25-public/'
public_csv_names = sorted(name for name in os.listdir(public_dir) if name.endswith('.csv'))
for f in public_csv_names:
    try:
        df = pd.read_csv(public_dir + f)
        score, _ = calculate_score(df)
        results.append({'file': f, 'score': score})
        print(f'{f}: {score:.6f}')
    except Exception as e:
        # Best effort: report the failing file and carry on with the rest.
        print(f'{f}: Error - {e}')

New_Tree_144_196.csv: 72.927920


santa2025_ver2_v61.csv: 72.951925


santa2025_ver2_v63.csv: 72.947427


santa2025_ver2_v65.csv: 72.935294


santa2025_ver2_v66.csv: 72.938599


santa2025_ver2_v67.csv: 72.938567


santa2025_ver2_v68.csv: 72.939233


santa2025_ver2_v69.csv: 72.850110


santa2025_ver2_v76.csv: 72.826444


submission_70_926149550346.csv: 70.926150


submission_70_936673758122.csv: 70.936674


submission_JKoT1.csv: 72.489483


submission_JKoT2.csv: 72.489348


submission_JKoT3.csv: 72.489488


submission_JKoT4.csv: 72.489504


submission_opt1.csv: 70.990692


In [6]:
# Rank every scored submission, lowest score first, and show the top ten.
results_df = pd.DataFrame(results).sort_values('score')
print('\nBest solutions:')
print(results_df.head(10))


Best solutions:
                              file      score
0                   santa-2025.csv  70.676102
2                best_ensemble.csv  70.676102
3                     ensemble.csv  70.676102
1                   submission.csv  70.676501
13  submission_70_926149550346.csv  70.926150
14  submission_70_936673758122.csv  70.936674
19             submission_opt1.csv  70.990692
16            submission_JKoT2.csv  72.489348
15            submission_JKoT1.csv  72.489483
17            submission_JKoT3.csv  72.489488


In [7]:
# Analyze score breakdown for the best solution
best_file = preopt_dir + 'santa-2025.csv'
df = pd.read_csv(best_file)
total_score, scores_df = calculate_score(df)

# Score to reach; kept in one place so the printed target and the gap cannot drift apart.
TARGET_SCORE = 68.919154

print(f'Total score: {total_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {total_score - TARGET_SCORE:.6f}')

# Score by N range (both bounds inclusive)
N_RANGES = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]

print('\nScore breakdown by N range:')
for start, end in N_RANGES:
    in_range = scores_df['n'].between(start, end)
    range_score = scores_df.loc[in_range, 'contribution'].sum()
    print(f'  N={start}-{end}: {range_score:.4f}')

Total score: 70.676102
Target: 68.919154
Gap: 1.756948

Score breakdown by N range:
  N=1-10: 4.3291
  N=11-50: 14.7130
  N=51-100: 17.6411
  N=101-150: 17.1441
  N=151-200: 16.8487


In [8]:
# Rank group sizes by how densely they use their bounding square.
# Efficiency is trees per unit of square area (n / side^2), i.e. the reciprocal
# of the per-group score contribution, so the lowest values cost the most.
scores_df = scores_df.assign(efficiency=scores_df['n'] / scores_df['side'].pow(2))

least_efficient = scores_df.nsmallest(20, 'efficiency')
print('\nLowest efficiency N values (most room for improvement):')
print(least_efficient[['n', 'side', 'contribution', 'efficiency']])


Lowest efficiency N values (most room for improvement):
     n      side  contribution  efficiency
0    1  0.813173      0.661250    1.512287
1    2  0.949504      0.450779    2.218381
2    3  1.142031      0.434745    2.300198
4    5  1.443692      0.416850    2.398947
3    4  1.290806      0.416545    2.400702
6    7  1.673104      0.399897    2.500647
5    6  1.548438      0.399610    2.502438
8    9  1.867280      0.387415    2.581211
7    8  1.755921      0.385407    2.594658
14  15  2.384962      0.379203    2.637111
9   10  1.940696      0.376630    2.655125
20  21  2.811667      0.376451    2.656389
19  20  2.742469      0.376057    2.659172
10  11  2.033002      0.375736    2.661443
21  22  2.873270      0.375258    2.664832
15  16  2.446640      0.374128    2.672882
25  26  3.118320      0.373997    2.673819
11  12  2.114873      0.372724    2.682950
12  13  2.200046      0.372323    2.685839
24  25  3.050182      0.372144    2.687129


In [9]:
# Check if N=1 is at 45 degrees
n1 = df[df['id'].str.startswith('001_')]
raw_deg_n1 = n1.iloc[0]['deg']
# Strip the leading 's' only when present; an unconditional [1:] would corrupt
# a value that carries no prefix.
deg_n1 = float(str(raw_deg_n1)[1:]) if str(raw_deg_n1).startswith('s') else float(raw_deg_n1)
print(f'N=1 angle: {deg_n1:.2f} degrees')
# The check accepts anything within one degree of 45.
print(f'Is it at 45 degrees? {abs(deg_n1 - 45) < 1}')

# Calculate the score contribution at 45 degrees vs current
# (for N=1 the contribution is side**2 / 1, i.e. just side**2)
current_side = get_single_tree_bbox(deg_n1)
optimal_side = get_single_tree_bbox(45)

print(f'\nCurrent N=1 side: {current_side:.6f}, contribution: {current_side**2:.6f}')
print(f'Optimal N=1 side (45 deg): {optimal_side:.6f}, contribution: {optimal_side**2:.6f}')
print(f'Potential improvement: {current_side**2 - optimal_side**2:.6f}')

N=1 angle: 45.00 degrees
Is it at 45 degrees? True

Current N=1 side: 0.813173, contribution: 0.661250
Optimal N=1 side (45 deg): 0.813173, contribution: 0.661250
Potential improvement: 0.000000


In [10]:
# Estimate a rough area-based floor on the score (an estimate, not a proven bound)
# For N trees, the enclosing square needs an area of approximately N * (single tree area) / packing_efficiency
# Packing efficiency for irregular shapes is assumed here to be around 0.8-0.9

# Single tree area (approximate)
from shapely.geometry import Polygon
from shapely import affinity

# Create tree polygon
# Tree outline dimensions
trunk_w, trunk_h = 0.15, 0.2
base_w, mid_w, top_w = 0.7, 0.4, 0.25
tip_y, tier_1_y, tier_2_y, base_y = 0.8, 0.5, 0.25, 0.0
trunk_bottom_y = -trunk_h

# Right-hand half of the outline, walking down from just below the tip to the
# bottom of the trunk.
right_side = [
    (top_w/2, tier_1_y), (top_w/4, tier_1_y),
    (mid_w/2, tier_2_y), (mid_w/4, tier_2_y),
    (base_w/2, base_y),
    (trunk_w/2, base_y), (trunk_w/2, trunk_bottom_y),
]
# The left-hand half is the mirror image, walked back up towards the tip.
left_side = [(-px, py) for px, py in reversed(right_side)]

tree_coords = [(0.0, tip_y)] + right_side + left_side

tree_poly = Polygon(tree_coords)
tree_area = tree_poly.area
print(f'Single tree area: {tree_area:.6f}')

# Estimated bounding-square side for N trees at an assumed packing density.
# 0.85 is an assumption, not a proven bound. Since min_side**2 / n reduces to
# tree_area / packing_efficiency, the estimated contribution is the same for every n.
packing_efficiency = 0.85
for n in (1, 10, 50, 100, 200):
    total_area = n * tree_area / packing_efficiency
    min_side = np.sqrt(total_area)
    min_contribution = min_side**2 / n
    print(f'N={n}: min_side={min_side:.4f}, min_contribution={min_contribution:.4f}')

Single tree area: 0.245625
N=1: min_side=0.5376, min_contribution=0.2890
N=10: min_side=1.6999, min_contribution=0.2890
N=50: min_side=3.8011, min_contribution=0.2890
N=100: min_side=5.3756, min_contribution=0.2890
N=200: min_side=7.6022, min_contribution=0.2890
