# Evolver Loop 1 Analysis

## Current Situation
- Best CV score: 70.624381 from exp_000 (baseline)
- Best LB score: N/A (no submissions yet)
- Target: 68.901319
- Gap: 1.723 points (2.50% above the target; lower is better)

## Key Questions
1. What is the score breakdown by N range?
2. Which N values have the most room for improvement?
3. What techniques from top kernels haven't been tried?

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
import os

# Use 30 significant digits for Decimal arithmetic in the current context.
getcontext().prec = 30

# Tree geometry
class ChristmasTree:
    """One tree placed in the plane as a rotated, translated polygon.

    The silhouette is a fixed 15-vertex outline (tip, three foliage tiers and
    a trunk) defined around the origin. It is rotated about the origin by
    ``angle`` (shapely's default unit, degrees) and then shifted by
    ``(center_x, center_y)``.

    Attributes:
        center_x: Horizontal offset of the tree, stored as ``Decimal``.
        center_y: Vertical offset of the tree, stored as ``Decimal``.
        angle: Rotation handed to ``affinity.rotate``, stored as ``Decimal``.
        polygon: The placed shapely ``Polygon``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the outline and place it.

        Args:
            center_x: Anything ``Decimal`` accepts; x offset of the tree.
            center_y: Anything ``Decimal`` accepts; y offset of the tree.
            angle: Anything ``Decimal`` accepts; rotation about the origin.
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of each horizontal level, from the tip down to the trunk foot.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -Decimal('0.2')

        # Right-hand half of the outline, walked from just below the tip down
        # to the trunk foot. Arithmetic stays in Decimal until the final
        # conversion so the halves/quarters are exact before becoming floats.
        right_half = [
            (float(half_width), float(level))
            for half_width, level in (
                (top_w / 2, tier_1_y),
                (top_w / 4, tier_1_y),
                (mid_w / 2, tier_2_y),
                (mid_w / 4, tier_2_y),
                (base_w / 2, base_y),
                (trunk_w / 2, base_y),
                (trunk_w / 2, trunk_bottom_y),
            )
        ]
        # The left half is the mirror image, walked back up towards the tip.
        left_half = [(-px, py) for px, py in reversed(right_half)]

        outline = Polygon([(0.0, float(tip_y))] + right_half + left_half)

        # Rotate about the origin first, then move the tree into position.
        turned = affinity.rotate(outline, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def load_trees_for_n(df, n):
    """Build the trees belonging to the ``n``-tree configuration.

    Rows are selected by the zero-padded three-digit id prefix (``"007_"`` for
    ``n == 7``). The ``x``, ``y`` and ``deg`` cells are converted to strings
    and have their leading ``'s'`` characters removed before being handed to
    ``ChristmasTree``.

    Args:
        df: DataFrame with string column ``id`` and columns ``x``, ``y``, ``deg``.
        n: Configuration number used to form the id prefix.

    Returns:
        list of ``ChristmasTree``, in row order; empty if no id matches.
    """
    group = df[df['id'].str.startswith(f"{n:03d}_")]

    def unprefixed(cell):
        # Values are stored as e.g. "s0.154..."; drop the leading marker.
        return str(cell).lstrip('s')

    return [
        ChristmasTree(unprefixed(x_cell), unprefixed(y_cell), unprefixed(deg_cell))
        for x_cell, y_cell, deg_cell in zip(group['x'], group['y'], group['deg'])
    ]

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box enclosing every tree.

    Args:
        trees: Sequence of objects exposing ``polygon.exterior.coords``.

    Returns:
        ``0`` for an empty sequence; otherwise the larger of the x extent and
        the y extent taken over all exterior vertices.
    """
    if not trees:
        return 0

    # Stack every exterior vertex of every tree into one (num_points, dims) array.
    vertices = np.vstack([np.array(t.polygon.exterior.coords) for t in trees])
    lows = vertices.min(axis=0)
    highs = vertices.max(axis=0)

    width = highs[0] - lows[0]
    height = highs[1] - lows[1]
    return height if height > width else width

# Visible confirmation that this cell ran to completion.
print('Scoring functions defined.')

Scoring functions defined.


In [2]:
# Read the submission currently considered best and preview its first rows
submission_csv = '/home/submission/submission.csv'
df = pd.read_csv(submission_csv)
row_count = len(df)
print(f'Loaded submission with {row_count} rows')
print(df.head())

Loaded submission with 20100 rows
      id                     x                     y                  deg
0  001_0    s43.59119209210215   s-31.78326706874178   s44.99999999999998
1  002_0   s0.1540970696213558  s-0.0385407426947946  s203.62937773065684
2  002_1  s-0.1540970696213728   s-0.561459257305224  s23.629377730656792
3  003_0   s1.1312705850687463   s0.7922028723269486  s113.56326044172948
4  003_1     s1.23405569584216    s1.275999500663759     s66.370622269343


In [3]:
# Calculate score breakdown by N
# Score to beat. Kept in one place so the printed target and the computed gap
# cannot drift apart.
TARGET_SCORE = 68.901319

scores = {}        # n -> side**2 / n (that configuration's score contribution)
side_lengths = {}  # n -> side of the bounding box around the n trees

for n in range(1, 201):
    trees = load_trees_for_n(df, n)
    if len(trees) != n:
        # A configuration with the wrong number of trees cannot be scored;
        # report it and leave this N out of both dictionaries.
        print(f'Warning: N={n} has {len(trees)} trees')
        continue
    side = get_bounding_box_side(trees)
    side_lengths[n] = side
    scores[n] = (side ** 2) / n

total_score = sum(scores.values())
print(f'Total score: {total_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {total_score - TARGET_SCORE:.6f}')

Total score: 70.624381
Target: 68.901319
Gap: 1.723062


In [4]:
# Score breakdown by N range
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
print('\nScore breakdown by N range:')
for start, end in ranges:
    # The scoring loop omits any N whose tree count was wrong, so only sum the
    # N that actually have a score instead of raising KeyError on a gap.
    scored_ns = [n for n in range(start, end + 1) if n in scores]
    range_score = sum(scores[n] for n in scored_ns)
    missing = (end - start + 1) - len(scored_ns)
    # Flag incomplete ranges so a partial sum is not mistaken for a full one.
    note = f' [{missing} N unscored]' if missing else ''
    print(f'  N={start:3d}-{end:3d}: {range_score:.6f} ({range_score/total_score*100:.1f}%){note}')


Score breakdown by N range:
  N=  1- 10: 4.329128 (6.1%)
  N= 11- 50: 14.704188 (20.8%)
  N= 51-100: 17.611858 (24.9%)
  N=101-150: 17.136254 (24.3%)
  N=151-200: 16.842953 (23.8%)


In [5]:
# Top 20 N values by score contribution
print('\nTop 20 N values by score contribution (S^2/n):')
# Iterate over the N that were actually scored (ascending) rather than a fixed
# 1..200 range, so an N skipped by the scoring loop does not raise KeyError.
contributions = [(n, scores[n], side_lengths[n]) for n in sorted(scores)]
# Largest contribution first; the sort is stable, so ties stay in ascending N.
contributions.sort(key=lambda x: x[1], reverse=True)

for n, contrib, side in contributions[:20]:
    print(f'  N={n:3d}: contrib={contrib:.6f}, side={side:.6f}')


Top 20 N values by score contribution (S^2/n):
  N=  1: contrib=0.661250, side=0.813173
  N=  2: contrib=0.450779, side=0.949504
  N=  3: contrib=0.434745, side=1.142031
  N=  5: contrib=0.416850, side=1.443692
  N=  4: contrib=0.416545, side=1.290806
  N=  7: contrib=0.399897, side=1.673104
  N=  6: contrib=0.399610, side=1.548438
  N=  9: contrib=0.387415, side=1.867280
  N=  8: contrib=0.385407, side=1.755921
  N= 15: contrib=0.376950, side=2.377866
  N= 10: contrib=0.376630, side=1.940696
  N= 21: contrib=0.376451, side=2.811667
  N= 20: contrib=0.376057, side=2.742469
  N= 22: contrib=0.375258, side=2.873270
  N= 11: contrib=0.374924, side=2.030803
  N= 16: contrib=0.374128, side=2.446640
  N= 26: contrib=0.373997, side=3.118320
  N= 12: contrib=0.372724, side=2.114873
  N= 13: contrib=0.372294, side=2.199960
  N= 25: contrib=0.372144, side=3.050182


In [6]:
# Analyze potential improvement areas
# N=1 reference: a single tree (height 1.0, width 0.7) rotated to 45 degrees
# (diagonal) fits a bounding box of side ~0.813, taken here as the minimum.

print('\nAnalysis of improvement potential:')
n1_side = side_lengths[1]
print(f'N=1 current side: {n1_side:.6f}')
print('N=1 theoretical min (45 deg): ~0.8132 (score contribution: 0.6613)')
n1_contribution = scores[1]
print(f'N=1 current contribution: {n1_contribution:.6f}')

# Each contribution scales with side**2, so a 1% shorter side lowers the score
# by about 2% (first-order approximation; the exact factor is 1 - 0.99**2 = 1.99%).
score_gain_fraction = 0.02
print('\nImprovement potential by range (assuming 1% reduction in side length):')
for start, end in ranges:
    range_total = sum(scores[k] for k in range(start, end + 1))
    gain = range_total * score_gain_fraction
    print(f'  N={start:3d}-{end:3d}: potential improvement = {gain:.6f}')


Analysis of improvement potential:
N=1 current side: 0.813173
N=1 theoretical min (45 deg): ~0.8132 (score contribution: 0.6613)
N=1 current contribution: 0.661250

Improvement potential by range (assuming 1% reduction in side length):
  N=  1- 10: potential improvement = 0.086583
  N= 11- 50: potential improvement = 0.294084
  N= 51-100: potential improvement = 0.352237
  N=101-150: potential improvement = 0.342725
  N=151-200: potential improvement = 0.336859


In [7]:
# Check if there are better solutions in other snapshots
import glob

# glob returns matches in arbitrary order; sort so the "first 5 directories /
# first 3 files" sample below is the same on every run.
snapshot_dirs = sorted(glob.glob('/home/nonroot/snapshots/santa-2025/*/code/submission_candidates/'))
print(f'Found {len(snapshot_dirs)} snapshot directories with candidates')

# Sample a few to check scores
SAMPLE_NS = [1, 2, 3, 10, 50, 100, 200]  # N values compared against the current submission
# Start from whatever the current submission managed to score; an N that was
# skipped during scoring is simply absent instead of raising KeyError.
best_per_n = dict(scores)
best_source = {n: 'current' for n in scores}

# NOTE(review): only the bounding-box score is compared here; nothing in this
# cell checks that a candidate's trees do not overlap — confirm before adopting.
for snap_dir in snapshot_dirs[:5]:  # Check first 5
    csv_files = sorted(glob.glob(snap_dir + '*.csv'))
    for csv_file in csv_files[:3]:  # Check first 3 files per snapshot
        try:
            df_snap = pd.read_csv(csv_file)
            for n in SAMPLE_NS:
                trees = load_trees_for_n(df_snap, n)
                if len(trees) != n:
                    continue
                side = get_bounding_box_side(trees)
                score = (side ** 2) / n
                if score < best_per_n.get(n, float('inf')):
                    best_per_n[n] = score
                    best_source[n] = csv_file
        except Exception as exc:
            # Still best-effort (an unreadable or malformed candidate must not
            # stop the scan), but report it, and let KeyboardInterrupt /
            # SystemExit propagate instead of being swallowed.
            print(f'  Skipping {csv_file}: {type(exc).__name__}: {exc}')

print('\nBetter solutions found in snapshots:')
for n in SAMPLE_NS:
    source = best_source.get(n)
    if source is None:
        # Neither the current submission nor any sampled candidate has this N.
        print(f'  N={n}: no valid configuration found')
    elif source == 'current':
        print(f'  N={n}: current is best')
    elif n in scores:
        improvement = scores[n] - best_per_n[n]
        print(f'  N={n}: improvement={improvement:.6f} from {source}')
    else:
        # The current submission has no score for this N, so there is no
        # baseline to subtract from.
        print(f'  N={n}: score={best_per_n[n]:.6f} only available from {source}')

Found 71 snapshot directories with candidates



Better solutions found in snapshots:
  N=1: current is best
  N=2: current is best
  N=3: current is best
  N=10: current is best
  N=50: current is best
  N=100: current is best
  N=200: current is best
