# Evolver Loop 2 Analysis

Goal: Find the best pre-optimized submission across ALL snapshots and understand what's needed to reach target 68.919154

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os
from glob import glob
import warnings
warnings.filterwarnings('ignore')

# Tree geometry
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
BASE_TREE = Polygon(zip(TX, TY))

def parse_value(s):
    if isinstance(s, str) and s.startswith('s'):
        return float(s[1:])
    return float(s)

def create_tree_polygon(x, y, deg):
    tree = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    tree = affinity.translate(tree, x, y)
    return tree

def calculate_score(df):
    """Calculate total score for a submission"""
    total = 0
    for n in range(1, 201):
        prefix = f"{n:03d}_"
        n_trees = df[df['id'].str.startswith(prefix)]
        if len(n_trees) != n:
            return None  # Invalid submission
        
        all_coords = []
        for _, row in n_trees.iterrows():
            x = parse_value(row['x'])
            y = parse_value(row['y'])
            deg = parse_value(row['deg'])
            poly = create_tree_polygon(x, y, deg)
            coords = np.array(poly.exterior.coords)
            all_coords.append(coords)
        
        all_coords = np.vstack(all_coords)
        x_range = all_coords[:, 0].max() - all_coords[:, 0].min()
        y_range = all_coords[:, 1].max() - all_coords[:, 1].min()
        side = max(x_range, y_range)
        total += side**2 / n
    
    return total

print("Functions defined")

Functions defined


In [2]:
# Find ALL CSV files with 20101 lines (valid submissions)
import subprocess

result = subprocess.run(
    ['find', '/home/nonroot/snapshots/santa-2025', '-name', '*.csv', '-size', '+1M'],
    capture_output=True, text=True
)
all_csvs = result.stdout.strip().split('\n')
print(f"Found {len(all_csvs)} CSV files")

# Filter to valid submissions (20101 lines)
valid_csvs = []
for csv_path in all_csvs:
    if not csv_path:
        continue
    try:
        result = subprocess.run(['wc', '-l', csv_path], capture_output=True, text=True)
        lines = int(result.stdout.split()[0])
        if lines == 20101:
            valid_csvs.append(csv_path)
    except:
        pass

print(f"Found {len(valid_csvs)} valid submissions (20101 lines)")

Found 806 CSV files


Found 806 valid submissions (20101 lines)


In [3]:
# Score a sample of submissions to find the best ones
import random

# First, let's check the known good files
known_files = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv',
    '/home/nonroot/snapshots/santa-2025/21164519357/code/exploration/santa-2025.csv',
]

scores = {}
for f in known_files:
    try:
        df = pd.read_csv(f)
        score = calculate_score(df)
        if score:
            scores[f] = score
            print(f"{os.path.basename(f)}: {score:.6f}")
    except Exception as e:
        print(f"Error with {f}: {e}")

santa-2025.csv: 70.676102


santa-2025.csv: 70.658891


In [4]:
# Check more files - sample from valid_csvs
print("\nChecking more files...")
for f in valid_csvs[:50]:  # Check first 50
    if f in scores:
        continue
    try:
        df = pd.read_csv(f)
        score = calculate_score(df)
        if score and score < 75:  # Only keep good scores
            scores[f] = score
            if score < 70.7:
                print(f"{score:.6f}: {f}")
    except Exception as e:
        pass

print(f"\nTotal files scored: {len(scores)}")


Checking more files...


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/experiments/004_sa_v1_parallel/submission_best.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/experiments/004_sa_v1_parallel/submission_v18.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/experiments/005_backward_propagation/submission.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/experiments/005_backward_propagation/submission_v21.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/experiments/005_backward_propagation/optimized.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/experiments/002_preoptimized/submission.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/submission.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/submission_candidates/candidate_004.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/submission_candidates/candidate_002.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/submission_candidates/candidate_003.csv


70.676099: /home/nonroot/snapshots/santa-2025/21116303805/code/submission_candidates/candidate_001.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv/santa-2025.csv


70.676501: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv


70.676501: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv


70.676102: /home/nonroot/snapshots/santa-2025/21116303805/submission/submission.csv


70.676102: /home/nonroot/snapshots/santa-2025/21121776553/code/experiments/001_baseline/submission.csv


70.676102: /home/nonroot/snapshots/santa-2025/21121776553/code/submission_candidates/candidate_000.csv


70.676102: /home/nonroot/snapshots/santa-2025/21165872902/code/experiments/003_tree_packer_v21/output.csv


70.676102: /home/nonroot/snapshots/santa-2025/21165872902/code/experiments/002_bbox3_medium/output.csv


70.676102: /home/nonroot/snapshots/santa-2025/21165872902/code/experiments/004_ensemble/ensemble.csv


70.676102: /home/nonroot/snapshots/santa-2025/21165872902/code/experiments/002_tree_packer_v18/output.csv

Total files scored: 50


In [5]:
# Find the best score
best_file = min(scores, key=scores.get)
best_score = scores[best_file]
print(f"\nBest score found: {best_score:.6f}")
print(f"File: {best_file}")
print(f"\nTarget: 68.919154")
print(f"Gap: {best_score - 68.919154:.6f} ({(best_score - 68.919154) / 68.919154 * 100:.2f}%)")


Best score found: 70.658891
File: /home/nonroot/snapshots/santa-2025/21164519357/code/exploration/santa-2025.csv

Target: 68.919154
Gap: 1.739737 (2.52%)


In [6]:
# Check all files with scores below 70.7
print("\nAll files with score < 70.7:")
for f, s in sorted(scores.items(), key=lambda x: x[1]):
    if s < 70.7:
        print(f"{s:.6f}: {os.path.basename(f)}")


All files with score < 70.7:
70.658891: santa-2025.csv
70.676099: candidate_001.csv
70.676102: candidate_000.csv
70.676102: submission.csv
70.676102: output.csv
70.676102: submission_v18.csv
70.676102: output.csv
70.676102: santa-2025.csv
70.676102: submission_best.csv
70.676102: submission.csv
70.676102: submission_v21.csv
70.676102: optimized.csv
70.676102: submission.csv
70.676102: candidate_004.csv
70.676102: candidate_002.csv
70.676102: candidate_003.csv
70.676102: ensemble.csv
70.676102: santa-2025.csv
70.676102: best_ensemble.csv
70.676102: submission.csv
70.676102: submission.csv
70.676102: ensemble.csv
70.676102: output.csv
70.676501: submission.csv
70.676501: submission.csv


In [7]:
# Analyze the best file - which N values have room for improvement?
best_df = pd.read_csv(best_file)

print("Score breakdown by N (top 20 contributors):")
n_scores = []
for n in range(1, 201):
    prefix = f"{n:03d}_"
    n_trees = best_df[best_df['id'].str.startswith(prefix)]
    
    all_coords = []
    for _, row in n_trees.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        poly = create_tree_polygon(x, y, deg)
        coords = np.array(poly.exterior.coords)
        all_coords.append(coords)
    
    all_coords = np.vstack(all_coords)
    x_range = all_coords[:, 0].max() - all_coords[:, 0].min()
    y_range = all_coords[:, 1].max() - all_coords[:, 1].min()
    side = max(x_range, y_range)
    score = side**2 / n
    n_scores.append((n, side, score))

# Sort by score contribution (highest first)
n_scores_sorted = sorted(n_scores, key=lambda x: x[2], reverse=True)
for n, side, score in n_scores_sorted[:20]:
    print(f"N={n:3d}: side={side:.6f}, score={score:.6f}")

Score breakdown by N (top 20 contributors):


N=  1: side=0.813173, score=0.661250
N=  2: side=0.949504, score=0.450779
N=  3: side=1.142031, score=0.434745
N=  5: side=1.443692, score=0.416850
N=  4: side=1.290806, score=0.416545
N=  7: side=1.673104, score=0.399897
N=  6: side=1.548438, score=0.399610
N=  9: side=1.867280, score=0.387415
N=  8: side=1.755921, score=0.385407
N= 15: side=2.384962, score=0.379203
N= 10: side=1.940696, score=0.376630
N= 21: side=2.811667, score=0.376451
N= 20: side=2.742469, score=0.376057
N= 11: side=2.033002, score=0.375736
N= 22: side=2.873270, score=0.375258
N= 16: side=2.446640, score=0.374128
N= 26: side=3.118320, score=0.373997
N= 12: side=2.114873, score=0.372724
N= 13: side=2.199960, score=0.372294
N= 25: side=3.050182, score=0.372144


In [9]:
# Calculate theoretical lower bound
# Tree area is approximately 0.2625 (from BASE_TREE.area)
tree_area = BASE_TREE.area
print(f"Tree area: {tree_area:.6f}")

# For N trees, minimum bounding box area >= N * tree_area
# So minimum side >= sqrt(N * tree_area)
# And minimum score contribution >= N * tree_area / N = tree_area

print("\nTheoretical lower bounds (assuming perfect packing):")
theoretical_total = 0
for n in range(1, 201):
    # Minimum side is sqrt(N * tree_area) if trees could pack perfectly
    min_side = np.sqrt(n * tree_area)
    min_score = min_side**2 / n  # = tree_area
    theoretical_total += min_score

print(f"Theoretical minimum (perfect packing): {theoretical_total:.6f}")
print(f"Current best: {best_score:.6f}")
print(f"Target: 68.919154")
print(f"\nPacking efficiency needed for target: {theoretical_total / 68.919154 * 100:.1f}%")
print(f"Current packing efficiency: {theoretical_total / best_score * 100:.1f}%")

Tree area: 0.245625

Theoretical lower bounds (assuming perfect packing):
Theoretical minimum (perfect packing): 49.125000
Current best: 70.658891
Target: 68.919154

Packing efficiency needed for target: 71.3%
Current packing efficiency: 69.5%


In [None]:
# Check which N values have the most room for improvement
print("\nN values with most room for improvement (vs theoretical):")
improvements = []
for n, side, score in n_scores:
    min_side = np.sqrt(n * tree_area)
    min_score = tree_area  # = min_side^2 / n
    room = score - min_score
    efficiency = min_score / score * 100
    improvements.append((n, side, score, min_score, room, efficiency))

# Sort by room for improvement
improvements_sorted = sorted(improvements, key=lambda x: x[4], reverse=True)
print("\nTop 20 N values with most room for improvement:")
for n, side, score, min_score, room, eff in improvements_sorted[:20]:
    print(f"N={n:3d}: score={score:.4f}, theoretical={min_score:.4f}, room={room:.4f}, eff={eff:.1f}%")

In [10]:
# Check which N values have the most room for improvement
print("\nN values with most room for improvement (vs theoretical):")
improvements = []
for n, side, score in n_scores:
    min_side = np.sqrt(n * tree_area)
    min_score = tree_area  # = min_side^2 / n
    room = score - min_score
    efficiency = min_score / score * 100
    improvements.append((n, side, score, min_score, room, efficiency))

# Sort by room for improvement
improvements_sorted = sorted(improvements, key=lambda x: x[4], reverse=True)
print("\nTop 20 N values with most room for improvement:")
for n, side, score, min_score, room, eff in improvements_sorted[:20]:
    print(f"N={n:3d}: score={score:.4f}, theoretical={min_score:.4f}, room={room:.4f}, eff={eff:.1f}%")


N values with most room for improvement (vs theoretical):

Top 20 N values with most room for improvement:
N=  1: score=0.6612, theoretical=0.2456, room=0.4156, eff=37.1%
N=  2: score=0.4508, theoretical=0.2456, room=0.2052, eff=54.5%
N=  3: score=0.4347, theoretical=0.2456, room=0.1891, eff=56.5%
N=  5: score=0.4168, theoretical=0.2456, room=0.1712, eff=58.9%
N=  4: score=0.4165, theoretical=0.2456, room=0.1709, eff=59.0%
N=  7: score=0.3999, theoretical=0.2456, room=0.1543, eff=61.4%
N=  6: score=0.3996, theoretical=0.2456, room=0.1540, eff=61.5%
N=  9: score=0.3874, theoretical=0.2456, room=0.1418, eff=63.4%
N=  8: score=0.3854, theoretical=0.2456, room=0.1398, eff=63.7%
N= 15: score=0.3792, theoretical=0.2456, room=0.1336, eff=64.8%
N= 10: score=0.3766, theoretical=0.2456, room=0.1310, eff=65.2%
N= 21: score=0.3765, theoretical=0.2456, room=0.1308, eff=65.2%
N= 20: score=0.3761, theoretical=0.2456, room=0.1304, eff=65.3%
N= 11: score=0.3757, theoretical=0.2456, room=0.1301, eff=65

In [11]:
# Check more files from different snapshots to find the absolute best\nprint("Checking more files from all snapshots...\")\n\n# Sample more files\nfor f in valid_csvs[50:200]:  # Check next 150\n    if f in scores:\n        continue\n    try:\n        df = pd.read_csv(f)\n        score = calculate_score(df)\n        if score and score < 75:\n            scores[f] = score\n            if score < 70.66:  # Only print if better than current best\n                print(f\"{score:.6f}: {f}\")\n    except Exception as e:\n        pass\n\nprint(f\"\\nTotal files scored: {len(scores)}\")\n\n# Find the best\nbest_file = min(scores, key=scores.get)\nbest_score = scores[best_file]\nprint(f\"\\nBest score found: {best_score:.6f}\")\nprint(f\"File: {best_file}\")

In [12]:
# Check more files from different snapshots to find the absolute best
print("Checking more files from all snapshots...")

# Sample more files
for f in valid_csvs[50:200]:  # Check next 150
    if f in scores:
        continue
    try:
        df = pd.read_csv(f)
        score = calculate_score(df)
        if score and score < 75:
            scores[f] = score
            if score < 70.66:  # Only print if better than current best
                print(f"{score:.6f}: {f}")
    except Exception as e:
        pass

print(f"\nTotal files scored: {len(scores)}")

# Find the best
best_file = min(scores, key=scores.get)
best_score = scores[best_file]
print(f"\nBest score found: {best_score:.6f}")
print(f"File: {best_file}")

Checking more files from all snapshots...


70.658891: /home/nonroot/snapshots/santa-2025/21165872902/code/submission_candidates/candidate_001.csv


70.658891: /home/nonroot/snapshots/santa-2025/21165872902/submission/submission.csv


70.659982: /home/nonroot/snapshots/santa-2025/21164519357/code/exploration/submission.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/submission_temp.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/experiments/011_comprehensive_ensemble/ensemble_valid.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/experiments/011_long_optimization/submission.csv


70.659943: /home/nonroot/snapshots/santa-2025/21145963314/code/experiments/017_just_luck_multiphase/eazy_output.csv


70.659943: /home/nonroot/snapshots/santa-2025/21145963314/code/experiments/015_crodoc_ensemble/submission.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/research/kernels/jazivxt_eazy-optimizer/submission_input.csv


70.659944: /home/nonroot/snapshots/santa-2025/21145963314/code/research/kernels/jazivxt_eazy-optimizer/submission.csv


39.508442: /home/nonroot/snapshots/santa-2025/21145963314/code/submission.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/submission_candidates/candidate_007.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/submission_candidates/candidate_008.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/submission_candidates/candidate_005.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/submission_candidates/candidate_006.csv


70.659982: /home/nonroot/snapshots/santa-2025/21145963314/code/external_data/bucket/submission.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145963314/code/external_data/saspav_latest/santa-2025.csv


70.659958: /home/nonroot/snapshots/santa-2025/21145965159/code/experiments/better_solution.csv


70.659959: /home/nonroot/snapshots/santa-2025/21145965159/code/experiments/009_better_baseline/submission.csv


70.659959: /home/nonroot/snapshots/santa-2025/21145965159/code/experiments/009_snapshot_best/submission.csv


70.659944: /home/nonroot/snapshots/santa-2025/21145965159/code/experiments/009_snapshot_ensemble/ensemble.csv


70.659944: /home/nonroot/snapshots/santa-2025/21145965159/code/experiments/009_snapshot_ensemble/submission.csv


70.659944: /home/nonroot/snapshots/santa-2025/21145965159/code/submission_candidates/candidate_007.csv



Total files scored: 167

Best score found: 39.508442
File: /home/nonroot/snapshots/santa-2025/21145963314/code/submission.csv


In [13]:
# Check the suspicious 39.5 score file\nsuspicious_file = '/home/nonroot/snapshots/santa-2025/21145963314/code/submission.csv'\ndf_sus = pd.read_csv(suspicious_file)\nprint(f\"Rows: {len(df_sus)}\")\nprint(f\"Columns: {df_sus.columns.tolist()}\")\nprint(df_sus.head(10))\n\n# Check if it has all N values\nfor n in range(1, 11):\n    prefix = f\"{n:03d}_\"\n    count = len(df_sus[df_sus['id'].str.startswith(prefix)])\n    print(f\"N={n}: {count} trees\")

In [14]:
# Check the suspicious 39.5 score file
suspicious_file = '/home/nonroot/snapshots/santa-2025/21145963314/code/submission.csv'
df_sus = pd.read_csv(suspicious_file)
print(f"Rows: {len(df_sus)}")
print(f"Columns: {df_sus.columns.tolist()}")
print(df_sus.head(10))

# Check if it has all N values
for n in range(1, 11):
    prefix = f"{n:03d}_"
    count = len(df_sus[df_sus['id'].str.startswith(prefix)])
    print(f"N={n}: {count} trees")

Rows: 20100
Columns: ['id', 'x', 'y', 'deg']
      id                    x                   y                  deg
0  001_0  s48.196086194214246  s8.770984615214225   s5.000000000000000
1  002_0   s0.154097069621356  s0.038540742694795   s3.629377730656840
2  002_1   s0.154097069621373  s0.561459257305224   s3.629377730656792
3  003_0   s0.123655816140301  s0.781101815992563  s11.125132292892999
4  003_1   s0.234055695842160  s0.275999500663759   s6.370622269343000
5  003_2   s0.641714640229075  s0.180458566613381  s55.134051937100821
6  004_0   s0.324747789590876  s0.132109978091186  s56.370622145636403
7  004_1   s0.315354346241134  s0.132109978066476  s56.370622269264103
8  004_2   s0.324747789590876  s0.732109978066475  s36.370622269263997
9  004_3   s0.315354348136322  s0.732109978091186  s36.370622145636453
N=1: 1 trees
N=2: 2 trees
N=3: 3 trees
N=4: 4 trees
N=5: 5 trees
N=6: 6 trees
N=7: 7 trees
N=8: 8 trees
N=9: 9 trees
N=10: 10 trees


In [15]:
# Check for overlaps in the suspicious file\ndef check_overlaps_for_n(df, n):\n    prefix = f\"{n:03d}_\"\n    n_trees = df[df['id'].str.startswith(prefix)]\n    \n    polys = []\n    for _, row in n_trees.iterrows():\n        x = parse_value(row['x'])\n        y = parse_value(row['y'])\n        deg = parse_value(row['deg'])\n        polys.append(create_tree_polygon(x, y, deg))\n    \n    # Check for overlaps\n    for i in range(len(polys)):\n        for j in range(i+1, len(polys)):\n            if polys[i].intersects(polys[j]) and not polys[i].touches(polys[j]):\n                inter = polys[i].intersection(polys[j])\n                if inter.area > 1e-10:\n                    return True, inter.area\n    return False, 0\n\nprint(\"Checking overlaps in suspicious file...\")\nfor n in range(1, 21):\n    has_overlap, area = check_overlaps_for_n(df_sus, n)\n    if has_overlap:\n        print(f\"N={n}: OVERLAP! area={area:.10f}\")

In [None]:
# Check for overlaps in the suspicious file
def check_overlaps_for_n(df, n):
    prefix = f"{n:03d}_"
    n_trees = df[df['id'].str.startswith(prefix)]
    
    polys = []
    for _, row in n_trees.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        polys.append(create_tree_polygon(x, y, deg))
    
    # Check for overlaps
    for i in range(len(polys)):
        for j in range(i+1, len(polys)):
            if polys[i].intersects(polys[j]) and not polys[i].touches(polys[j]):
                inter = polys[i].intersection(polys[j])
                if inter.area > 1e-10:
                    return True, inter.area
    return False, 0

print("Checking overlaps in suspicious file...")
for n in range(1, 21):
    has_overlap, area = check_overlaps_for_n(df_sus, n)
    if has_overlap:
        print(f"N={n}: OVERLAP! area={area:.10f}")