# Evolver Loop 5 Analysis

## Submission Failure Analysis

exp_004 failed with the error "Solution and submission values for id do not match".

This is a different error than the overlap errors. Let's investigate.

In [None]:
import pandas as pd
import os

# Load the failed submission
sub = pd.read_csv('/home/submission/submission.csv')
print(f"Submission shape: {sub.shape}")
print(f"Unique N values: {len(sub['id'].str.split('_').str[0].unique())}")

# Expected IDs: for every N in 1..200 the trees are indexed 0..N-1, and the ID
# is N zero-padded to three digits joined to the unpadded index (e.g. 013_0).
expected_ids = [f"{n:03d}_{i}" for n in range(1, 201) for i in range(n)]

print(f"Expected IDs: {len(expected_ids)}")
print(f"Actual IDs: {len(sub['id'])}")

# Check for mismatches (set differences in both directions)
sub_ids = set(sub['id'].tolist())
expected_set = set(expected_ids)

missing = expected_set - sub_ids
extra = sub_ids - expected_set

print(f"\nMissing IDs: {len(missing)}")
if missing:
    print(f"First 10 missing: {sorted(missing)[:10]}")
print(f"Extra IDs: {len(extra)}")
if extra:
    # key=str keeps the usual ordering for string IDs while not raising if a
    # non-string value (e.g. NaN from an empty cell) ended up in the column.
    print(f"First 10 extra: {sorted(extra, key=str)[:10]}")

# Identify which N values have wrong format.
# The unexpected IDs are by definition suspect, so an ID whose prefix is not an
# integer is collected and reported instead of aborting the whole diagnostic.
wrong_format_n = set()
unparseable_ids = []
for id_val in extra:
    try:
        n = int(str(id_val).split('_')[0])
    except ValueError:
        unparseable_ids.append(id_val)
        continue
    wrong_format_n.add(n)
print(f"\nN values with wrong ID format: {sorted(wrong_format_n)}")
if unparseable_ids:
    print(f"Extra IDs without a numeric N prefix: {sorted(unparseable_ids, key=str)[:10]}")

In [None]:
# Find which snapshots have wrong ID format
snapshot_base = '/home/nonroot/snapshots/santa-2025/'
bad_snapshots = []


def _id_is_malformed(id_val):
    """Return True when ``id_val`` is not a canonical ``NNN_I`` submission ID.

    A canonical ID is N zero-padded to three digits, an underscore, and the
    tree index written without padding, with ``0 <= index < N``
    (e.g. ``013_0``, not ``013_000``).
    """
    n_str, sep, idx_str = id_val.partition('_')
    if not sep:
        return True
    try:
        n = int(n_str)
        idx = int(idx_str)
    except ValueError:
        return True
    # Re-rendering the ID catches padding of any width. Comparing string
    # lengths alone misses cases such as '150_007', where the padded index is
    # exactly as wide as N-1.
    return not 0 <= idx < n or id_val != f"{n:03d}_{idx}"


def _snapshot_has_malformed_id(sub_path):
    """Return True if any 4-column data row of the CSV has a malformed ID."""
    with open(sub_path, 'r') as f:
        next(f, None)  # Skip header; the default avoids StopIteration on an empty file
        for line in f:
            parts = line.strip().split(',')
            if len(parts) != 4:
                continue
            if _id_is_malformed(parts[0]):
                return True
    return False


for snap_dir in sorted(os.listdir(snapshot_base)):
    sub_path = os.path.join(snapshot_base, snap_dir, 'submission', 'submission.csv')
    if not os.path.exists(sub_path):
        continue
    if _snapshot_has_malformed_id(sub_path):
        bad_snapshots.append(snap_dir)

print(f"Snapshots with wrong ID format: {len(bad_snapshots)}")
print(f"Bad snapshots: {bad_snapshots}")

In [None]:
# Analyze the gap to target
target = 68.887226
best_lb = 70.615107  # exp_001
best_cv = 70.522682  # exp_004 (but failed submission)

# Absolute distance of each score from the target; the percentages printed
# below are these distances relative to the target.
lb_gap = best_lb - target
cv_gap = best_cv - target

gap_rule = "=" * 60
print(gap_rule)
print("GAP ANALYSIS")
print(gap_rule)

print(f"Target score: {target}")
print(f"Best LB score: {best_lb}")
print(f"Best CV score: {best_cv}")
print("")
print(f"Gap from best LB to target: {lb_gap:.6f} ({lb_gap / target * 100:.2f}%)")
print(f"Gap from best CV to target: {cv_gap:.6f} ({cv_gap / target * 100:.2f}%)")
print("")
print("CRITICAL: We need to improve by ~1.73 points (2.5%)")
print("The ensemble approach only gave 0.05 improvement.")
print("At this rate, we need ~35x more improvement!")

In [None]:
# Analyze per-N contributions to understand where improvements are possible
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

# Outline of one tree as (x, y) vertex pairs, in the order they are fed to the
# polygon constructor by create_tree_polygon. TX and TY are the per-axis views.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = [vertex[0] for vertex in _TREE_OUTLINE]
TY = [vertex[1] for vertex in _TREE_OUTLINE]

def create_tree_polygon(x, y, angle):
    """Build one tree polygon placed at (x, y) with the given rotation.

    The outline defined by TX/TY is first rotated by ``angle`` about the
    origin (0, 0) and only then translated by (x, y).

    Args:
        x: Translation along the x axis.
        y: Translation along the y axis.
        angle: Rotation passed straight to ``affinity.rotate`` (shapely
            interprets it as degrees unless told otherwise).

    Returns:
        The rotated and translated shapely ``Polygon``.
    """
    outline = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(outline, angle, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def calculate_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Args:
        trees: Iterable of ``(x, y, angle)`` tuples, each unpacked into
            ``create_tree_polygon``.

    Returns:
        The larger of the width and height of the bounds of the union of all
        tree polygons.
    """
    merged = unary_union([create_tree_polygon(*tree) for tree in trees])
    box = merged.bounds  # (min_x, min_y, max_x, max_y)
    width = box[2] - box[0]
    height = box[3] - box[1]
    return max(width, height)

def calculate_score_for_n(trees, n):
    """Return the score of one configuration: bounding side squared over n.

    Args:
        trees: Iterable of ``(x, y, angle)`` tuples, forwarded to
            ``calculate_side``.
        n: Divisor applied to the squared side (the tree count of the
            configuration at the call sites in this file).

    Returns:
        ``side ** 2 / n`` where ``side`` comes from ``calculate_side(trees)``.
    """
    bounding_side = calculate_side(trees)
    squared_side = bounding_side ** 2
    return squared_side / n

# Confirmation message printed once the geometry helpers above are defined.
print("Functions defined!")

In [None]:
# Load baseline and calculate per-N scores
# The baseline is the submission CSV stored under snapshot 21145966992.
baseline_path = '/home/nonroot/snapshots/santa-2025/21145966992/submission/submission.csv'
baseline = pd.read_csv(baseline_path)

# Parse coordinates (handle 's' prefix)
def parse_coord(val):
    """Convert a coordinate cell to ``float``.

    String values that start with ``'s'`` have that single leading character
    dropped before conversion; every other value is passed to ``float`` as is.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_prefix else val)

# Add numeric copies of the coordinate columns, then the integer N of each row
# (the part of the id before the underscore).
for raw_col in ('x', 'y', 'deg'):
    baseline[f'{raw_col}_float'] = baseline[raw_col].apply(parse_coord)
id_prefix = baseline['id'].str.split('_').str[0]
baseline['n'] = id_prefix.astype(int)

# Calculate per-N scores
per_n_scores = {}
for n in range(1, 201):
    rows_for_n = baseline.loc[baseline['n'] == n]
    trees = list(zip(rows_for_n['x_float'], rows_for_n['y_float'], rows_for_n['deg_float']))
    per_n_scores[n] = calculate_score_for_n(trees, n)

total_score = sum(per_n_scores.values())
print(f"Total baseline score: {total_score:.6f}")
print("\nTop 10 contributors to score:")
# Largest per-N score first.
ranked = sorted(per_n_scores.items(), key=lambda entry: -entry[1])
for n, score in ranked[:10]:
    print(f"  N={n}: {score:.6f}")

In [None]:
# Calculate theoretical minimum for each N
import math

# Single tree at 45 degrees has minimum bounding box
single_tree_min_side = 0.813173  # Known optimal for N=1

print("\nPer-N analysis (baseline vs theoretical):")
print("N\tBaseline\tTheoretical\tGap\t\t% Gap")
print("-" * 70)

# Only a sample of N values is tabulated; total_gap sums over this sample.
sampled_ns = (1, 2, 3, 4, 5, 10, 20, 50, 100, 200)
single_tree_side_sq = single_tree_min_side ** 2

total_gap = 0
for n in sampled_ns:
    baseline_score = per_n_scores[n]
    # Reference value: the single-tree squared side divided by n. This is a
    # lower bound only and is not expected to be achievable.
    theoretical = single_tree_side_sq / n
    gap = baseline_score - theoretical
    pct_gap = gap / baseline_score * 100
    total_gap += gap
    columns = [
        str(n),
        f"{baseline_score:.6f}",
        f"{theoretical:.6f}",
        f"{gap:.6f}",
        f"{pct_gap:.1f}%",
    ]
    print("\t".join(columns))

print(f"\nTotal gap from theoretical minimum: {total_gap:.6f}")

In [None]:
# Analyze what techniques could close the gap
strategy_rule = "=" * 60
print(strategy_rule, "STRATEGY ANALYSIS", strategy_rule, sep="\n")

# Free-form notes on candidate strategies, printed verbatim.
strategy_notes = """
1. ENSEMBLE APPROACH (current)
   - Combines best per-N from 114 snapshots
   - Improvement: 0.05 points
   - Problem: All snapshots are from same optimizer family
   - Ceiling: ~70.5 (can't go lower with existing solutions)

2. FRACTIONAL TRANSLATION (from top kernel)
   - Step sizes: [0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00002, 0.00001]
   - 8 directions: N, S, E, W, NE, NW, SE, SW
   - Can be implemented in pure Python
   - Expected improvement: 0.1-0.5 points per N

3. NOVEL ALGORITHMS (required for target)
   - No-Fit Polygon (NFP) for O(1) collision checks
   - Branch-and-bound for small N (exact solutions)
   - Genetic algorithm with custom operators
   - Constraint programming

4. KEY INSIGHT: Top teams have 900+ submissions
   - They accumulate best per-N over many experiments
   - Each experiment tries to improve SOME N values
   - Final = ensemble of all best per-N
   - We have 93 submissions remaining - USE THEM!
"""
print(strategy_notes)

In [None]:
# Summary of findings
findings_rule = "=" * 60
print(findings_rule, "KEY FINDINGS FOR NEXT EXPERIMENT", findings_rule, sep="\n")

# Hand-written summary for the next experiment, printed verbatim.
key_findings = """
1. exp_004 FAILED because of ID format mismatch
   - Some snapshots use '013_000' instead of '013_0'
   - Bad snapshots: 21145963314, 21337107511
   - FIX: Filter out bad snapshots OR normalize IDs

2. Gap to target is 1.73 points (2.5%)
   - Ensemble gave only 0.05 improvement
   - Need ~35x more improvement
   - MUST implement novel algorithms

3. Fractional translation is implementable in Python
   - Step sizes: [0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00002, 0.00001]
   - 8 directions
   - Can improve individual N values

4. Per-N tracking is critical
   - N=1 contributes 0.66 (highest)
   - Small N (1-10) contribute ~4.0 total
   - Focus on small N for biggest impact

5. Validation requirements:
   - ID format: NNN_I (e.g., 013_0, not 013_000)
   - Precision: 18+ decimal places
   - Overlap: Kaggle uses integer scaling (1e18)
"""
print(key_findings)