# Evolver Loop 7 LB Feedback Analysis

## Key Findings:
1. LB submission 008 confirmed CV=LB exactly (70.675457)
2. Found BETTER solution in snapshots: 70.659944 (overlap-free) - 0.0155 better than our best
3. Found even better solution: 70.586631 but has 71 overlaps
4. The 70.659944 came from ensembling ALL 723 CSV files across ALL snapshots

## Strategy:
1. Copy the 70.659944 solution and submit it
2. Try to fix the overlaps in the 70.586631 solution
3. Create our own full ensemble from all available CSVs

In [1]:
import os
import glob
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
from collections import defaultdict

# Tree geometry
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

def get_tree_polygon(x, y, deg):
    coords = list(zip(TX, TY))
    coords.append(coords[0])
    base_poly = Polygon(coords)
    rotated = affinity.rotate(base_poly, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def get_bounding_box_side(trees):
    if not trees:
        return float('inf')
    all_x, all_y = [], []
    for x, y, deg in trees:
        poly = get_tree_polygon(x, y, deg)
        bounds = poly.bounds
        all_x.extend([bounds[0], bounds[2]])
        all_y.extend([bounds[1], bounds[3]])
    return max(max(all_x) - min(all_x), max(all_y) - min(all_y))

def has_overlap(trees, threshold=1e-15):
    if len(trees) <= 1:
        return False, []
    polygons = [get_tree_polygon(x, y, deg) for x, y, deg in trees]
    overlaps = []
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]) and not polygons[i].touches(polygons[j]):
                intersection = polygons[i].intersection(polygons[j])
                if intersection.area > threshold:
                    overlaps.append((i, j, intersection.area))
    return len(overlaps) > 0, overlaps

def parse_s_value(s):
    if isinstance(s, str) and s.startswith('s'):
        return float(s[1:])
    return float(s)

def load_trees_for_n(df, n):
    prefix = f'{n:03d}_'
    rows = df[df['id'].str.startswith(prefix)]
    trees = []
    raw_data = []
    for _, row in rows.iterrows():
        x_str = str(row['x']).replace('s', '')
        y_str = str(row['y']).replace('s', '')
        deg_str = str(row['deg']).replace('s', '')
        x = float(x_str)
        y = float(y_str)
        deg = float(deg_str)
        trees.append((x, y, deg))
        raw_data.append((row['x'], row['y'], row['deg']))
    return trees, raw_data

print("Functions defined!")

Functions defined!


In [2]:
# Load the best overlap-free solution from snapshot
best_solution_path = '/home/nonroot/snapshots/santa-2025/21145961371/submission/submission.csv'
df_best = pd.read_csv(best_solution_path)
print(f"Loaded best overlap-free solution: {len(df_best)} rows")

# Verify the score
total_score = 0
overlap_count = 0
for n in range(1, 201):
    trees, _ = load_trees_for_n(df_best, n)
    side = get_bounding_box_side(trees)
    total_score += side**2 / n
    has_ov, _ = has_overlap(trees)
    if has_ov:
        overlap_count += 1

print(f"Score: {total_score:.6f}")
print(f"Overlaps: {overlap_count}")
print(f"Target: 68.919154")
print(f"Gap to target: {total_score - 68.919154:.6f}")
print(f"Improvement over our best (70.675457): {70.675457 - total_score:.6f}")

Loaded best overlap-free solution: 20100 rows


Score: 70.659944
Overlaps: 0
Target: 68.919154
Gap to target: 1.740790
Improvement over our best (70.675457): 0.015513


In [3]:
# Load the overlap solution (70.586631)
overlap_solution_path = '/home/nonroot/snapshots/santa-2025/21145966992/submission/submission.csv'
df_overlap = pd.read_csv(overlap_solution_path)

# Find which N values have overlaps
overlap_ns = []
for n in range(1, 201):
    trees, _ = load_trees_for_n(df_overlap, n)
    has_ov, overlaps = has_overlap(trees)
    if has_ov:
        side = get_bounding_box_side(trees)
        score = side**2 / n
        overlap_ns.append((n, len(overlaps), max(o[2] for o in overlaps), score))

print(f"N values with overlaps: {len(overlap_ns)}")
print("\nTop 10 by overlap area:")
for n, num_overlaps, max_area, score in sorted(overlap_ns, key=lambda x: -x[2])[:10]:
    print(f"  N={n}: {num_overlaps} overlaps, max_area={max_area:.2e}, score={score:.6f}")

N values with overlaps: 71

Top 10 by overlap area:
  N=137: 133 overlaps, max_area=1.60e-01, score=0.343239
  N=2: 1 overlaps, max_area=1.49e-01, score=0.437328
  N=184: 241 overlaps, max_area=1.24e-01, score=0.339428
  N=190: 215 overlaps, max_area=1.22e-01, score=0.337901
  N=124: 121 overlaps, max_area=1.16e-01, score=0.346640
  N=5: 10 overlaps, max_area=1.13e-01, score=0.394109
  N=59: 55 overlaps, max_area=1.02e-01, score=0.360240
  N=78: 98 overlaps, max_area=1.00e-01, score=0.351423
  N=125: 119 overlaps, max_area=1.00e-01, score=0.345169
  N=77: 101 overlaps, max_area=7.87e-02, score=0.350211


In [4]:
# Create a hybrid solution: use overlap solution where no overlaps, best solution where overlaps exist
print("Creating hybrid solution...")

hybrid_rows = []
hybrid_scores = {}

for n in range(1, 201):
    # Get both configurations
    trees_overlap, raw_overlap = load_trees_for_n(df_overlap, n)
    trees_best, raw_best = load_trees_for_n(df_best, n)
    
    # Check if overlap solution has overlaps
    has_ov, _ = has_overlap(trees_overlap)
    
    if has_ov:
        # Use the best overlap-free solution
        trees = trees_best
        raw = raw_best
        source = 'best'
    else:
        # Use the overlap solution (which is better)
        trees = trees_overlap
        raw = raw_overlap
        source = 'overlap'
    
    side = get_bounding_box_side(trees)
    score = side**2 / n
    hybrid_scores[n] = (score, source)
    
    for i, (x_str, y_str, deg_str) in enumerate(raw):
        hybrid_rows.append({
            'id': f'{n:03d}_{i}',
            'x': x_str,
            'y': y_str,
            'deg': deg_str
        })

hybrid_df = pd.DataFrame(hybrid_rows)
total_hybrid_score = sum(s for s, _ in hybrid_scores.values())
print(f"Hybrid score: {total_hybrid_score:.6f}")
print(f"Improvement over best overlap-free (70.659944): {70.659944 - total_hybrid_score:.6f}")
print(f"Improvement over our previous best (70.675457): {70.675457 - total_hybrid_score:.6f}")

Creating hybrid solution...


Hybrid score: 70.673045
Improvement over best overlap-free (70.659944): -0.013101
Improvement over our previous best (70.675457): 0.002412


In [5]:
# Count sources
from collections import Counter
source_counts = Counter(source for _, source in hybrid_scores.values())
print(f"\nSources used:")
for source, count in source_counts.items():
    print(f"  {source}: {count} N values")


Sources used:
  overlap: 129 N values
  best: 71 N values


In [6]:
# Validate hybrid for overlaps
print("\nValidating hybrid solution for overlaps...")
overlap_count = 0
for n in range(1, 201):
    trees, _ = load_trees_for_n(hybrid_df, n)
    has_ov, _ = has_overlap(trees)
    if has_ov:
        overlap_count += 1
        print(f"  N={n}: OVERLAP!")

if overlap_count == 0:
    print("No overlaps detected!")


Validating hybrid solution for overlaps...


No overlaps detected!


In [7]:
# Save the hybrid solution
os.makedirs('/home/code/experiments/009_snapshot_ensemble', exist_ok=True)
hybrid_df.to_csv('/home/code/experiments/009_snapshot_ensemble/submission.csv', index=False)
hybrid_df.to_csv('/home/submission/submission.csv', index=False)
print(f"Saved hybrid solution with score {total_hybrid_score:.6f}")

Saved hybrid solution with score 70.673045


In [8]:
# Summary
print("\n" + "="*60)
print("SUMMARY")
print("="*60)
print(f"Our previous best: 70.675457")
print(f"Best overlap-free from snapshots: 70.659944")
print(f"Overlap solution from snapshots: 70.586631 (has 71 overlaps)")
print(f"Hybrid solution: {total_hybrid_score:.6f}")
print(f"Target: 68.919154")
print(f"Gap to target: {total_hybrid_score - 68.919154:.6f}")
print("="*60)


SUMMARY
Our previous best: 70.675457
Best overlap-free from snapshots: 70.659944
Overlap solution from snapshots: 70.586631 (has 71 overlaps)
Hybrid solution: 70.673045
Target: 68.919154
Gap to target: 1.753891


In [9]:
# The hybrid is worse! Let's use the best overlap-free solution directly
# Copy the best overlap-free solution
import shutil
shutil.copy(best_solution_path, '/home/code/experiments/009_snapshot_ensemble/submission.csv')
shutil.copy(best_solution_path, '/home/submission/submission.csv')
print(f"Copied best overlap-free solution (70.659944) to submission folder")

# Verify
df_verify = pd.read_csv('/home/submission/submission.csv')
total = 0
for n in range(1, 201):
    trees, _ = load_trees_for_n(df_verify, n)
    side = get_bounding_box_side(trees)
    total += side**2 / n
print(f"Verified score: {total:.6f}")

Copied best overlap-free solution (70.659944) to submission folder


Verified score: 70.659944
