# Loop 8 Analysis: Better Baseline from Snapshots

Key findings:
1. Found better overlap-free solution in snapshots: 70.659944 (vs our 70.675457)
2. Top kernels run for 3-11 HOURS, not 28 minutes
3. Need to use the better baseline and run longer optimization

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os

# Outline of a single tree, stored as parallel x / y coordinate lists
# (15 points each); create_tree_polygon builds its Polygon from them.
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]
# Pair the coordinates up into (x, y) vertex tuples.
TREE_VERTICES = [(vx, vy) for vx, vy in zip(TX, TY)]

def parse_s_value(s):
    """Convert a CSV field to float, dropping a leading 's' marker if present.

    A string that starts with 's' (for example 's1.25') is parsed from the
    characters after that prefix; any other value is handed to float() as is.

    Raises:
        ValueError: If the remaining text is not a valid float literal.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_prefix else s)

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated about (0, 0) and then moved to (x, y).

    Args:
        x: Offset applied along the x axis after rotation.
        y: Offset applied along the y axis after rotation.
        deg: Rotation angle handed to shapely's affinity.rotate
            (presumably degrees, shapely's default unit).

    Returns:
        A shapely Polygon built from TREE_VERTICES.
    """
    # Rotate first so the pivot is the origin of the untranslated outline.
    rotated = affinity.rotate(Polygon(TREE_VERTICES), deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def get_bounding_box_side(polygons):
    """Return the longer side of the axis-aligned box enclosing all polygons.

    Args:
        polygons: Iterable of objects exposing `.exterior.coords`, a sequence
            of points whose first two components are x and y.

    Returns:
        max(x extent, y extent) over every exterior coordinate.

    Raises:
        ValueError: If there are no coordinates at all (e.g. an empty input).
    """
    xs, ys = [], []
    for shape in polygons:
        for point in shape.exterior.coords:
            xs.append(point[0])
            ys.append(point[1])
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def has_overlap_strict(polygons, threshold=1e-15):
    """Report whether any two polygons overlap by more than `threshold` area.

    Every unordered pair is examined. A pair counts as overlapping only when
    it intersects, does not merely touch, and the area of the intersection
    exceeds `threshold`.

    Args:
        polygons: Indexable sequence of shapely-style geometries.
        threshold: Intersection area that must be exceeded to count as overlap.

    Returns:
        True as soon as one overlapping pair is found, otherwise False
        (including when fewer than two polygons are given).
    """
    total = len(polygons)
    for a in range(total - 1):
        first = polygons[a]
        for b in range(a + 1, total):
            second = polygons[b]
            if not first.intersects(second):
                continue
            if first.touches(second):
                # Shared boundary only; not treated as an overlap.
                continue
            if first.intersection(second).area > threshold:
                return True
    return False

def score_solution_detailed(path):
    """Score a submission CSV and list the tree counts that contain overlaps.

    The CSV is read with pandas and must provide the columns 'id', 'x', 'y'
    and 'deg'. Rows are grouped by the zero-padded prefix of 'id'
    ('001_' ... '200_'); for every n the score is
    (bounding-box side) ** 2 / n.

    Args:
        path: Location of the CSV file to score.

    Returns:
        A tuple (total, scores, overlaps) where `total` is the sum of all
        per-n scores, `scores` maps n -> score, and `overlaps` lists every n
        for which has_overlap_strict reported an overlap.

    Raises:
        ValueError: If a value cannot be parsed as a float, or if some n in
            1..200 has no rows in the file.
    """
    df = pd.read_csv(path)
    for column in ('x', 'y', 'deg'):
        df[f'{column}_val'] = df[column].apply(parse_s_value)

    scores = {}
    overlaps = []
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        group = df[df['id'].str.startswith(prefix)]
        if group.empty:
            # Fail with a message that names the file and group instead of
            # the opaque "max() arg is an empty sequence" raised further down.
            raise ValueError(f'{path}: no rows found for n={n} (id prefix {prefix!r})')
        # Iterate the three columns directly; iterrows() would build a Series per row.
        polygons = [
            create_tree_polygon(x, y, deg)
            for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
        ]
        side = get_bounding_box_side(polygons)
        scores[n] = side**2 / n
        if has_overlap_strict(polygons):
            overlaps.append(n)

    total = sum(scores.values())
    return total, scores, overlaps

# Progress marker printed once the helper functions in this cell are defined;
# the CSV files themselves are read by the cells that follow.
print('Loading solutions...')

Loading solutions...


In [2]:
# Compare our current best with the snapshot best
our_best_path = '/home/code/experiments/006_corner_extraction/ensemble_best.csv'
snapshot_best_path = '/home/nonroot/snapshots/santa-2025/21145961371/submission/submission.csv'

# Score we are trying to reach. Named once so the "Target" line and the gap
# computation below cannot drift apart.
TARGET_SCORE = 68.919154

# Each call returns (total score, per-N score dict, list of N with overlaps).
our_score, our_scores, our_overlaps = score_solution_detailed(our_best_path)
snap_score, snap_scores, snap_overlaps = score_solution_detailed(snapshot_best_path)

print(f'Our current best: {our_score:.6f} (overlaps: {len(our_overlaps)})')
print(f'Snapshot best: {snap_score:.6f} (overlaps: {len(snap_overlaps)})')
print(f'Improvement available: {our_score - snap_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap from snapshot to target: {snap_score - TARGET_SCORE:.6f}')

Our current best: 70.675457 (overlaps: 0)
Snapshot best: 70.659944 (overlaps: 0)
Improvement available: 0.015514
Target: 68.919154
Gap from snapshot to target: 1.740790


In [3]:
# Collect every N whose per-N score is lower (better) in the snapshot than in
# ours; differences of 1e-10 or less are ignored.
better_in_snap = [
    (n, our_scores[n] - snap_scores[n], our_scores[n], snap_scores[n])
    for n in range(1, 201)
    if our_scores[n] - snap_scores[n] > 1e-10
]

print(f'\nN values where snapshot is better ({len(better_in_snap)} total):')
# Report the 20 largest gaps, biggest first.
largest_gaps = sorted(better_in_snap, key=lambda entry: -entry[1])[:20]
for n, diff, our, snap in largest_gaps:
    print(f'  N={n:3d}: improvement={diff:.6f} (our={our:.6f}, snap={snap:.6f})')


N values where snapshot is better (148 total):
  N= 57: improvement=0.003937 (our=0.358045, snap=0.354108)
  N= 54: improvement=0.001486 (our=0.360686, snap=0.359200)
  N=101: improvement=0.001353 (our=0.350389, snap=0.349036)
  N=162: improvement=0.001274 (our=0.338332, snap=0.337058)
  N= 74: improvement=0.001012 (our=0.354139, snap=0.353127)
  N= 75: improvement=0.000875 (our=0.353773, snap=0.352898)
  N=123: improvement=0.000813 (our=0.348717, snap=0.347904)
  N=157: improvement=0.000505 (our=0.341876, snap=0.341371)
  N=187: improvement=0.000367 (our=0.340604, snap=0.340237)
  N=195: improvement=0.000291 (our=0.332901, snap=0.332610)
  N=141: improvement=0.000254 (our=0.343724, snap=0.343470)
  N= 76: improvement=0.000231 (our=0.351603, snap=0.351372)
  N=143: improvement=0.000214 (our=0.341362, snap=0.341148)
  N=142: improvement=0.000192 (our=0.341339, snap=0.341148)
  N=193: improvement=0.000187 (our=0.333950, snap=0.333763)
  N= 92: improvement=0.000172 (our=0.348482, snap=0.

In [4]:
# Create ensemble of our best + snapshot best
print('\nCreating ensemble of our best + snapshot best...')

our_df = pd.read_csv(our_best_path)
snap_df = pd.read_csv(snapshot_best_path)

ensemble_rows = []
for n in range(1, 201):
    prefix = f'{n:03d}_'
    snap_is_clean = n not in snap_overlaps
    ours_is_clean = n not in our_overlaps
    if snap_is_clean != ours_is_clean:
        # Exactly one source is overlap-free for this N: take it even if its
        # score is worse, because an overlapping group invalidates the ensemble.
        use_snapshot = snap_is_clean
    else:
        # Both clean (or both overlapping): the lower score wins, ties go to ours.
        use_snapshot = snap_scores[n] < our_scores[n]
    source_df = snap_df if use_snapshot else our_df
    group = source_df[source_df['id'].str.startswith(prefix)]
    ensemble_rows.append(group)

ensemble_df = pd.concat(ensemble_rows, ignore_index=True)
ensemble_path = '/home/code/experiments/009_snapshot_ensemble/ensemble.csv'
# Derive the directory from the file path so the two cannot get out of sync.
os.makedirs(os.path.dirname(ensemble_path), exist_ok=True)
ensemble_df.to_csv(ensemble_path, index=False)

# Score the ensemble (re-read from disk, so this also validates the written file)
ensemble_score, ensemble_scores, ensemble_overlaps = score_solution_detailed(ensemble_path)
print(f'Ensemble score: {ensemble_score:.6f}')
print(f'Overlaps: {len(ensemble_overlaps)}')
print(f'Improvement over our best: {our_score - ensemble_score:.6f}')
print(f'Gap to target: {ensemble_score - 68.919154:.6f}')


Creating ensemble of our best + snapshot best...


Ensemble score: 70.659944
Overlaps: 0
Improvement over our best: 0.015514
Gap to target: 1.740790


In [5]:
# Promote the ensemble to the submission file, but only when the scoring
# step found no overlapping group in it.
import shutil
if ensemble_overlaps:
    print(f'WARNING: Ensemble has {len(ensemble_overlaps)} overlaps!')
else:
    shutil.copy(ensemble_path, '/home/submission/submission.csv')
    print('Copied ensemble to submission.csv')

Copied ensemble to submission.csv


In [None]:
# Now let's ensemble ALL snapshots to find the absolute best for each N
print('Ensembling ALL snapshots...')

base_dir = '/home/nonroot/snapshots/santa-2025'
all_solutions = []

for snapshot_id in sorted(os.listdir(base_dir)):
    if snapshot_id.startswith('.'):
        continue
    submission_path = os.path.join(base_dir, snapshot_id, 'submission', 'submission.csv')
    if not os.path.exists(submission_path):
        continue
    try:
        score, scores, overlaps = score_solution_detailed(submission_path)
    except Exception as e:
        # Best effort: report the unreadable snapshot and keep scanning the rest.
        print(f'Error loading {snapshot_id}: {e}')
        continue
    all_solutions.append({
        'path': submission_path,
        'total_score': score,
        'scores': scores,
        'overlaps': overlaps,
        'snapshot_id': snapshot_id
    })

print(f'Loaded {len(all_solutions)} solutions')

# Find best for each N (only from overlap-free solutions).
# The overlap filter does not depend on N, so apply it once up front.
overlap_free_solutions = [sol for sol in all_solutions if len(sol['overlaps']) == 0]

best_for_n = {}
best_source_for_n = {}
for n in range(1, 201):
    # With no overlap-free solution these stay at inf / None.
    best_score = float('inf')
    best_path = None
    for sol in overlap_free_solutions:
        if sol['scores'][n] < best_score:
            best_score = sol['scores'][n]
            best_path = sol['path']
    best_for_n[n] = best_score
    best_source_for_n[n] = best_path

# Calculate total score if we take best from each N
total_best = sum(best_for_n.values())
# A single backslash here: the previous '\\n' printed a literal "\n" instead of a blank line.
print(f'\nBest possible score by ensembling all overlap-free snapshots: {total_best:.6f}')
print(f'Target: 68.919154')
print(f'Gap: {total_best - 68.919154:.6f}')