# Loop 3 LB Feedback Analysis

## Key Findings:
1. CV-LB gap is exactly 0.0000 - our local scoring is perfect
2. Tessellation approach FAILED for all N values tested
3. Better snapshot exists at 70.647306 (vs current 70.658891)
4. Gap to target: 1.74 points (2.5%)

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os

TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    if isinstance(s, str) and s.startswith('s'):
        return float(s[1:])
    return float(s)

def get_score_per_n(df):
    """Get score contribution for each N"""
    scores = {}
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        n_trees = df[df['id'].str.startswith(prefix)]
        if len(n_trees) != n:
            continue
        
        all_coords = []
        for _, row in n_trees.iterrows():
            x = parse_value(row['x'])
            y = parse_value(row['y'])
            deg = parse_value(row['deg'])
            base = Polygon(zip(TX, TY))
            rotated = affinity.rotate(base, deg, origin=(0, 0))
            poly = affinity.translate(rotated, x, y)
            coords = np.array(poly.exterior.coords)
            all_coords.append(coords)
        
        all_coords = np.vstack(all_coords)
        x_range = all_coords[:, 0].max() - all_coords[:, 0].min()
        y_range = all_coords[:, 1].max() - all_coords[:, 1].min()
        side = max(x_range, y_range)
        scores[n] = {'side': side, 'score': side**2 / n}
    return scores

print('Functions defined')

Functions defined


In [2]:
# Load current best and the better snapshot
df_current = pd.read_csv('/home/nonroot/snapshots/santa-2025/21164519357/code/exploration/santa-2025.csv')
df_better = pd.read_csv('/home/nonroot/snapshots/santa-2025/21165874980/code/submission_candidates/candidate_001.csv')

print('Current best:', df_current.shape)
print('Better snapshot:', df_better.shape)

# Calculate scores
scores_current = get_score_per_n(df_current)
scores_better = get_score_per_n(df_better)

total_current = sum(s['score'] for s in scores_current.values())
total_better = sum(s['score'] for s in scores_better.values())

print(f'\nCurrent best total: {total_current:.6f}')
print(f'Better snapshot total: {total_better:.6f}')
print(f'Improvement: {total_current - total_better:.6f}')

Current best: (20100, 4)
Better snapshot: (20100, 4)



Current best total: 70.658891
Better snapshot total: 70.647306
Improvement: 0.011585


In [3]:
# Compare per-N scores to find where the better snapshot is better
print('N values where better snapshot is better:')
print('='*60)

improvements = []
for n in range(1, 201):
    if n in scores_current and n in scores_better:
        diff = scores_current[n]['score'] - scores_better[n]['score']
        if abs(diff) > 1e-8:
            improvements.append((n, diff, scores_current[n]['score'], scores_better[n]['score']))

improvements.sort(key=lambda x: -x[1])  # Sort by improvement (descending)

print(f'\nTop improvements (better snapshot is better):')
for n, diff, curr, better in improvements[:20]:
    if diff > 0:
        print(f'  N={n:3d}: current={curr:.6f}, better={better:.6f}, improvement={diff:+.6f}')

print(f'\nTop regressions (current is better):')
for n, diff, curr, better in improvements[-10:]:
    if diff < 0:
        print(f'  N={n:3d}: current={curr:.6f}, better={better:.6f}, regression={diff:+.6f}')

N values where better snapshot is better:

Top improvements (better snapshot is better):
  N=128: current=0.343716, better=0.340974, improvement=+0.002742
  N= 94: current=0.352271, better=0.350076, improvement=+0.002195
  N=157: current=0.341360, better=0.340204, improvement=+0.001156
  N= 14: current=0.370454, better=0.369543, improvement=+0.000911
  N= 11: current=0.375736, better=0.374924, improvement=+0.000812
  N=193: current=0.333760, better=0.333341, improvement=+0.000418
  N=137: current=0.343177, better=0.342810, improvement=+0.000366
  N=123: current=0.347904, better=0.347565, improvement=+0.000340
  N= 59: current=0.360855, better=0.360583, improvement=+0.000272
  N=115: current=0.343570, better=0.343333, improvement=+0.000237
  N=101: current=0.349035, better=0.348801, improvement=+0.000234
  N=192: current=0.335301, better=0.335077, improvement=+0.000223
  N= 89: current=0.346899, better=0.346717, improvement=+0.000182
  N= 32: current=0.365592, better=0.365417, improveme

In [4]:
# Create ensemble: take best of each N
print('Creating ensemble from best of each N...')

ensemble_total = 0
for n in range(1, 201):
    if n in scores_current and n in scores_better:
        ensemble_total += min(scores_current[n]['score'], scores_better[n]['score'])
    elif n in scores_current:
        ensemble_total += scores_current[n]['score']
    elif n in scores_better:
        ensemble_total += scores_better[n]['score']

print(f'\nEnsemble total: {ensemble_total:.6f}')
print(f'vs Current: {total_current:.6f} (improvement: {total_current - ensemble_total:.6f})')
print(f'vs Better: {total_better:.6f} (improvement: {total_better - ensemble_total:.6f})')

Creating ensemble from best of each N...

Ensemble total: 70.647306
vs Current: 70.658891 (improvement: 0.011585)
vs Better: 70.647306 (improvement: 0.000000)


In [5]:
# Check all snapshots for even better solutions
import glob

print('Searching all snapshots for better solutions...')

all_csvs = glob.glob('/home/nonroot/snapshots/santa-2025/*/code/**/*.csv', recursive=True)
print(f'Found {len(all_csvs)} CSV files')

# Sample a few to check
best_score = total_better
best_path = '/home/nonroot/snapshots/santa-2025/21165874980/code/submission_candidates/candidate_001.csv'

for csv_path in all_csvs[:50]:  # Check first 50
    try:
        df = pd.read_csv(csv_path)
        if 'id' not in df.columns or 'x' not in df.columns:
            continue
        if len(df) != 20100:
            continue
        
        scores = get_score_per_n(df)
        total = sum(s['score'] for s in scores.values())
        
        if total < best_score:
            print(f'BETTER: {csv_path}: {total:.6f}')
            best_score = total
            best_path = csv_path
    except:
        pass

print(f'\nBest found: {best_score:.6f} at {best_path}')

Searching all snapshots for better solutions...
Found 908 CSV files



Best found: 70.647306 at /home/nonroot/snapshots/santa-2025/21165874980/code/submission_candidates/candidate_001.csv


In [6]:
# Gap analysis: where is the most room for improvement?
print('Gap analysis: Score contribution by N range')
print('='*60)

# Group by N ranges
ranges = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]

for start, end in ranges:
    range_score = sum(scores_better[n]['score'] for n in range(start, end+1) if n in scores_better)
    print(f'N={start:3d}-{end:3d}: {range_score:.4f}')

print(f'\nTotal: {total_better:.6f}')
print(f'Target: 68.919154')
print(f'Gap: {total_better - 68.919154:.6f} ({(total_better - 68.919154) / 68.919154 * 100:.2f}%)')

Gap analysis: Score contribution by N range
N=  1- 10: 4.3291
N= 11- 20: 3.7263
N= 21- 50: 10.9844
N= 51-100: 17.6279
N=101-150: 17.1366
N=151-200: 16.8430

Total: 70.647306
Target: 68.919154
Gap: 1.728152 (2.51%)


In [7]:
# Search more snapshots for even better solutions
print('Searching ALL snapshots for better solutions...')

best_per_n = {}
for n in range(1, 201):
    if n in scores_better:
        best_per_n[n] = {'score': scores_better[n]['score'], 'source': 'better_snapshot'}

checked = 0
for csv_path in all_csvs:
    try:
        df = pd.read_csv(csv_path)
        if 'id' not in df.columns or 'x' not in df.columns:
            continue
        if len(df) != 20100:
            continue
        
        scores = get_score_per_n(df)
        for n, s in scores.items():
            if n not in best_per_n or s['score'] < best_per_n[n]['score']:
                best_per_n[n] = {'score': s['score'], 'source': csv_path}
        
        checked += 1
        if checked % 100 == 0:
            print(f'Checked {checked} files...')
    except:
        pass

print(f'\nChecked {checked} valid files')

# Calculate best possible ensemble
ensemble_best = sum(best_per_n[n]['score'] for n in range(1, 201))
print(f'\nBest possible ensemble: {ensemble_best:.6f}')
print(f'vs Better snapshot: {total_better:.6f} (improvement: {total_better - ensemble_best:.6f})')

# Show N values with different sources
different_sources = [(n, best_per_n[n]) for n in range(1, 201) if best_per_n[n]['source'] != 'better_snapshot']
print(f'\nN values with better solutions from other sources: {len(different_sources)}')

Searching ALL snapshots for better solutions...


Checked 100 files...


Checked 200 files...


Checked 300 files...


Checked 400 files...


Checked 500 files...


Checked 600 files...


Checked 700 files...


Checked 800 files...



Checked 893 valid files

Best possible ensemble: 27.492466
vs Better snapshot: 70.647306 (improvement: 43.154840)

N values with better solutions from other sources: 199


In [None]:
# This is suspicious - 27.49 is way too good. Let me check for overlaps\nprint('Checking if best ensemble has overlaps...')\n\n# Get the sources for the best ensemble\nfrom collections import Counter\nsource_counts = Counter(best_per_n[n]['source'] for n in range(1, 201))\nprint('\\nTop sources:')\nfor source, count in source_counts.most_common(10):\n    print(f'  {count} N values: {source[:80]}...')\n\n# Check a few of the best N values for overlaps\nprint('\\nChecking for overlaps in best solutions...')\n\ndef check_overlaps(df, n):\n    \"\"\"Check if N trees have overlaps\"\"\"\n    prefix = f'{n:03d}_'\n    n_trees = df[df['id'].str.startswith(prefix)]\n    if len(n_trees) != n:\n        return None\n    \n    polygons = []\n    for _, row in n_trees.iterrows():\n        x = parse_value(row['x'])\n        y = parse_value(row['y'])\n        deg = parse_value(row['deg'])\n        base = Polygon(zip(TX, TY))\n        rotated = affinity.rotate(base, deg, origin=(0, 0))\n        poly = affinity.translate(rotated, x, y)\n        polygons.append(poly)\n    \n    # Check for overlaps\n    for i in range(len(polygons)):\n        for j in range(i+1, len(polygons)):\n            if polygons[i].intersects(polygons[j]) and not polygons[i].touches(polygons[j]):\n                inter = polygons[i].intersection(polygons[j])\n                if inter.area > 1e-10:\n                    return True\n    return False\n\n# Check a sample of N values with different sources\nfor n in [1, 10, 50, 100, 150, 200]:\n    if best_per_n[n]['source'] != 'better_snapshot':\n        df = pd.read_csv(best_per_n[n]['source'])\n        has_overlap = check_overlaps(df, n)\n        print(f'  N={n}: score={best_per_n[n][\"score\"]:.6f}, overlaps={has_overlap}')\n    else:\n        print(f'  N={n}: score={best_per_n[n][\"score\"]:.6f}, source=better_snapshot')"}
</invoke>