# Loop 1 Analysis: Ensemble Strategy

## Goal
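1. Score every available snapshot submission separately for each tree count N (1–200)
2. Find the best solution (lowest side²/N score) for each N value
3. Create an ensemble submission by combining the per-N winners
4. Identify which N values have the most room for improvement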

In [1]:
import numpy as np
import pandas as pd
import os
import glob
from tqdm import tqdm

# Tree vertices: x/y coordinates of the 15 outline points of one tree in its
# local frame, i.e. before rotation and translation are applied.
TX = np.array([0,0.125,0.0625,0.2,0.1,0.35,0.075,0.075,-0.075,-0.075,-0.35,-0.1,-0.2,-0.0625,-0.125])
TY = np.array([0.8,0.5,0.5,0.25,0.25,0,0,-0.2,-0.2,0,0,0.25,0.25,0.5,0.5])

def score_group(xs, ys, degs):
    """Calculate score for a single N-tree configuration.

    Every tree is the TX/TY outline rotated by ``degs[i]`` degrees with the
    standard 2-D rotation matrix and then shifted by ``(xs[i], ys[i])``.
    The score is ``side**2 / n`` where ``side`` is the larger of the width and
    height of the axis-aligned bounding box around all transformed vertices.

    Args:
        xs: 1-D sequence of tree x offsets.
        ys: 1-D sequence of tree y offsets, same length as ``xs``.
        degs: 1-D sequence of rotation angles in degrees, same length as ``xs``.

    Returns:
        The score as a numpy float. NaN inputs propagate to a NaN score.

    Raises:
        ValueError: if the inputs are empty or their lengths differ.
    """
    xs = np.asarray(xs, dtype=float)
    ys = np.asarray(ys, dtype=float)
    degs = np.asarray(degs, dtype=float)

    n = len(xs)
    if n == 0:
        raise ValueError('score_group requires at least one tree')
    if ys.shape != xs.shape or degs.shape != xs.shape:
        raise ValueError(
            f'xs, ys and degs must have the same length, got '
            f'{xs.shape}, {ys.shape}, {degs.shape}'
        )

    # Column vectors so that broadcasting against TX/TY yields one row of
    # 15 transformed vertices per tree (shape: n x 15).
    rad = np.radians(degs)
    c = np.cos(rad)[:, None]
    s = np.sin(rad)[:, None]
    all_x = TX * c - TY * s + xs[:, None]
    all_y = TX * s + TY * c + ys[:, None]

    # np.maximum (unlike the builtin max) propagates NaN regardless of
    # argument order, so corrupt coordinates cannot yield a finite score.
    side = np.maximum(all_x.max() - all_x.min(), all_y.max() - all_y.min())
    return side * side / n

def parse_submission(df, max_n=200):
    """Parse a submission frame and return per-N scores and per-N rows.

    The frame must have the columns ``id``, ``x``, ``y`` and ``deg``. The
    numeric columns are read as strings, every ``'s'`` character is removed
    and the rest is converted to float. The integer before the first ``'_'``
    in ``id`` is taken as the group's N (e.g. ``'003_1'`` -> 3).

    A group is scored only when it has exactly N rows, 1 <= N <= ``max_n``,
    and all of its parsed values are finite; other groups are left out of
    both returned dicts.

    NOTE(review): only the bounding-box score is computed here; nothing in
    this function checks whether trees inside a group overlap. Confirm whether
    such a validity check is needed before mixing groups from different files.

    Args:
        df: submission DataFrame (not modified; a copy is parsed).
        max_n: largest N to consider. Defaults to 200.

    Returns:
        ``(per_n_scores, per_n_data)`` where ``per_n_scores[n]`` is the value
        returned by ``score_group`` and ``per_n_data[n]`` is a copy of that
        group's original ``id``/``x``/``y``/``deg`` rows. Keys are Python ints
        in ascending order.
    """
    df = df.copy()
    # Parse the 's' prefix from values
    for col in ('x', 'y', 'deg'):
        df[f'{col}_val'] = df[col].astype(str).str.replace('s', '', regex=False).astype(float)

    # Extract N from id (e.g., '003_1' -> 3)
    df['n'] = df['id'].str.split('_').str[0].astype(int)

    per_n_scores = {}
    per_n_data = {}

    # One pass over the frame instead of one boolean mask per candidate N.
    # groupby keeps the original row order inside each group.
    for n, group in df.groupby('n', sort=True):
        n = int(n)  # plain int keys, as callers index with range() values
        if not 1 <= n <= max_n or len(group) != n:
            continue
        values = group[['x_val', 'y_val', 'deg_val']].to_numpy()
        if not np.isfinite(values).all():
            # NaN/inf coordinates cannot produce a meaningful bounding box.
            continue
        per_n_scores[n] = score_group(values[:, 0], values[:, 1], values[:, 2])
        per_n_data[n] = group[['id', 'x', 'y', 'deg']].copy()

    return per_n_scores, per_n_data

print('Functions defined')

Functions defined


In [2]:
# Locate every snapshot directory, then keep the ones that actually contain
# a submission/submission.csv file.
snapshot_dir = '/home/nonroot/snapshots/santa-2025'
snapshot_dirs = sorted(glob.glob(f'{snapshot_dir}/*/'))
print(f'Found {len(snapshot_dirs)} snapshot directories')

# Candidate path per snapshot, filtered down to the files that exist
candidate_paths = (
    os.path.join(snapshot, 'submission', 'submission.csv')
    for snapshot in snapshot_dirs
)
submission_files = [path for path in candidate_paths if os.path.exists(path)]

print(f'Found {len(submission_files)} submission files')

Found 100 snapshot directories
Found 77 submission files


In [3]:
# Walk over every snapshot submission and remember, for each N, the lowest
# score seen so far together with its rows and the file it came from.
best_per_n = {}
for n in range(1, 201):
    best_per_n[n] = {'score': float('inf'), 'data': None, 'source': None}

for sub_file in tqdm(submission_files, desc='Scoring submissions'):
    try:
        df = pd.read_csv(sub_file)
        # Only files with the full submission schema can be scored
        if {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            per_n_scores, per_n_data = parse_submission(df)

            for n, score in per_n_scores.items():
                current_best = best_per_n[n]
                if score < current_best['score']:
                    current_best['score'] = score
                    current_best['data'] = per_n_data[n]
                    current_best['source'] = sub_file
    except Exception as e:
        # A broken file is reported and skipped; the remaining files still run
        print(f'Error processing {sub_file}: {e}')

print('\nDone scoring all submissions')

Scoring submissions:   0%|          | 0/77 [00:00<?, ?it/s]

Scoring submissions:   1%|▏         | 1/77 [00:00<00:29,  2.57it/s]

Scoring submissions:   3%|▎         | 2/77 [00:00<00:30,  2.48it/s]

Scoring submissions:   4%|▍         | 3/77 [00:01<00:29,  2.55it/s]

Scoring submissions:   5%|▌         | 4/77 [00:01<00:28,  2.57it/s]

Scoring submissions:   6%|▋         | 5/77 [00:01<00:27,  2.59it/s]

Scoring submissions:   8%|▊         | 6/77 [00:02<00:27,  2.54it/s]

Scoring submissions:   9%|▉         | 7/77 [00:02<00:27,  2.58it/s]

Scoring submissions:  10%|█         | 8/77 [00:03<00:26,  2.60it/s]

Scoring submissions:  12%|█▏        | 9/77 [00:03<00:26,  2.55it/s]

Scoring submissions:  13%|█▎        | 10/77 [00:03<00:26,  2.51it/s]

Scoring submissions:  14%|█▍        | 11/77 [00:04<00:26,  2.50it/s]

Scoring submissions:  16%|█▌        | 12/77 [00:04<00:26,  2.49it/s]

Scoring submissions:  17%|█▋        | 13/77 [00:05<00:26,  2.42it/s]

Scoring submissions:  18%|█▊        | 14/77 [00:05<00:25,  2.43it/s]

Scoring submissions:  19%|█▉        | 15/77 [00:05<00:25,  2.43it/s]

Scoring submissions:  21%|██        | 16/77 [00:06<00:25,  2.44it/s]

Scoring submissions:  22%|██▏       | 17/77 [00:06<00:25,  2.39it/s]

Scoring submissions:  23%|██▎       | 18/77 [00:07<00:24,  2.41it/s]

Scoring submissions:  25%|██▍       | 19/77 [00:07<00:23,  2.43it/s]

Scoring submissions:  26%|██▌       | 20/77 [00:08<00:23,  2.44it/s]

Scoring submissions:  27%|██▋       | 21/77 [00:08<00:23,  2.38it/s]

Scoring submissions:  29%|██▊       | 22/77 [00:08<00:22,  2.41it/s]

Scoring submissions:  30%|██▉       | 23/77 [00:09<00:22,  2.43it/s]

Scoring submissions:  31%|███       | 24/77 [00:09<00:21,  2.41it/s]

Scoring submissions:  32%|███▏      | 25/77 [00:10<00:21,  2.47it/s]

Scoring submissions:  34%|███▍      | 26/77 [00:10<00:20,  2.49it/s]

Scoring submissions:  35%|███▌      | 27/77 [00:10<00:19,  2.52it/s]

Scoring submissions:  36%|███▋      | 28/77 [00:11<00:19,  2.48it/s]

Scoring submissions:  38%|███▊      | 29/77 [00:11<00:19,  2.47it/s]

Scoring submissions:  39%|███▉      | 30/77 [00:12<00:18,  2.50it/s]

Scoring submissions:  40%|████      | 31/77 [00:12<00:18,  2.47it/s]

Scoring submissions:  42%|████▏     | 32/77 [00:12<00:18,  2.50it/s]

Scoring submissions:  43%|████▎     | 33/77 [00:13<00:17,  2.48it/s]

Scoring submissions:  45%|████▌     | 35/77 [00:13<00:13,  3.18it/s]

Scoring submissions:  47%|████▋     | 36/77 [00:14<00:14,  2.90it/s]

Scoring submissions:  48%|████▊     | 37/77 [00:14<00:14,  2.76it/s]

Scoring submissions:  49%|████▉     | 38/77 [00:15<00:14,  2.67it/s]

Scoring submissions:  51%|█████     | 39/77 [00:15<00:14,  2.60it/s]

Scoring submissions:  52%|█████▏    | 40/77 [00:15<00:14,  2.48it/s]

Scoring submissions:  53%|█████▎    | 41/77 [00:16<00:14,  2.48it/s]

Scoring submissions:  55%|█████▍    | 42/77 [00:16<00:14,  2.47it/s]

Scoring submissions:  56%|█████▌    | 43/77 [00:17<00:14,  2.41it/s]

Scoring submissions:  57%|█████▋    | 44/77 [00:17<00:13,  2.42it/s]

Scoring submissions:  58%|█████▊    | 45/77 [00:17<00:13,  2.43it/s]

Scoring submissions:  60%|█████▉    | 46/77 [00:18<00:12,  2.45it/s]

Scoring submissions:  61%|██████    | 47/77 [00:18<00:12,  2.39it/s]

Scoring submissions:  62%|██████▏   | 48/77 [00:19<00:12,  2.41it/s]

Scoring submissions:  64%|██████▎   | 49/77 [00:19<00:11,  2.42it/s]

Scoring submissions:  65%|██████▍   | 50/77 [00:20<00:11,  2.38it/s]

Scoring submissions:  66%|██████▌   | 51/77 [00:20<00:10,  2.40it/s]

Scoring submissions:  68%|██████▊   | 52/77 [00:20<00:10,  2.42it/s]

Scoring submissions:  69%|██████▉   | 53/77 [00:21<00:09,  2.44it/s]

Scoring submissions:  70%|███████   | 54/77 [00:21<00:09,  2.38it/s]

Scoring submissions:  71%|███████▏  | 55/77 [00:22<00:09,  2.40it/s]

Scoring submissions:  73%|███████▎  | 56/77 [00:22<00:08,  2.42it/s]

Scoring submissions:  74%|███████▍  | 57/77 [00:22<00:08,  2.44it/s]

Scoring submissions:  75%|███████▌  | 58/77 [00:23<00:07,  2.39it/s]

Scoring submissions:  77%|███████▋  | 59/77 [00:23<00:07,  2.41it/s]

Scoring submissions:  78%|███████▊  | 60/77 [00:24<00:07,  2.42it/s]

Scoring submissions:  79%|███████▉  | 61/77 [00:24<00:06,  2.38it/s]

Scoring submissions:  81%|████████  | 62/77 [00:24<00:06,  2.40it/s]

Scoring submissions:  82%|████████▏ | 63/77 [00:25<00:05,  2.42it/s]

Scoring submissions:  83%|████████▎ | 64/77 [00:25<00:05,  2.44it/s]

Scoring submissions:  84%|████████▍ | 65/77 [00:26<00:05,  2.39it/s]

Scoring submissions:  86%|████████▌ | 66/77 [00:26<00:04,  2.41it/s]

Scoring submissions:  87%|████████▋ | 67/77 [00:27<00:04,  2.43it/s]

Scoring submissions:  88%|████████▊ | 68/77 [00:27<00:03,  2.38it/s]

Scoring submissions:  90%|████████▉ | 69/77 [00:27<00:03,  2.40it/s]

Scoring submissions:  91%|█████████ | 70/77 [00:28<00:02,  2.42it/s]

Scoring submissions:  92%|█████████▏| 71/77 [00:28<00:02,  2.45it/s]

Scoring submissions:  94%|█████████▎| 72/77 [00:29<00:02,  2.40it/s]

Scoring submissions:  95%|█████████▍| 73/77 [00:29<00:01,  2.41it/s]

Scoring submissions:  96%|█████████▌| 74/77 [00:29<00:01,  2.43it/s]

Scoring submissions:  97%|█████████▋| 75/77 [00:30<00:00,  2.43it/s]

Scoring submissions:  99%|█████████▊| 76/77 [00:30<00:00,  2.37it/s]

Scoring submissions: 100%|██████████| 77/77 [00:31<00:00,  2.39it/s]

Scoring submissions: 100%|██████████| 77/77 [00:31<00:00,  2.47it/s]


Done scoring all submissions





In [4]:
# Calculate total ensemble score.
# Only N values for which some snapshot supplied a solution contribute; the
# others still hold the float('inf') placeholder and are reported separately
# so an incomplete ensemble is not mistaken for a better one.
covered_ns = [n for n in range(1, 201) if best_per_n[n]['data'] is not None]
missing_ns = [n for n in range(1, 201) if best_per_n[n]['data'] is None]

total_ensemble_score = sum(best_per_n[n]['score'] for n in covered_ns)
print(f'Total ensemble score from snapshots: {total_ensemble_score:.6f}')
if missing_ns:
    print(f'WARNING: no solution found for N={missing_ns}; totals exclude these values')

# Compare to baseline
# NOTE(review): hard-coded value; presumably the total score of the baseline
# CSV used elsewhere in this notebook - confirm it is still current.
baseline_score = 70.625918
print(f'Baseline score: {baseline_score:.6f}')
print(f'Improvement: {baseline_score - total_ensemble_score:.6f}')

# Show score breakdown by range (same "has data" filter as the total, so a
# missing N cannot turn a range sum into inf)
print('\nScore breakdown by N range:')
for start, end in [(1, 50), (51, 100), (101, 150), (151, 200)]:
    range_score = sum(
        best_per_n[n]['score']
        for n in range(start, end+1)
        if best_per_n[n]['data'] is not None
    )
    print(f'  N={start}-{end}: {range_score:.4f}')

Total ensemble score from snapshots: 70.523320
Baseline score: 70.625918
Improvement: 0.102598

Score breakdown by N range:
  N=1-50: 18.9890
  N=51-100: 17.5713
  N=101-150: 17.1261
  N=151-200: 16.8369


In [5]:
# Compare the ensemble against the baseline file, N by N, and list where the
# ensemble is better by more than a 1e-9 tolerance.
baseline_df = pd.read_csv('/home/code/experiments/001_baseline/baseline.csv')
baseline_per_n, _ = parse_submission(baseline_df)

# (n, baseline score, ensemble score) for every N; an N absent from the
# baseline is treated as having an infinite baseline score
score_pairs = (
    (n, baseline_per_n.get(n, float('inf')), best_per_n[n]['score'])
    for n in range(1, 201)
)
improvements = [
    (n, old, new, old - new)
    for n, old, new in score_pairs
    if new < old - 1e-9
]

print(f'\nN values with improvements: {len(improvements)}')
if improvements:
    # Largest gain first
    improvements.sort(key=lambda row: row[3], reverse=True)
    print('\nTop 20 improvements:')
    for n, old, new, diff in improvements[:20]:
        print(f'  N={n}: {old:.6f} -> {new:.6f} (improvement: {diff:.6f})')


N values with improvements: 94

Top 20 improvements:
  N=5: 0.416850 -> 0.394109 (improvement: 0.022740)
  N=2: 0.450779 -> 0.437328 (improvement: 0.013452)
  N=56: 0.352281 -> 0.340953 (improvement: 0.011327)
  N=55: 0.355023 -> 0.346789 (improvement: 0.008234)
  N=4: 0.416545 -> 0.411056 (improvement: 0.005489)
  N=54: 0.356435 -> 0.352169 (improvement: 0.004266)
  N=71: 0.352232 -> 0.348328 (improvement: 0.003904)
  N=87: 0.353691 -> 0.349960 (improvement: 0.003732)
  N=53: 0.361855 -> 0.358787 (improvement: 0.003069)
  N=65: 0.363285 -> 0.361611 (improvement: 0.001674)
  N=80: 0.344880 -> 0.343654 (improvement: 0.001227)
  N=47: 0.357493 -> 0.356418 (improvement: 0.001075)
  N=140: 0.340098 -> 0.339163 (improvement: 0.000935)
  N=108: 0.343558 -> 0.342627 (improvement: 0.000931)
  N=168: 0.332475 -> 0.331548 (improvement: 0.000927)
  N=77: 0.351113 -> 0.350211 (improvement: 0.000903)
  N=136: 0.345024 -> 0.344159 (improvement: 0.000865)
  N=69: 0.354528 -> 0.353706 (improvement: 0

In [6]:
# Stack the winning rows of every N into one submission frame
ensemble_rows = [
    best_per_n[n]['data']
    for n in range(1, 201)
    if best_per_n[n]['data'] is not None
]

ensemble_df = pd.concat(ensemble_rows, ignore_index=True)

# Order rows by N and then by the tree index, both taken from the id
# ('<N>_<index>'), using temporary sort keys that are dropped again
id_parts = ensemble_df['id'].str.split('_')
ensemble_df = (
    ensemble_df
    .assign(sn=id_parts.str[0].astype(int), si=id_parts.str[1].astype(int))
    .sort_values(['sn', 'si'])
    .drop(columns=['sn', 'si'])
)
ensemble_df = ensemble_df[['id', 'x', 'y', 'deg']]

print(f'Ensemble submission shape: {ensemble_df.shape}')
print(ensemble_df.head())

Ensemble submission shape: (20100, 4)
      id                       x                        y  \
0  001_0  s43.591192092102147626  s-31.783267068741778871   
1  002_0   s0.154097069621355887   s-0.038540742694794648   
2  002_1  s-0.154097069621372845   s-0.561459257305224058   
3  003_0      s0.254937643697833      s-0.233436061549416   
4  003_1      s0.357722754471247       s0.250360566787394   

                       deg  
0   s44.999999999999978684  
1  s144.272760863123583164  
2  s324.272760863123533426  
3      s113.56326044172948  
4         s66.370622269343  


In [7]:
# Save ensemble submission to the experiment folder and to the submission
# folder. Both parent directories are created if needed so that neither
# write depends on the directory already existing.
output_paths = [
    '/home/code/experiments/002_ensemble_snapshots/submission.csv',
    '/home/submission/submission.csv',
]
for output_path in output_paths:
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    ensemble_df.to_csv(output_path, index=False)

# NOTE(review): total_ensemble_score was computed in an earlier cell; it only
# matches the saved file if best_per_n has not changed since then.
print(f'Saved ensemble submission with score: {total_ensemble_score:.6f}')

Saved ensemble submission with score: 70.523320


In [8]:
# Collect every CSV file found anywhere below the research kernels folder
kernel_dir = '/home/code/research/kernels'
kernel_submissions = [
    os.path.join(folder, filename)
    for folder, _subdirs, filenames in os.walk(kernel_dir)
    for filename in filenames
    if filename.endswith('.csv')
]

print(f'Found {len(kernel_submissions)} CSV files in research kernels')
for csv_path in kernel_submissions:
    print(f'  {csv_path}')

Found 0 CSV files in research kernels


In [9]:
# Score kernel submissions and update best_per_n.
# CSVs that lack the submission columns are skipped. Files that fail to load
# or parse are reported (not silently ignored) and the loop continues.
for sub_file in kernel_submissions:
    try:
        df = pd.read_csv(sub_file)
        if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            continue
        per_n_scores, per_n_data = parse_submission(df)

        for n, score in per_n_scores.items():
            if score < best_per_n[n]['score']:
                best_per_n[n]['score'] = score
                best_per_n[n]['data'] = per_n_data[n]
                best_per_n[n]['source'] = sub_file
                print(f'Found better N={n}: {score:.6f} from {sub_file}')
    except Exception as e:
        # Best-effort: one bad file must not stop the scan, but the failure
        # should be visible rather than swallowed.
        print(f'Error processing {sub_file}: {e}')

# Recalculate total over the N values that have a solution
total_with_kernels = sum(best_per_n[n]['score'] for n in range(1, 201) if best_per_n[n]['data'] is not None)
print(f'\nTotal score with kernels: {total_with_kernels:.6f}')


Total score with kernels: 70.523320


In [None]:
# Check what external sources the jonathanchan kernel uses
# Let's try to download the SmartManoj GitHub submission, score it and merge
# any per-N improvements into best_per_n.
# NOTE: this cell only updates best_per_n in memory; it does not rewrite any
# submission file.
import urllib.request

try:
    url = 'https://raw.githubusercontent.com/SmartManoj/Santa-Scoreboard/main/submission.csv'
    smartmanoj_path = '/home/code/experiments/002_ensemble_snapshots/smartmanoj.csv'

    # Fetch with a timeout so a stalled connection cannot hang the notebook,
    # and only touch the target file once the full payload has arrived.
    with urllib.request.urlopen(url, timeout=30) as response:
        payload = response.read()
    os.makedirs(os.path.dirname(smartmanoj_path), exist_ok=True)
    with open(smartmanoj_path, 'wb') as out_file:
        out_file.write(payload)
    print('Downloaded SmartManoj submission')

    df = pd.read_csv(smartmanoj_path)
    print(f'Shape: {df.shape}')
    print(df.head())

    # Same schema check the other scoring cells apply before parsing
    missing_cols = {'id', 'x', 'y', 'deg'} - set(df.columns)
    if missing_cols:
        raise ValueError(f'downloaded file is missing columns: {sorted(missing_cols)}')

    # Score it
    per_n_scores, per_n_data = parse_submission(df)
    total = sum(per_n_scores.values())
    print(f'\nSmartManoj total score: {total:.6f}')

    # Check for improvements (must beat the current best by more than 1e-9)
    improvements_from_smartmanoj = 0
    for n, score in per_n_scores.items():
        if score < best_per_n[n]['score'] - 1e-9:
            improvements_from_smartmanoj += 1
            best_per_n[n]['score'] = score
            best_per_n[n]['data'] = per_n_data[n]
            best_per_n[n]['source'] = 'smartmanoj'

    print(f'N values improved from SmartManoj: {improvements_from_smartmanoj}')

    # Same "has data" filter as the other totals, so an N without any
    # solution does not turn the sum into inf
    new_total = sum(best_per_n[n]['score'] for n in range(1, 201) if best_per_n[n]['data'] is not None)
    print(f'New ensemble total: {new_total:.6f}')
except Exception as e:
    # Network, parsing and schema failures are all reported here
    print(f'Error: {e}')