# Loop 1 LB Feedback Analysis

LB Score: 70.6761 (matches CV exactly)
Target: 68.922808
Gap: 1.753 points

This notebook analyzes all available solutions to find the best ensemble.

In [1]:
import numpy as np
import pandas as pd
import math
from numba import njit
import glob
import os

# Tree polygon template
@njit
def make_polygon_template():
    tw=0.15; th=0.2; bw=0.7; mw=0.4; ow=0.25
    tip=0.8; t1=0.5; t2=0.25; base=0.0; tbot=-th
    x=np.array([0,ow/2,ow/4,mw/2,mw/4,bw/2,tw/2,tw/2,-tw/2,-tw/2,-bw/2,-mw/4,-mw/2,-ow/4,-ow/2],np.float64)
    y=np.array([tip,t1,t1,t2,t2,base,base,tbot,tbot,base,base,t2,t2,t1,t1],np.float64)
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    """Calculate score for a group of trees"""
    n = xs.size
    V = tx.size
    mnx = 1e300; mny = 1e300; mxx = -1e300; mxy = -1e300
    for i in range(n):
        r = degs[i] * math.pi / 180.0
        c = math.cos(r); s = math.sin(r)
        xi = xs[i]; yi = ys[i]
        for j in range(V):
            X = c * tx[j] - s * ty[j] + xi
            Y = s * tx[j] + c * ty[j] + yi
            if X < mnx: mnx = X
            if X > mxx: mxx = X
            if Y < mny: mny = Y
            if Y > mxy: mxy = Y
    side = max(mxx - mnx, mxy - mny)
    return side * side / n

def strip(a):
    """Strip 's' prefix from values"""
    return np.array([float(str(v).replace('s', '')) for v in a], np.float64)

tx, ty = make_polygon_template()
print(f"Tree template: {len(tx)} vertices")

Tree template: 15 vertices


In [2]:
# List all available solution files
solution_dirs = [
    '/home/code/santa-2025-csv/',
    '/home/code/bucket-of-chump/',
    '/home/code/telegram-solution/',
    '/home/code/telegram-full/',
    '/home/code/best-public/',
    '/home/code/best-public-packed/',
]

all_files = []
for d in solution_dirs:
    if os.path.exists(d):
        all_files.extend(glob.glob(d + '*.csv'))

print(f"Found {len(all_files)} solution files:")
for f in all_files:
    print(f"  {f}")

Found 8 solution files:
  /home/code/santa-2025-csv/santa-2025.csv
  /home/code/bucket-of-chump/submission.csv
  /home/code/telegram-solution/72.49.csv
  /home/code/telegram-solution/71.97.csv
  /home/code/telegram-full/72.49.csv
  /home/code/telegram-full/71.97.csv
  /home/code/best-public/submission_best.csv
  /home/code/best-public-packed/submission_best.csv


In [3]:
# Score each solution file and find best for each N
best = {n: {'score': 1e300, 'data': None, 'src': None} for n in range(1, 201)}
file_scores = {}

for fp in all_files:
    try:
        df = pd.read_csv(fp)
    except Exception as e:
        print(f"Error reading {fp}: {e}")
        continue
    
    if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
        print(f"Skipping {fp}: missing columns")
        continue
    
    df = df.copy()
    df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
    
    total_score = 0
    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue
        xs = strip(g['x'].to_numpy())
        ys = strip(g['y'].to_numpy())
        ds = strip(g['deg'].to_numpy())
        sc = score_group(xs, ys, ds, tx, ty)
        total_score += sc
        
        if sc < best[n]['score']:
            best[n]['score'] = float(sc)
            best[n]['data'] = g.drop(columns=['N']).copy()
            best[n]['src'] = os.path.basename(fp)
    
    file_scores[os.path.basename(fp)] = total_score
    print(f"{os.path.basename(fp)}: total score = {total_score:.6f}")

print("\nFile scores sorted:")
for fn, sc in sorted(file_scores.items(), key=lambda x: x[1]):
    print(f"  {fn}: {sc:.6f}")

santa-2025.csv: total score = 70.676102


submission.csv: total score = 70.676501
72.49.csv: total score = 72.495739
71.97.csv: total score = 71.972027


72.49.csv: total score = 72.495739
71.97.csv: total score = 71.972027
submission_best.csv: total score = 70.926150
submission_best.csv: total score = 70.926150

File scores sorted:
  santa-2025.csv: 70.676102
  submission.csv: 70.676501
  submission_best.csv: 70.926150
  71.97.csv: 71.972027
  72.49.csv: 72.495739


In [4]:
# Calculate ensemble score
ensemble_score = sum(best[n]['score'] for n in range(1, 201))
print(f"\nEnsemble score (best from each N): {ensemble_score:.6f}")
print(f"Target score: 68.922808")
print(f"Gap to target: {ensemble_score - 68.922808:.6f}")

# Show which sources contribute to the ensemble
source_counts = {}
for n in range(1, 201):
    src = best[n]['src']
    source_counts[src] = source_counts.get(src, 0) + 1

print(f"\nSource contributions:")
for src, count in sorted(source_counts.items(), key=lambda x: -x[1]):
    print(f"  {src}: {count} N values")


Ensemble score (best from each N): 70.676102
Target score: 68.922808
Gap to target: 1.753294

Source contributions:
  santa-2025.csv: 200 N values


In [5]:
# Analyze which N values have the most room for improvement
print("\nTop 30 N values with highest contribution to score:")
n_scores = [(n, best[n]['score'], best[n]['src']) for n in range(1, 201)]
n_scores.sort(key=lambda x: -x[1])

for n, score, src in n_scores[:30]:
    print(f"  N={n:3d}: score={score:.6f} (from {src})")


Top 30 N values with highest contribution to score:
  N=  1: score=0.661250 (from santa-2025.csv)
  N=  2: score=0.450779 (from santa-2025.csv)
  N=  3: score=0.434745 (from santa-2025.csv)
  N=  5: score=0.416850 (from santa-2025.csv)
  N=  4: score=0.416545 (from santa-2025.csv)
  N=  7: score=0.399897 (from santa-2025.csv)
  N=  6: score=0.399610 (from santa-2025.csv)
  N=  9: score=0.387415 (from santa-2025.csv)
  N=  8: score=0.385407 (from santa-2025.csv)
  N= 15: score=0.379203 (from santa-2025.csv)
  N= 10: score=0.376630 (from santa-2025.csv)
  N= 21: score=0.376451 (from santa-2025.csv)
  N= 20: score=0.376057 (from santa-2025.csv)
  N= 11: score=0.375736 (from santa-2025.csv)
  N= 22: score=0.375258 (from santa-2025.csv)
  N= 16: score=0.374128 (from santa-2025.csv)
  N= 26: score=0.373997 (from santa-2025.csv)
  N= 12: score=0.372724 (from santa-2025.csv)
  N= 13: score=0.372323 (from santa-2025.csv)
  N= 25: score=0.372144 (from santa-2025.csv)
  N= 14: score=0.370569 (fr

In [6]:
# Create ensemble submission
rows = []
for n in range(1, 201):
    if best[n]['data'] is not None:
        rows.append(best[n]['data'])

ensemble_df = pd.concat(rows, ignore_index=True)
print(f"Ensemble submission rows: {len(ensemble_df)}")
print(f"Expected rows: {sum(range(1, 201))}")

# Save ensemble
ensemble_df.to_csv('/home/submission/submission.csv', index=False)
print("\nEnsemble saved to /home/submission/submission.csv")

Ensemble submission rows: 20100
Expected rows: 20100

Ensemble saved to /home/submission/submission.csv


In [7]:
# Verify the ensemble score
df = pd.read_csv('/home/submission/submission.csv')
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

total = 0
for n, g in df.groupby('N'):
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    sc = score_group(xs, ys, ds, tx, ty)
    total += sc

print(f"Verified ensemble score: {total:.6f}")

Verified ensemble score: 70.676102


In [None]:
# Summary
print("="*60)
print("SUMMARY")
print("="*60)
print(f"Best single file: {min(file_scores.items(), key=lambda x: x[1])}")
print(f"Ensemble score: {ensemble_score:.6f}")
print(f"Target: 68.922808")
print(f"Gap: {ensemble_score - 68.922808:.6f}")
print("="*60)