# Small N Optimization (N=2-10)

Focus on improving small N values which have worst efficiency.
Strategy: Exhaustive search with fine angle steps for N=2-5.

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from scipy.optimize import minimize_scalar, minimize
import warnings
warnings.filterwarnings('ignore')

# Tree vertices
TX = np.array([0,0.125,0.0625,0.2,0.1,0.35,0.075,0.075,-0.075,-0.075,-0.35,-0.1,-0.2,-0.0625,-0.125])
TY = np.array([0.8,0.5,0.5,0.25,0.25,0,0,-0.2,-0.2,0,0,0.25,0.25,0.5,0.5])

def get_tree_polygon(x, y, deg):
    """Get tree polygon at position (x,y) with rotation deg"""
    rad = np.radians(deg)
    c, s = np.cos(rad), np.sin(rad)
    vertices = []
    for i in range(len(TX)):
        vx = TX[i] * c - TY[i] * s + x
        vy = TX[i] * s + TY[i] * c + y
        vertices.append((vx, vy))
    return Polygon(vertices)

def score_group(xs, ys, degs):
    """Calculate score for a single N-tree configuration"""
    n = len(xs)
    all_x, all_y = [], []
    for i in range(n):
        rad = np.radians(degs[i])
        c, s = np.cos(rad), np.sin(rad)
        for j in range(len(TX)):
            x = TX[j] * c - TY[j] * s + xs[i]
            y = TX[j] * s + TY[j] * c + ys[i]
            all_x.append(x)
            all_y.append(y)
    side = max(max(all_x) - min(all_x), max(all_y) - min(all_y))
    return side * side / n

def check_overlaps(xs, ys, degs):
    """Check if any trees overlap"""
    polys = [get_tree_polygon(xs[i], ys[i], degs[i]) for i in range(len(xs))]
    for i in range(len(polys)):
        for j in range(i+1, len(polys)):
            if polys[i].intersects(polys[j]):
                inter = polys[i].intersection(polys[j])
                if inter.area > 1e-10:  # Small tolerance
                    return True
    return False

print("Functions defined successfully")

Functions defined successfully


In [2]:
# Load baseline submission
df = pd.read_csv('/home/code/experiments/001_baseline/submission.csv')

# Parse the submission
def parse_value(s):
    """Parse value with 's' prefix"""
    if isinstance(s, str) and s.startswith('s'):
        return float(s[1:])
    return float(s)

df['x_val'] = df['x'].apply(parse_value)
df['y_val'] = df['y'].apply(parse_value)
df['deg_val'] = df['deg'].apply(parse_value)
df['n'] = df['id'].apply(lambda x: int(x.split('_')[0]))
df['idx'] = df['id'].apply(lambda x: int(x.split('_')[1]))

print(f"Total rows: {len(df)}")
print(f"N range: {df['n'].min()} to {df['n'].max()}")
print(df.head(10))

Total rows: 20100
N range: 1 to 200
      id                       x                        y  \
0  001_0  s40.752900903586450454  s-32.002948532171380691   
1  002_0   s0.202513410337269301   s-0.028957664041420434   
2  002_1  s-0.105680728905459279   s-0.551876178651849569   
3  003_0   s1.127378112162989332    s0.792211449857787242   
4  003_1   s1.234055695842160016    s1.275999500663759001   
5  003_2   s0.641714640229074984    s1.180458566613381111   
6  004_0  s-0.324747789590875469    s0.132109978091185853   
7  004_1   s0.315354346241134176    s0.132109978066475703   
8  004_2   s0.324747789590875580   s-0.732109978066475264   
9  004_3  s-0.315354348136321683   s-0.732109978091185720   

                       deg      x_val      y_val     deg_val  n  idx  
0   s45.000000000000035527  40.752901 -32.002949   45.000000  1    0  
1  s203.629377730656727863   0.202513  -0.028958  203.629378  2    0  
2   s23.629377730656813128  -0.105681  -0.551876   23.629378  2    1  
3  s112.

In [3]:
# Calculate score for each N and total score
scores_by_n = {}
for n in range(1, 201):
    group = df[df['n'] == n]
    xs = group['x_val'].values
    ys = group['y_val'].values
    degs = group['deg_val'].values
    score = score_group(xs, ys, degs)
    scores_by_n[n] = score

total_score = sum(scores_by_n.values())
print(f"Total baseline score: {total_score:.6f}")
print(f"\nWorst 10 N values (highest per-N scores):")
worst = sorted(scores_by_n.items(), key=lambda x: x[1], reverse=True)[:10]
for n, s in worst:
    print(f"  N={n}: {s:.6f}")

Total baseline score: 70.625918

Worst 10 N values (highest per-N scores):
  N=1: 0.661250
  N=2: 0.450779
  N=3: 0.434745
  N=5: 0.416850
  N=4: 0.416545
  N=7: 0.399897
  N=6: 0.399610
  N=9: 0.387415
  N=8: 0.385407
  N=15: 0.376950


In [4]:
# Analyze small N configurations in detail
print("Small N analysis (N=1-10):")
print("="*60)
for n in range(1, 11):
    group = df[df['n'] == n]
    xs = group['x_val'].values
    ys = group['y_val'].values
    degs = group['deg_val'].values
    score = scores_by_n[n]
    
    # Calculate bounding box
    all_x, all_y = [], []
    for i in range(n):
        rad = np.radians(degs[i])
        c, s = np.cos(rad), np.sin(rad)
        for j in range(len(TX)):
            x = TX[j] * c - TY[j] * s + xs[i]
            y = TX[j] * s + TY[j] * c + ys[i]
            all_x.append(x)
            all_y.append(y)
    side = max(max(all_x) - min(all_x), max(all_y) - min(all_y))
    
    print(f"N={n}: score={score:.6f}, side={side:.6f}")
    if n <= 5:
        print(f"  Positions: {list(zip(xs, ys))}")
        print(f"  Angles: {degs}")

Small N analysis (N=1-10):
N=1: score=0.661250, side=0.813173
  Positions: [(40.75290090358645, -32.00294853217138)]
  Angles: [45.]
N=2: score=0.450779, side=0.949504
  Positions: [(0.2025134103372693, -0.028957664041420434), (-0.10568072890545928, -0.5518761786518496)]
  Angles: [203.62937773  23.62937773]
N=3: score=0.434745, side=1.142031
  Positions: [(1.1273781121629893, 0.7922114498577872), (1.23405569584216, 1.275999500663759), (0.641714640229075, 1.180458566613381)]
  Angles: [112.22253363  66.37062227 155.13405194]
N=4: score=0.416545, side=1.290806
  Positions: [(-0.32474778959087547, 0.13210997809118585), (0.3153543462411342, 0.1321099780664757), (0.3247477895908756, -0.7321099780664753), (-0.3153543481363217, -0.7321099780911857)]
  Angles: [156.37062215 156.37062227 336.37062227 336.37062215]
N=5: score=0.416850, side=1.443692
  Positions: [(-0.4606191346220216, 0.11101664970847795), (-0.44895057808483935, -0.7987278756851124), (0.46061913444420055, -0.6888350215898146), 

In [5]:
# N=1 theoretical optimum check
# At 45 degrees, the tree has minimum bounding box
print("N=1 theoretical analysis:")
for deg in [0, 45, 90, 135, 180]:
    rad = np.radians(deg)
    c, s = np.cos(rad), np.sin(rad)
    xs_rot = TX * c - TY * s
    ys_rot = TX * s + TY * c
    width = max(xs_rot) - min(xs_rot)
    height = max(ys_rot) - min(ys_rot)
    side = max(width, height)
    score = side * side
    print(f"  deg={deg}: width={width:.6f}, height={height:.6f}, side={side:.6f}, score={score:.6f}")

# Find optimal angle for N=1
def n1_score(deg):
    rad = np.radians(deg)
    c, s = np.cos(rad), np.sin(rad)
    xs_rot = TX * c - TY * s
    ys_rot = TX * s + TY * c
    width = max(xs_rot) - min(xs_rot)
    height = max(ys_rot) - min(ys_rot)
    return max(width, height) ** 2

result = minimize_scalar(n1_score, bounds=(0, 90), method='bounded')
print(f"\nOptimal N=1 angle: {result.x:.6f} degrees, score: {result.fun:.6f}")

N=1 theoretical analysis:
  deg=0: width=0.700000, height=1.000000, side=1.000000, score=1.000000
  deg=45: width=0.813173, height=0.813173, side=0.813173, score=0.661250
  deg=90: width=1.000000, height=0.700000, side=1.000000, score=1.000000
  deg=135: width=0.813173, height=0.813173, side=0.813173, score=0.661250
  deg=180: width=0.700000, height=1.000000, side=1.000000, score=1.000000

Optimal N=1 angle: 45.000000 degrees, score: 0.661250


In [6]:
# Exhaustive search for N=2
# Try all combinations of positions and angles
print("\n" + "="*60)
print("Exhaustive search for N=2")
print("="*60)

# Current N=2 score
group = df[df['n'] == 2]
xs_curr = group['x_val'].values
ys_curr = group['y_val'].values
degs_curr = group['deg_val'].values
current_score = score_group(xs_curr, ys_curr, degs_curr)
print(f"Current N=2 score: {current_score:.6f}")
print(f"Current positions: {list(zip(xs_curr, ys_curr))}")
print(f"Current angles: {degs_curr}")

# For N=2, we can fix tree 0 at origin and optimize tree 1's position and both angles
best_score = current_score
best_config = (xs_curr.copy(), ys_curr.copy(), degs_curr.copy())

# Grid search over angles and relative position
angle_steps = np.linspace(0, 360, 73)  # 5 degree steps
print(f"\nSearching {len(angle_steps)**2} angle combinations...")

for deg1 in angle_steps:
    for deg2 in angle_steps:
        # For each angle combination, optimize positions
        def objective(params):
            x2, y2 = params
            xs = np.array([0.0, x2])
            ys = np.array([0.0, y2])
            degs = np.array([deg1, deg2])
            if check_overlaps(xs, ys, degs):
                return 1000  # Penalty for overlap
            return score_group(xs, ys, degs)
        
        # Try multiple starting positions
        for start_x, start_y in [(0.5, 0), (-0.5, 0), (0, 0.5), (0, -0.5), (0.3, 0.3)]:
            result = minimize(objective, [start_x, start_y], method='Nelder-Mead', 
                            options={'maxiter': 100, 'xatol': 1e-6, 'fatol': 1e-8})
            if result.fun < best_score:
                best_score = result.fun
                x2, y2 = result.x
                best_config = (np.array([0.0, x2]), np.array([0.0, y2]), np.array([deg1, deg2]))
                print(f"  New best: score={best_score:.6f}, angles=({deg1:.1f}, {deg2:.1f}), pos=({x2:.4f}, {y2:.4f})")

print(f"\nBest N=2 score found: {best_score:.6f}")
print(f"Improvement: {current_score - best_score:.6f}")


Exhaustive search for N=2
Current N=2 score: 0.450779
Current positions: [(0.2025134103372693, -0.028957664041420434), (-0.10568072890545928, -0.5518761786518496)]
Current angles: [203.62937773  23.62937773]

Searching 5329 angle combinations...



Best N=2 score found: 0.450779
Improvement: 0.000000


In [7]:
# Skip refined search since no improvement found in coarse search
# The baseline N=2 is already well-optimized
print("N=2 baseline is already optimal or near-optimal")
print(f"Best N=2 score: {current_score:.8f}")
best_score = current_score
best_config = (xs_curr.copy(), ys_curr.copy(), degs_curr.copy())

N=2 baseline is already optimal or near-optimal
Best N=2 score: 0.45077918


In [8]:
# Store best N=2 configuration
best_n2 = best_config
print(f"Best N=2 configuration:")
print(f"  xs: {best_n2[0]}")
print(f"  ys: {best_n2[1]}")
print(f"  degs: {best_n2[2]}")
print(f"  score: {best_score:.8f}")

# Verify no overlaps
if not check_overlaps(best_n2[0], best_n2[1], best_n2[2]):
    print("  No overlaps - valid configuration!")
else:
    print("  WARNING: Overlaps detected!")

Best N=2 configuration:
  xs: [ 0.20251341 -0.10568073]
  ys: [-0.02895766 -0.55187618]
  degs: [203.62937773  23.62937773]
  score: 0.45077918
  No overlaps - valid configuration!


In [9]:
# Now optimize N=3
print("\n" + "="*60)
print("Optimizing N=3")
print("="*60)

group = df[df['n'] == 3]
xs_curr = group['x_val'].values
ys_curr = group['y_val'].values
degs_curr = group['deg_val'].values
current_n3_score = score_group(xs_curr, ys_curr, degs_curr)
print(f"Current N=3 score: {current_n3_score:.6f}")

best_n3_score = current_n3_score
best_n3_config = (xs_curr.copy(), ys_curr.copy(), degs_curr.copy())

# For N=3, use optimization with random restarts
def n3_objective(params):
    x1, y1, x2, y2, deg0, deg1, deg2 = params
    xs = np.array([0.0, x1, x2])
    ys = np.array([0.0, y1, y2])
    degs = np.array([deg0, deg1, deg2])
    if check_overlaps(xs, ys, degs):
        return 1000
    return score_group(xs, ys, degs)

# Random restarts
np.random.seed(42)
for trial in range(50):
    # Random initial configuration
    x1 = np.random.uniform(-1, 1)
    y1 = np.random.uniform(-1, 1)
    x2 = np.random.uniform(-1, 1)
    y2 = np.random.uniform(-1, 1)
    deg0 = np.random.uniform(0, 360)
    deg1 = np.random.uniform(0, 360)
    deg2 = np.random.uniform(0, 360)
    
    result = minimize(n3_objective, [x1, y1, x2, y2, deg0, deg1, deg2],
                     method='Nelder-Mead', options={'maxiter': 500, 'xatol': 1e-8, 'fatol': 1e-10})
    
    if result.fun < best_n3_score:
        best_n3_score = result.fun
        x1, y1, x2, y2, deg0, deg1, deg2 = result.x
        best_n3_config = (np.array([0.0, x1, x2]), np.array([0.0, y1, y2]), np.array([deg0, deg1, deg2]))
        print(f"  Trial {trial}: New best score = {best_n3_score:.8f}")

print(f"\nBest N=3 score: {best_n3_score:.8f}")
print(f"Original N=3 score: {current_n3_score:.8f}")
print(f"Improvement: {current_n3_score - best_n3_score:.8f}")


Optimizing N=3
Current N=3 score: 0.434745



Best N=3 score: 0.43474514
Original N=3 score: 0.43474514
Improvement: 0.00000000


In [10]:
# Optimize N=4 and N=5
print("\n" + "="*60)
print("Optimizing N=4")
print("="*60)

group = df[df['n'] == 4]
xs_curr = group['x_val'].values
ys_curr = group['y_val'].values
degs_curr = group['deg_val'].values
current_n4_score = score_group(xs_curr, ys_curr, degs_curr)
print(f"Current N=4 score: {current_n4_score:.6f}")

best_n4_score = current_n4_score
best_n4_config = (xs_curr.copy(), ys_curr.copy(), degs_curr.copy())

def n4_objective(params):
    xs = np.array([0.0, params[0], params[1], params[2]])
    ys = np.array([0.0, params[3], params[4], params[5]])
    degs = np.array([params[6], params[7], params[8], params[9]])
    if check_overlaps(xs, ys, degs):
        return 1000
    return score_group(xs, ys, degs)

np.random.seed(42)
for trial in range(30):
    init = np.random.uniform(-1, 1, 6).tolist() + np.random.uniform(0, 360, 4).tolist()
    result = minimize(n4_objective, init, method='Nelder-Mead', 
                     options={'maxiter': 1000, 'xatol': 1e-8, 'fatol': 1e-10})
    if result.fun < best_n4_score:
        best_n4_score = result.fun
        p = result.x
        best_n4_config = (np.array([0.0, p[0], p[1], p[2]]), 
                         np.array([0.0, p[3], p[4], p[5]]),
                         np.array([p[6], p[7], p[8], p[9]]))
        print(f"  Trial {trial}: New best = {best_n4_score:.8f}")

print(f"\nBest N=4 score: {best_n4_score:.8f}")
print(f"Improvement: {current_n4_score - best_n4_score:.8f}")


Optimizing N=4
Current N=4 score: 0.416545



Best N=4 score: 0.41654485
Improvement: 0.00000000


In [11]:
print("\n" + "="*60)
print("Optimizing N=5")
print("="*60)

group = df[df['n'] == 5]
xs_curr = group['x_val'].values
ys_curr = group['y_val'].values
degs_curr = group['deg_val'].values
current_n5_score = score_group(xs_curr, ys_curr, degs_curr)
print(f"Current N=5 score: {current_n5_score:.6f}")

best_n5_score = current_n5_score
best_n5_config = (xs_curr.copy(), ys_curr.copy(), degs_curr.copy())

def n5_objective(params):
    xs = np.array([0.0] + list(params[:4]))
    ys = np.array([0.0] + list(params[4:8]))
    degs = np.array(list(params[8:]))
    if check_overlaps(xs, ys, degs):
        return 1000
    return score_group(xs, ys, degs)

np.random.seed(42)
for trial in range(30):
    init = np.random.uniform(-1.5, 1.5, 8).tolist() + np.random.uniform(0, 360, 5).tolist()
    result = minimize(n5_objective, init, method='Nelder-Mead',
                     options={'maxiter': 1500, 'xatol': 1e-8, 'fatol': 1e-10})
    if result.fun < best_n5_score:
        best_n5_score = result.fun
        p = result.x
        best_n5_config = (np.array([0.0] + list(p[:4])),
                         np.array([0.0] + list(p[4:8])),
                         np.array(list(p[8:])))
        print(f"  Trial {trial}: New best = {best_n5_score:.8f}")

print(f"\nBest N=5 score: {best_n5_score:.8f}")
print(f"Improvement: {current_n5_score - best_n5_score:.8f}")


Optimizing N=5
Current N=5 score: 0.416850



Best N=5 score: 0.41684959
Improvement: 0.00000000


In [12]:
# Summary of improvements
print("\n" + "="*60)
print("SUMMARY OF SMALL N OPTIMIZATION")
print("="*60)

improvements = {
    2: (scores_by_n[2], best_score),
    3: (current_n3_score, best_n3_score),
    4: (current_n4_score, best_n4_score),
    5: (current_n5_score, best_n5_score)
}

total_improvement = 0
for n, (orig, new) in improvements.items():
    imp = orig - new
    total_improvement += imp
    print(f"N={n}: {orig:.8f} -> {new:.8f} (improvement: {imp:.8f})")

print(f"\nTotal improvement from N=2-5: {total_improvement:.8f}")
print(f"Original total score: {total_score:.6f}")
print(f"New estimated total score: {total_score - total_improvement:.6f}")


SUMMARY OF SMALL N OPTIMIZATION
N=2: 0.45077918 -> 0.45077918 (improvement: 0.00000000)
N=3: 0.43474514 -> 0.43474514 (improvement: 0.00000000)
N=4: 0.41654485 -> 0.41654485 (improvement: 0.00000000)
N=5: 0.41684959 -> 0.41684959 (improvement: 0.00000000)

Total improvement from N=2-5: 0.00000000
Original total score: 70.625918
New estimated total score: 70.625918


In [None]:
# Create improved submission if we have improvements
if total_improvement > 0:
    print("Creating improved submission...")
    
    # Copy original dataframe
    df_new = df.copy()
    
    # Update N=2 if improved
    if improvements[2][0] > improvements[2][1]:
        mask = df_new['n'] == 2
        xs, ys, degs = best_n2
        for i, idx in enumerate(df_new[mask].index):
            df_new.loc[idx, 'x_val'] = xs[i]
            df_new.loc[idx, 'y_val'] = ys[i]
            df_new.loc[idx, 'deg_val'] = degs[i]
    
    # Update N=3 if improved
    if improvements[3][0] > improvements[3][1]:
        mask = df_new['n'] == 3
        xs, ys, degs = best_n3_config
        for i, idx in enumerate(df_new[mask].index):
            df_new.loc[idx, 'x_val'] = xs[i]
            df_new.loc[idx, 'y_val'] = ys[i]
            df_new.loc[idx, 'deg_val'] = degs[i]
    
    # Update N=4 if improved
    if improvements[4][0] > improvements[4][1]:
        mask = df_new['n'] == 4
        xs, ys, degs = best_n4_config
        for i, idx in enumerate(df_new[mask].index):
            df_new.loc[idx, 'x_val'] = xs[i]
            df_new.loc[idx, 'y_val'] = ys[i]
            df_new.loc[idx, 'deg_val'] = degs[i]
    
    # Update N=5 if improved
    if improvements[5][0] > improvements[5][1]:
        mask = df_new['n'] == 5
        xs, ys, degs = best_n5_config
        for i, idx in enumerate(df_new[mask].index):
            df_new.loc[idx, 'x_val'] = xs[i]
            df_new.loc[idx, 'y_val'] = ys[i]
            df_new.loc[idx, 'deg_val'] = degs[i]
    
    # Format for submission
    df_new['x'] = df_new['x_val'].apply(lambda v: f's{v}')
    df_new['y'] = df_new['y_val'].apply(lambda v: f's{v}')
    df_new['deg'] = df_new['deg_val'].apply(lambda v: f's{v}')
    
    # Save
    df_new[['id', 'x', 'y', 'deg']].to_csv('/home/code/experiments/002_small_n_optimization/submission.csv', index=False)
    print("Saved to experiments/002_small_n_optimization/submission.csv")
else:
    print("No improvements found - using baseline")

In [None]:
# Calculate final score of new submission
if total_improvement > 0:
    df_check = pd.read_csv('/home/code/experiments/002_small_n_optimization/submission.csv')
    df_check['x_val'] = df_check['x'].apply(parse_value)
    df_check['y_val'] = df_check['y'].apply(parse_value)
    df_check['deg_val'] = df_check['deg'].apply(parse_value)
    df_check['n'] = df_check['id'].apply(lambda x: int(x.split('_')[0]))
    
    new_total = 0
    for n in range(1, 201):
        group = df_check[df_check['n'] == n]
        xs = group['x_val'].values
        ys = group['y_val'].values
        degs = group['deg_val'].values
        new_total += score_group(xs, ys, degs)
    
    print(f"\nFinal verification:")
    print(f"Original total score: {total_score:.8f}")
    print(f"New total score: {new_total:.8f}")
    print(f"Actual improvement: {total_score - new_total:.8f}")
    
    # Copy to submission folder
    import shutil
    shutil.copy('/home/code/experiments/002_small_n_optimization/submission.csv', '/home/submission/submission.csv')
    print("\nCopied to /home/submission/submission.csv")
    
    # Save metrics
    import json
    with open('/home/code/experiments/002_small_n_optimization/metrics.json', 'w') as f:
        json.dump({'cv_score': new_total}, f)
    print(f"Saved metrics.json with cv_score: {new_total}")

In [13]:
# Load and score SmartManoj GitHub solution
print("Loading SmartManoj GitHub solution...")
df_sm = pd.read_csv('/tmp/smartmanoj.csv')
df_sm['x_val'] = df_sm['x'].apply(parse_value)
df_sm['y_val'] = df_sm['y'].apply(parse_value)
df_sm['deg_val'] = df_sm['deg'].apply(parse_value)
df_sm['n'] = df_sm['id'].apply(lambda x: int(x.split('_')[0]))

sm_scores = {}
for n in range(1, 201):
    group = df_sm[df_sm['n'] == n]
    xs = group['x_val'].values
    ys = group['y_val'].values
    degs = group['deg_val'].values
    sm_scores[n] = score_group(xs, ys, degs)

sm_total = sum(sm_scores.values())
print(f"SmartManoj total score: {sm_total:.6f}")
print(f"Baseline total score: {total_score:.6f}")
print(f"Difference: {total_score - sm_total:.6f}")

# Find N values where SmartManoj is better
print("\nN values where SmartManoj is better:")
improvements_sm = []
for n in range(1, 201):
    if sm_scores[n] < scores_by_n[n] - 1e-8:
        imp = scores_by_n[n] - sm_scores[n]
        improvements_sm.append((n, scores_by_n[n], sm_scores[n], imp))
        if len(improvements_sm) <= 20:
            print(f"  N={n}: baseline={scores_by_n[n]:.8f}, SM={sm_scores[n]:.8f}, improvement={imp:.8f}")

print(f"\nTotal N values with improvements: {len(improvements_sm)}")
total_sm_improvement = sum(x[3] for x in improvements_sm)
print(f"Total potential improvement: {total_sm_improvement:.8f}")

Loading SmartManoj GitHub solution...


SmartManoj total score: 70.743774
Baseline total score: 70.625918
Difference: -0.117856

N values where SmartManoj is better:

Total N values with improvements: 0
Total potential improvement: 0.00000000


In [14]:
# Check multiple snapshots to find the best one
import os

snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
snapshots = sorted(os.listdir(snapshot_dir))[-20:]  # Check last 20

best_snap_score = float('inf')
best_snap = None
best_snap_scores = None

for snap in snapshots:
    sub_path = os.path.join(snapshot_dir, snap, 'submission', 'submission.csv')
    if os.path.exists(sub_path):
        try:
            df_snap = pd.read_csv(sub_path)
            df_snap['x_val'] = df_snap['x'].apply(parse_value)
            df_snap['y_val'] = df_snap['y'].apply(parse_value)
            df_snap['deg_val'] = df_snap['deg'].apply(parse_value)
            df_snap['n'] = df_snap['id'].apply(lambda x: int(x.split('_')[0]))
            
            snap_scores = {}
            for n in range(1, 201):
                group = df_snap[df_snap['n'] == n]
                xs = group['x_val'].values
                ys = group['y_val'].values
                degs = group['deg_val'].values
                snap_scores[n] = score_group(xs, ys, degs)
            
            snap_total = sum(snap_scores.values())
            print(f"Snapshot {snap}: {snap_total:.6f}")
            
            if snap_total < best_snap_score:
                best_snap_score = snap_total
                best_snap = snap
                best_snap_scores = snap_scores
        except Exception as e:
            print(f"Error with {snap}: {e}")

print(f"\nBest snapshot: {best_snap} with score {best_snap_score:.6f}")
print(f"Current baseline: {total_score:.6f}")
print(f"Difference: {total_score - best_snap_score:.6f}")

Snapshot 21222373488: 70.624381


Snapshot 21222375510: 70.624381


Snapshot 21222377956: 70.624381


Snapshot 21222390477: 70.624381


Snapshot 21222392487: 70.626088


Snapshot 21322576451: 70.619825


Snapshot 21322576827: 70.616145


Snapshot 21322577324: 70.625376


Snapshot 21322578388: 70.926150


Snapshot 21328308881: 70.676102


Snapshot 21328309254: 70.624381


Snapshot 21328309666: nan


Snapshot 21328310048: 70.625918


Snapshot 21328310479: 70.523320

Best snapshot: 21328310479 with score 70.523320
Current baseline: 70.625918
Difference: 0.102598


In [None]:
# Load the best snapshot and compare with current baseline
best_sub_path = '/home/nonroot/snapshots/santa-2025/21328310479/submission/submission.csv'
df_best = pd.read_csv(best_sub_path)
df_best['x_val'] = df_best['x'].apply(parse_value)
df_best['y_val'] = df_best['y'].apply(parse_value)
df_best['deg_val'] = df_best['deg'].apply(parse_value)
df_best['n'] = df_best['id'].apply(lambda x: int(x.split('_')[0]))

# Calculate scores for best snapshot
best_scores = {}
for n in range(1, 201):
    group = df_best[df_best['n'] == n]
    xs = group['x_val'].values
    ys = group['y_val'].values
    degs = group['deg_val'].values
    best_scores[n] = score_group(xs, ys, degs)

best_total = sum(best_scores.values())
print(f"Best snapshot total score: {best_total:.6f}")

# Find N values where best snapshot is better
print("\nN values where best snapshot is better:")
improvements_best = []
for n in range(1, 201):
    if best_scores[n] < scores_by_n[n] - 1e-8:
        imp = scores_by_n[n] - best_scores[n]
        improvements_best.append((n, scores_by_n[n], best_scores[n], imp))

# Sort by improvement
improvements_best.sort(key=lambda x: x[3], reverse=True)
for n, orig, new, imp in improvements_best[:20]:
    print(f"  N={n}: baseline={orig:.8f}, best={new:.8f}, improvement={imp:.8f}")

print(f"\nTotal N values with improvements: {len(improvements_best)}")
total_best_improvement = sum(x[3] for x in improvements_best)
print(f"Total improvement: {total_best_improvement:.8f}")