# Evolver Loop 3 Analysis

## Current Status
- Best CV: 84.894026 (exp_002 - ensemble approach)
- Best LB: 117.281454 (from exp_001; exp_002 has not been submitted yet)
- Target: 68.931058
- Gap: 15.96 points (23.2%)

## Key Questions
1. Which N values contribute most to the score?
2. What optimization techniques haven't we tried?
3. How can we close the remaining 15.96 point gap?

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the best submission from disk; the cells below derive everything
# from this DataFrame.
SUBMISSION_PATH = '/home/submission/submission.csv'
df = pd.read_csv(SUBMISSION_PATH)

# Template point set used by score_group_fast: 15 (x, y) pairs that are
# rotated and translated once per placed item. Presumably these are the
# vertices of the tree outline; they span x in [-0.35, 0.35] and
# y in [-0.2, 0.8].
TX = np.array([
    0.0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25, 0.0, 0.0, -0.2,
    -0.2, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5,
])

def strip_s(val):
    """Convert a submission cell to float, dropping one leading 's' if present.

    ``val`` is stringified first, so plain numbers and strings such as
    ``'s1.5'`` are both accepted. ``float`` raises ``ValueError`` when the
    remaining text is not a valid number.
    """
    text = str(val)
    if text.startswith('s'):
        text = text[1:]
    return float(text)

def score_group_fast(xs, ys, degs):
    """Score one group of placements by its square bounding box.

    Each placement ``i`` rotates the module-level template points
    ``(TX, TY)`` by ``degs[i]`` degrees and translates them by
    ``(xs[i], ys[i])``. The score is the squared side of the smallest
    axis-aligned square covering every transformed point, divided by the
    number of placements.

    Args:
        xs: x offsets, one per placement (any 1-D array-like).
        ys: y offsets, same length as ``xs``.
        degs: rotation angles in degrees, same length as ``xs``.

    Returns:
        ``(score, side)``. For an empty group this is ``(inf, 0)``.

    Raises:
        ValueError: if the three inputs do not have the same shape.
    """
    n = len(xs)
    if n == 0:
        return float('inf'), 0

    # Coerce to plain float arrays so positional semantics hold even when
    # a pandas Series with a non-default index is passed in.
    xs = np.asarray(xs, dtype=float)
    ys = np.asarray(ys, dtype=float)
    degs = np.asarray(degs, dtype=float)
    if not (xs.shape == ys.shape == degs.shape):
        raise ValueError(
            f'xs, ys and degs must have the same shape, got '
            f'{xs.shape}, {ys.shape} and {degs.shape}'
        )

    # Column vectors broadcast against the template row vectors, giving one
    # row of transformed points per placement without a Python-level loop.
    rad = np.radians(degs)[:, None]
    c, s = np.cos(rad), np.sin(rad)
    px = TX * c - TY * s + xs[:, None]
    py = TX * s + TY * c + ys[:, None]

    side = max(px.max() - px.min(), py.max() - py.min())
    return side * side / n, side

# Quick sanity check on how many rows were read.
print(f'Loaded submission with {len(df)} rows')

In [None]:
# Analyze score contribution by N
TARGET_SCORE = 68.931058  # target score quoted throughout this notebook
MAX_N = 200               # groups are scored for N = 1..MAX_N

# The group size N is the part of the id before the first underscore.
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
# x / y / deg may carry a leading 's'; strip_s turns them into floats.
df['x_val'] = df['x'].apply(strip_s)
df['y_val'] = df['y'].apply(strip_s)
df['deg_val'] = df['deg'].apply(strip_s)

scores = []
for n in range(1, MAX_N + 1):
    group = df[df['N'] == n]
    xs = group['x_val'].values
    ys = group['y_val'].values
    degs = group['deg_val'].values
    score, side = score_group_fast(xs, ys, degs)
    scores.append({'N': n, 'score': score, 'side': side})

scores_df = pd.DataFrame(scores)
total_score = scores_df['score'].sum()
# Express each group's share relative to the total that was actually
# computed, rather than a hard-coded score from an earlier run.
scores_df['contribution_pct'] = scores_df['score'] / total_score * 100

# score_group_fast returns inf for an empty group, which would silently
# turn the total into inf; make that visible.
missing_n = scores_df.loc[~np.isfinite(scores_df['score']), 'N'].tolist()
if missing_n:
    print(f'WARNING: no rows found for N in {missing_n}; their score is inf')

print(f'Total score: {total_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {total_score - TARGET_SCORE:.6f} ({(total_score - TARGET_SCORE) / TARGET_SCORE * 100:.2f}%)')

In [None]:
# Rank groups by their per-N score and keep the 20 largest; these are the
# groups that add the most to the total.
worst_n = scores_df.nlargest(n=20, columns='score')
worst_sum = worst_n['score'].sum()
worst_share = worst_sum / total_score * 100

print('Top 20 worst-performing N values:')
print(worst_n.to_string(index=False))
print(f'\nThese 20 N values contribute {worst_sum:.4f} ({worst_share:.2f}%) of total score')

In [None]:
# Visualize score distribution on a 2x2 grid of panels.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Top-left: per-N score, with the average per-N score the target implies.
ax1 = axes[0, 0]
ax1.bar(scores_df['N'], scores_df['score'], alpha=0.7)
ax1.set_xlabel('N')
# The superscript two is written as an escape so the label survives file
# re-encoding (it had previously been corrupted into mojibake).
ax1.set_ylabel('Score (side\u00b2/N)')
ax1.set_title('Score Contribution by N')
ax1.axhline(y=68.931058/200, color='r', linestyle='--', label=f'Target avg: {68.931058/200:.4f}')
ax1.legend()

# Top-right: bounding-box side length by N.
ax2 = axes[0, 1]
ax2.plot(scores_df['N'], scores_df['side'], 'b-', alpha=0.7)
ax2.set_xlabel('N')
ax2.set_ylabel('Side Length')
ax2.set_title('Bounding Box Side Length by N')

# Bottom-left: running total of the score against the overall target.
# Note this adds a 'cumsum' column to scores_df.
ax3 = axes[1, 0]
scores_df['cumsum'] = scores_df['score'].cumsum()
ax3.plot(scores_df['N'], scores_df['cumsum'], 'g-')
ax3.axhline(y=68.931058, color='r', linestyle='--', label='Target')
ax3.set_xlabel('N')
ax3.set_ylabel('Cumulative Score')
ax3.set_title('Cumulative Score by N')
ax3.legend()

# Bottom-right: side length against sqrt(N), the relationship assumed by
# the theoretical comparison later in the notebook.
ax4 = axes[1, 1]
ax4.scatter(np.sqrt(scores_df['N']), scores_df['side'], alpha=0.5)
ax4.set_xlabel('sqrt(N)')
ax4.set_ylabel('Side Length')
ax4.set_title('Side Length vs sqrt(N)')

plt.tight_layout()
plt.savefig('/home/code/exploration/score_analysis.png', dpi=100)
plt.show()
print('Saved analysis plot')

In [None]:
# Compare each group against a rough reference packing.
# Heuristic: the side of the square grows like sqrt(N) times the tree
# width (about 0.7, the base width), scaled by an assumed 0.8 packing
# factor to allow for trees nesting together when rotated.

TREE_WIDTH = 0.7
TREE_HEIGHT = 1.0


def _theoretical_row(n):
    """Return the reference side and score for a group of ``n`` trees."""
    side = np.sqrt(n) * TREE_WIDTH * 0.8  # 0.8 factor for packing efficiency
    # side ** 2 / n is the same for every n under this model, up to rounding.
    return {'N': n, 'theoretical_side': side, 'theoretical_score': side ** 2 / n}


theoretical = [_theoretical_row(n) for n in range(1, 201)]
theoretical_df = pd.DataFrame(theoretical)

# Join actual and reference values on N; efficiency is the reference side
# divided by the actual side, gap is actual score minus reference score.
comparison = scores_df.merge(theoretical_df, on='N').assign(
    efficiency=lambda d: d['theoretical_side'] / d['side'],
    gap=lambda d: d['score'] - d['theoretical_score'],
)

eff = comparison['efficiency']
print('Efficiency analysis (theoretical/actual side):')
print(f'Mean efficiency: {eff.mean():.4f}')
print(f'Min efficiency: {eff.min():.4f} at N={comparison.loc[eff.idxmin(), "N"]}')
print(f'Max efficiency: {eff.max():.4f} at N={comparison.loc[eff.idxmax(), "N"]}')

# The 20 groups with the lowest efficiency have the most room to improve.
worst_efficiency = comparison.nsmallest(20, 'efficiency')
report_columns = ['N', 'side', 'theoretical_side', 'efficiency', 'score', 'gap']
print('\nN values with worst packing efficiency (most room for improvement):')
print(worst_efficiency[report_columns].to_string(index=False))

In [None]:
# Check what optimization techniques are available
import os
import glob

# Each entry pairs a heading with the glob pattern whose matching
# directories are listed beneath it.
listings = (
    ('Available kernels:', '/home/code/research/kernels/*/'),
    ('\nSnapshot solutions:', '/home/nonroot/snapshots/santa-2025/*/'),
    ('\nOur experiments:', '/home/code/experiments/*/'),
)
for heading, pattern in listings:
    print(heading)
    for found in glob.glob(pattern):
        # Drop the trailing slash so basename yields the directory name.
        print(f'  - {os.path.basename(found.rstrip("/"))}')

# Look for a bbox3 entry anywhere under the snapshots tree.
bbox3_paths = glob.glob('/home/nonroot/snapshots/**/bbox3', recursive=True)
print(f'\nbbox3 binary found: {bool(bbox3_paths)}')
if bbox3_paths:
    print(f'  Path: {bbox3_paths[0]}')

In [None]:
# Print a recap of the figures computed in the cells above.
banner = '=' * 60
print(banner)
print('KEY INSIGHTS FOR NEXT EXPERIMENT')
print(banner)

gap_abs = total_score - 68.931058
gap_pct = gap_abs / 68.931058 * 100
print('\n1. CURRENT STATUS:')
print(f'   - Best score: {total_score:.6f}')
print('   - Target: 68.931058')
print(f'   - Gap: {gap_abs:.6f} ({gap_pct:.2f}%)')

print('\n2. WORST PERFORMING N VALUES:')
top_ten = worst_n.head(10)
for n_val, n_score, n_side in zip(top_ten['N'], top_ten['score'], top_ten['side']):
    print(f'   N={int(n_val):3d}: score={n_score:.6f}, side={n_side:.6f}')

worst_sum_recap = worst_n['score'].sum()
print('\n3. OPTIMIZATION OPPORTUNITIES:')
print(f'   - Top 20 worst N values contribute {worst_sum_recap:.4f} ({worst_sum_recap / total_score * 100:.2f}%)')
print(f'   - Average efficiency: {comparison["efficiency"].mean():.4f}')
print(f'   - If we improve worst 20 by 20%, we save: {worst_sum_recap * 0.2:.4f} points')

print('\n4. TECHNIQUES NOT YET TRIED:')
untried = (
    '   - bbox3 optimizer (3-hour budget approach)',
    '   - Translation-based initialization for grid-like N values',
    '   - Forward propagation (N=1 to N=200)',
    '   - Targeted optimization on worst N values',
    '   - Longer optimization runs (current: ~10 min, top kernels: 3 hours)',
)
for line in untried:
    print(line)