# Loop 5 Strategic Analysis

## Current Status
- Best CV: 70.616068 (hybrid ensemble)
- Best LB: 70.622435 (sa_fast_v2)
- Target: 68.891380
- Gap: 1.725 points (2.50%), measured from the best CV score (70.616068 - 68.891380)
- Submissions: 4/100 used, 96 remaining

## Key Insights from Research
1. Simulated annealing (SA) is the dominant approach
2. N < 58: SA for unstructured packings
3. N >= 58: Tessellation/lattice is mathematically superior
4. Top teams use C++ optimizers
5. The gap of 1.725 points is HUGE - we need a fundamentally different approach

In [None]:
import pandas as pd
import numpy as np
import math
from numba import njit
import os

# Make /home/code the working directory for the rest of the notebook. The paths
# used in this section are absolute, so this only affects relative paths.
os.chdir('/home/code')

# Tree geometry: the 15 outline vertices, split into parallel x / y arrays.
# Vertex 0 is (0, 0.8); the x values of the second half mirror the first half.
TX = np.array([
    0, 0.125, 0.0625,
    0.2, 0.1,
    0.35, 0.075,
    0.075, -0.075,
    -0.075, -0.35,
    -0.1, -0.2,
    -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5,
    0.25, 0.25,
    0, 0,
    -0.2, -0.2,
    0, 0,
    0.25, 0.25,
    0.5, 0.5,
])

@njit
def score_group(xs, ys, degs, tx, ty):
    """Return the bounding-square area of a group divided by its tree count.

    Tree ``i`` is the outline ``(tx, ty)`` rotated counter-clockwise by
    ``degs[i]`` degrees about the origin and then shifted by ``(xs[i], ys[i])``.
    The axis-aligned bounding box of all transformed vertices is computed, its
    longer edge is taken as the side of the enclosing square, and
    ``side * side / xs.size`` is returned.
    """
    count = xs.size
    n_vertices = tx.size
    # Sentinels far outside any realistic coordinate so the first vertex wins.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300
    for t in range(count):
        theta = degs[t] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        for v in range(n_vertices):
            px = cos_t * tx[v] - sin_t * ty[v] + xs[t]
            py = sin_t * tx[v] + cos_t * ty[v] + ys[t]
            lo_x = min(lo_x, px)
            hi_x = max(hi_x, px)
            lo_y = min(lo_y, py)
            hi_y = max(hi_y, py)
    extent = max(hi_x - lo_x, hi_y - lo_y)
    return extent * extent / count

def get_scores_by_n(df):
    """Score every complete group in a submission frame.

    For each n from 1 to 200, the rows whose ``id`` starts with the zero-padded
    prefix ``'{n:03d}_'`` form group n. A group is scored only when it has
    exactly n rows; any other group is left out of the result. The ``x``, ``y``
    and ``deg`` columns are strings whose first character is dropped before
    conversion to float (presumably a one-letter prefix -- confirm against the
    submission format).

    Returns a dict mapping n to the value of ``score_group`` for that group.
    """
    def _numeric(frame, column):
        # Drop the leading character, then parse the remainder as float.
        return frame[column].str[1:].astype(float).values

    ids = df['id']
    per_n = {}
    for count in range(1, 201):
        rows = df[ids.str.startswith(f'{count:03d}_')]
        if len(rows) == count:
            per_n[count] = score_group(
                _numeric(rows, 'x'),
                _numeric(rows, 'y'),
                _numeric(rows, 'deg'),
                TX,
                TY,
            )
    return per_n

# Cell-completion marker: confirms the definitions above ran without error.
print('Functions defined')

In [None]:
# Load our best solution and analyze per-N scores
df = pd.read_csv('/home/submission/submission.csv')
scores = get_scores_by_n(df)

# get_scores_by_n leaves out any N whose row count is not exactly N. A missing
# group would silently lower the total and shrink the reported gap, so flag it.
missing_n = [n for n in range(1, 201) if n not in scores]
if missing_n:
    print(f'WARNING: {len(missing_n)} N groups missing or incomplete; '
          f'total is understated: {missing_n}')

total = sum(scores.values())
print(f'Total score: {total:.6f}')
print('Target: 68.891380')
print(f'Gap: {total - 68.891380:.6f}')

In [None]:
# Break the total down into contiguous bands of N: (first N, last N, label).
ranges = [
    (1, 10, 'Small (1-10)'),
    (11, 30, 'Medium-Small (11-30)'),
    (31, 57, 'Medium (31-57)'),
    (58, 100, 'Large (58-100)'),
    (101, 200, 'Very Large (101-200)'),
]

rule = '=' * 60
print('Score contribution by N range:')
print(rule)
for start, end, name in ranges:
    # Only N values that were actually scored contribute to the band.
    present = [n for n in range(start, end + 1) if n in scores]
    range_score = sum(scores[n] for n in present)
    pct = range_score / total * 100
    print(f'{name:25s}: {range_score:8.4f} ({pct:5.1f}%)')
print(rule)
print(f'Total: {total:.6f}')

In [None]:
# Rank N values by individual score, largest first, and show the top 20
# together with each one's share of the total.
print('\nTop 20 N values by score contribution:')
sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
top_entries = sorted_scores[:20]
for n, score in top_entries:
    pct = score / total * 100
    print(f'N={n:3d}: {score:.6f} ({pct:.2f}%)')

In [None]:
# Calculate theoretical minimum (perfect packing efficiency)
# The tree area is derived from the TX/TY outline with the shoelace formula
# instead of being hard-coded: those vertices give 0.245625, not the 0.2475
# that was previously written here.
tree_area = 0.5 * abs(float(np.dot(TX, np.roll(TY, -1)) - np.dot(TY, np.roll(TX, -1))))

print('\nTheoretical analysis:')
print('=' * 60)
print(f'Tree polygon area: {tree_area}')
print()
print('Theoretical minimum (100% packing efficiency):')
theoretical_min = 0
for n in range(1, 201):
    # Minimum square side = sqrt(n * tree_area). The per-N score side^2 / n is
    # therefore tree_area for every n, so the sum is about 200 * tree_area.
    min_side = math.sqrt(n * tree_area)
    min_score = min_side ** 2 / n
    theoretical_min += min_score

print(f'Theoretical minimum score: {theoretical_min:.6f}')
print(f'Current score: {total:.6f}')
print(f'Gap to theoretical: {total - theoretical_min:.6f}')
print('Target: 68.891380')
print(f'Gap to target: {total - 68.891380:.6f}')

In [None]:
# Packing efficiency for a sample of N: total tree area divided by the area of
# the bounding square, as a percentage.
print('\nPacking efficiency by N (sample):')
print('=' * 60)
sample_ns = (1, 5, 10, 20, 50, 100, 150, 200)
for n in sample_ns:
    if n not in scores:
        continue
    # score = side^2 / n, so multiplying by n recovers the square's area.
    actual_area = scores[n] * n
    theoretical_area = n * tree_area
    efficiency = theoretical_area / actual_area * 100
    print(f'N={n:3d}: efficiency={efficiency:5.1f}%, score={scores[n]:.6f}')

In [None]:
# List executable regular files under the snapshots directory (first 30 lines
# of find's output; blank lines are skipped).
import subprocess

print('Available optimizers in snapshots:')
find_cmd = ['find', '/home/nonroot/snapshots', '-type', 'f', '-executable', '-name', '*']
result = subprocess.run(find_cmd, capture_output=True, text=True)
listed = result.stdout.strip().split('\n')[:30]
for line in filter(None, listed):
    print(f'  {line}')

In [None]:
# Locate the sa_v1_parallel / sa_parallel optimizers, which have not been tried yet.
find_cmd = ['find', '/home/nonroot/snapshots', '-name', 'sa_v1_parallel*', '-o', '-name', 'sa_parallel*']
result = subprocess.run(find_cmd, capture_output=True, text=True)
print('sa_v1_parallel locations:')
matches = result.stdout.strip().split('\n')
for line in filter(None, matches):
    print(f'  {line}')

In [None]:
# Locate tree_packer variants under the snapshots directory (first 20 lines of
# find's output; blank lines are skipped).
find_cmd = ['find', '/home/nonroot/snapshots', '-name', 'tree_packer*']
result = subprocess.run(find_cmd, capture_output=True, text=True)
print('tree_packer locations:')
matches = result.stdout.strip().split('\n')[:20]
for line in filter(None, matches):
    print(f'  {line}')