# Loop 2 Analysis: Understanding the Gap and Next Steps

## Current Status
- Best CV: 87.90 (exp_001 - SA with 8 move types)
- Target: 68.95
- Gap: 18.95 points (lower is better; the score must drop by about 21.6%)

## Key Questions
1. Where is the score coming from? (breakdown by N)
2. What's the theoretical minimum for each N range?
3. How much can we gain from ensembling the existing submissions versus running more SA iterations?

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
import math

# Use 25 significant digits for Decimal arithmetic in this session.
getcontext().prec = 25
# Decimal constant 1e15. None of the cells visible here reference it
# (presumably a fixed-point scaling factor for coordinates -- TODO confirm).
scale_factor = Decimal('1e15')

print("Imports complete")

In [None]:
# Load the current best submission and show its size and first rows.
df = pd.read_csv('/home/code/submission_candidates/candidate_001.csv')
print(f"Shape: {df.shape}")
print(df.head())

# The text before the first underscore of each id is the group size N.
id_parts = df['id'].str.split('_')
df['N'] = id_parts.str[0].astype(int)
n_column = df['N']
print(f"\nN range: {n_column.min()} to {n_column.max()}")

In [None]:
# Define tree template for scoring
def make_polygon_template():
    """Return the tree outline as two float64 arrays ``(x, y)`` of 15 vertices.

    The outline starts at the tip, runs down the right-hand side (three
    tiers, then the trunk) and back up the left-hand side. It is symmetric
    about the y-axis, so the left half is the right half mirrored in x and
    traversed in reverse order.
    """
    trunk_w, trunk_h = 0.15, 0.2
    base_w, mid_w, top_w = 0.7, 0.4, 0.25
    tip_y, tier1_y, tier2_y, base_y = 0.8, 0.5, 0.25, 0.0
    trunk_bottom_y = -trunk_h

    # Right-hand side, from just below the tip down to the trunk bottom.
    right_x = [
        top_w / 2, top_w / 4,
        mid_w / 2, mid_w / 4,
        base_w / 2,
        trunk_w / 2, trunk_w / 2,
    ]
    right_y = [
        tier1_y, tier1_y,
        tier2_y, tier2_y,
        base_y,
        base_y, trunk_bottom_y,
    ]

    x = np.array([0, *right_x, *(-v for v in reversed(right_x))], np.float64)
    y = np.array([tip_y, *right_y, *reversed(right_y)], np.float64)
    return x, y

def strip(a):
    """Convert values such as ``'s0.25'`` into a float64 NumPy array.

    Each element is turned into a string, every ``'s'`` character is removed,
    and the remainder is parsed with ``float``.
    """
    parsed = []
    for value in a:
        text = str(value).replace('s', '')
        parsed.append(float(text))
    return np.array(parsed, np.float64)

def score_group(xs, ys, degs, tx, ty):
    """Score one group of trees: squared bounding-square side divided by n.

    Every tree is the template outline ``(tx, ty)`` rotated by ``degs[i]``
    degrees about the template origin and then translated by
    ``(xs[i], ys[i])``. The axis-aligned bounding box of all transformed
    vertices is computed, ``side`` is its larger dimension, and the result is
    ``side * side / n`` with ``n`` the number of trees.

    Args:
        xs, ys: per-tree translations (array-like, length n).
        degs: per-tree rotation angles in degrees (array-like, length n).
        tx, ty: template vertex coordinates (array-like, equal length).

    Returns:
        The group score as a NumPy float64.

    Raises:
        ValueError: if the group or the template is empty, or if the
            per-tree / per-vertex arrays have mismatched lengths.

    Note:
        NaN coordinates propagate into the result instead of being skipped.
    """
    xs = np.asarray(xs, dtype=np.float64)
    ys = np.asarray(ys, dtype=np.float64)
    degs = np.asarray(degs, dtype=np.float64)
    tx = np.asarray(tx, dtype=np.float64)
    ty = np.asarray(ty, dtype=np.float64)

    if not (xs.shape == ys.shape == degs.shape):
        raise ValueError("xs, ys and degs must have the same length")
    if tx.shape != ty.shape:
        raise ValueError("tx and ty must have the same length")

    n = xs.size
    if n == 0 or tx.size == 0:
        raise ValueError("score_group needs at least one tree and one template vertex")

    # Column vectors (n, 1) broadcast against the template row (V,) to give
    # an (n, V) grid holding every vertex of every tree.
    radians = degs * math.pi / 180.0
    cos_r = np.cos(radians)[:, None]
    sin_r = np.sin(radians)[:, None]
    all_x = cos_r * tx - sin_r * ty + xs[:, None]
    all_y = sin_r * tx + cos_r * ty + ys[:, None]

    side = max(all_x.max() - all_x.min(), all_y.max() - all_y.min())
    return side * side / n

# Build the tree outline once; tx/ty are the module-level vertex arrays that
# the scoring cells pass to score_group.
tx, ty = make_polygon_template()
print("Scoring functions defined")

In [None]:
# Score every N group of the loaded submission with the shared template.
scores_by_n = {
    n: score_group(
        strip(group['x'].to_numpy()),
        strip(group['y'].to_numpy()),
        strip(group['deg'].to_numpy()),
        tx,
        ty,
    )
    for n, group in df.groupby('N')
}

total_score = sum(scores_by_n.values())
print(f"Total score: {total_score:.6f}")

# Sum the per-N scores over fixed, inclusive N ranges.
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
for lo, hi in ranges:
    range_score = sum(scores_by_n[n] for n in range(lo, hi + 1))
    print(f"N={lo}-{hi}: {range_score:.6f}")

In [None]:
# List the 20 N values that contribute the most to the total score.
print("\nTop 20 highest-scoring N values (most room for improvement):")
sorted_scores = sorted(
    scores_by_n.items(),
    key=lambda item: item[1],
    reverse=True,
)
for top_n, top_score in sorted_scores[:20]:
    print(f"  N={top_n}: {top_score:.6f}")

In [None]:
# Theoretical minimum for N=1 (single tree at optimal rotation)
# Tree dimensions: width 0.7, height 1.0
# Rough guess at 45 degrees: sqrt(2) * max(0.7, 1.0) / sqrt(2) = 1.0, but that ignores the outline's actual shape.

# Actually, let's compute it properly
from shapely.geometry import Polygon
from shapely import affinity

def get_tree_bbox_at_angle(angle):
    """Return ``(side, side**2)`` for a single tree rotated by ``angle``.

    The tree outline is built as a shapely ``Polygon``, rotated about the
    origin with ``affinity.rotate`` (which takes degrees by default), and
    ``side`` is the larger dimension of the rotated polygon's axis-aligned
    bounding box.
    """
    trunk_w, trunk_h = 0.15, 0.2
    base_w, mid_w, top_w = 0.7, 0.4, 0.25
    tip_y, tier1_y, tier2_y, base_y = 0.8, 0.5, 0.25, 0.0
    trunk_bottom_y = -trunk_h

    # Right-hand side of the outline, from just below the tip to the trunk bottom.
    right_half = [
        (top_w / 2, tier1_y), (top_w / 4, tier1_y),
        (mid_w / 2, tier2_y), (mid_w / 4, tier2_y),
        (base_w / 2, base_y),
        (trunk_w / 2, base_y), (trunk_w / 2, trunk_bottom_y),
    ]
    # The left-hand side is the mirror image, walked back up towards the tip.
    left_half = [(-px, py) for px, py in reversed(right_half)]
    outline = [(0, tip_y), *right_half, *left_half]

    tree = affinity.rotate(Polygon(outline), angle, origin=(0, 0))
    min_x, min_y, max_x, max_y = tree.bounds
    side = max(max_x - min_x, max_y - min_y)
    return side, side**2

# Coarse scan: N=1 score at a handful of rotation angles.
print("N=1 score at different angles:")
for angle in [0, 15, 30, 45, 60, 75, 90]:
    side, score = get_tree_bbox_at_angle(angle)
    print(f"  {angle}°: side={side:.6f}, score={score:.6f}")

# Finer scan around 45 degrees in 1-degree steps (40..50 inclusive).
# best_angle / best_score stay as notebook globals: the N=1 comparison cell
# reads best_score.
print("\nFine search around 45°:")
best_angle = 0
best_score = float('inf')
for angle in range(40, 51):
    side, score = get_tree_bbox_at_angle(angle)
    if score < best_score:
        best_score = score
        best_angle = angle
    print(f"  {angle}°: score={score:.6f}")
print(f"\nOptimal: {best_angle}° with score {best_score:.6f}")

In [None]:
# Put the submission's N=1 score next to the best angle-scan result.
current_n1_score = scores_by_n[1]
print(f"Current N=1 score: {current_n1_score:.6f}")
print(f"Optimal N=1 score: {best_score:.6f}")
print(f"Difference: {current_n1_score - best_score:.6f}")

In [None]:
# Load all available submissions and compare
import os

# Candidate submissions to compare; files that do not exist are skipped.
submission_files = [
    '/home/code/submission_candidates/candidate_000.csv',
    '/home/code/submission_candidates/candidate_001.csv',
    '/home/code/submission_candidates/candidate_002.csv',
    '/home/code/submission_candidates/candidate_003.csv',
]

# Maps file path -> {'total': summed score, 'by_n': {N: score}}.
all_scores = {}
for fp in submission_files:
    if not os.path.exists(fp):
        continue

    df_sub = pd.read_csv(fp)
    df_sub['N'] = df_sub['id'].str.split('_').str[0].astype(int)

    scores = {
        n: score_group(
            strip(group['x'].to_numpy()),
            strip(group['y'].to_numpy()),
            strip(group['deg'].to_numpy()),
            tx,
            ty,
        )
        for n, group in df_sub.groupby('N')
    }
    total = sum(scores.values())
    all_scores[fp] = {'total': total, 'by_n': scores}
    print(f"{os.path.basename(fp)}: {total:.6f}")

In [None]:
# Create ensemble: for each N, take the lowest-scoring configuration across
# all loaded submissions.
print("\nCreating ensemble from all submissions...")
best_per_n = {}
for n in range(1, 201):
    # Deliberately avoid the name `best_score`: the N=1 angle-search cell owns
    # that global and the N=1 comparison cell reads it.
    candidates = [
        (data['by_n'][n], fp)
        for fp, data in all_scores.items()
        if n in data['by_n']
    ]
    if candidates:
        # min() returns the first minimum, so ties go to the earliest file,
        # matching a strict "<" scan.
        n_score, n_source = min(candidates, key=lambda pair: pair[0])
    else:
        n_score, n_source = float('inf'), None
    best_per_n[n] = {'score': n_score, 'source': n_source}

missing_n = [n for n in range(1, 201) if best_per_n[n]['source'] is None]
if missing_n:
    # An N with no source contributes inf, so the total below is inf too.
    print(f"Warning: {len(missing_n)} N values are missing from every submission")

ensemble_score = sum(best_per_n[n]['score'] for n in range(1, 201))
print(f"Ensemble score: {ensemble_score:.6f}")
if all_scores:
    best_single_total = min(d['total'] for d in all_scores.values())
    print(f"Best single submission: {best_single_total:.6f}")
    print(f"Improvement from ensemble: {best_single_total - ensemble_score:.6f}")
else:
    # min() over an empty dict would raise ValueError; report it instead.
    print("No submissions were loaded; nothing to compare the ensemble against")

In [None]:
# Find the N values where another submission beats the best single one.
print("\nN values where ensemble improves over best single submission:")
best_single = min(all_scores.items(), key=lambda entry: entry[1]['total'])
best_single_scores = best_single[1]['by_n']

improvements = []
for n in range(1, 201):
    winner = best_per_n[n]
    if winner['source'] == best_single[0]:
        continue
    gain = best_single_scores[n] - winner['score']
    if gain > 0.0001:
        improvements.append((n, gain, winner['source']))

# Largest gains first.
improvements = sorted(improvements, key=lambda row: row[1], reverse=True)
print(f"Found {len(improvements)} N values with improvement")
for imp_n, imp_gain, imp_source in improvements[:20]:
    print(f"  N={imp_n}: +{imp_gain:.6f} from {os.path.basename(imp_source)}")

In [None]:
# Summary: headline totals, gap to target, per-range breakdown, conclusions.
banner = "=" * 60
print(banner)
print("SUMMARY: Path to Target")
print(banner)

current_best_total = min(d['total'] for d in all_scores.values())
print(f"\nCurrent best: {current_best_total:.6f}")
print(f"Ensemble best: {ensemble_score:.6f}")
print(f"Target: 68.95")
print(f"Gap from ensemble: {ensemble_score - 68.95:.6f}")

print("\n" + banner)
print("Score breakdown (ensemble):")
for lo, hi in ranges:
    range_score = sum(best_per_n[n]['score'] for n in range(lo, hi + 1))
    print(f"  N={lo}-{hi}: {range_score:.6f}")

print("\n" + banner)
print("Key insights:")
for insight in (
    "1. Ensemble provides marginal improvement over best single",
    "2. Most score comes from N=11-200 (grid placement near-optimal)",
    "3. Need significant improvements across ALL N ranges",
    "4. C++ implementation with 150k iterations is needed for competitive scores",
):
    print(insight)