# Loop 2 LB Feedback Analysis

## Key Observations
- LB score: 70.676102 (matches CV exactly - no gap!)
- Target: 68.919154
- Gap to target: 1.757 points (the score must drop by about 2.5% to reach the target)

## What We've Learned
1. The pre-optimized ensemble.csv is at a tight local optimum
2. Simple optimizers (bbox3, tree_packer_v21) cannot escape this optimum
3. The CV-LB gap is 0 (perfect match) - this is a pure optimization problem

In [None]:
# Analyze the score breakdown and identify improvement opportunities
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate

# Christmas tree polygon vertices as (x, y) pairs.
# The outline starts at the highest point, walks down the x >= 0 side to the
# lowest row (y = -0.2), then climbs back up the mirrored x <= 0 side.
TREE_VERTICES = np.array([
    # highest point
    [0.0, 0.8],
    # x >= 0 side, from top to bottom
    [0.125, 0.5],
    [0.0625, 0.5],
    [0.2, 0.25],
    [0.1, 0.25],
    [0.35, 0.0],
    [0.075, 0.0],
    [0.075, -0.2],
    # x <= 0 side, from bottom to top
    [-0.075, -0.2],
    [-0.075, 0.0],
    [-0.35, 0.0],
    [-0.1, 0.25],
    [-0.2, 0.25],
    [-0.0625, 0.5],
    [-0.125, 0.5],
])

def load_submission(filepath):
    """Read a submission CSV and return it with numeric x, y and deg columns.

    Each of the 'x', 'y' and 'deg' columns is converted to text, has every
    's' character removed, and is then parsed as float.
    (Presumably the submission format prefixes values with 's'; confirm
    against the competition's file specification.)

    Args:
        filepath: Anything accepted by ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with 'x', 'y' and 'deg' as float columns.
    """
    frame = pd.read_csv(filepath)
    numeric_columns = ('x', 'y', 'deg')
    for name in numeric_columns:
        as_text = frame[name].astype(str)
        # Literal (non-regex) removal of the 's' marker before parsing.
        cleaned = as_text.str.replace('s', '', regex=False)
        frame[name] = cleaned.astype(float)
    return frame

def get_trees_for_n(df, n):
    """Return the rows of ``df`` whose 'id' starts with ``n`` zero-padded to three digits plus '_'.

    Args:
        df: DataFrame with a string 'id' column.
        n: Integer group number; formatted as e.g. ``7 -> "007_"``.

    Returns:
        The matching rows of ``df`` (possibly empty), in their original order.
    """
    id_prefix = "{:03d}_".format(n)
    belongs_to_group = df['id'].str.startswith(id_prefix)
    return df.loc[belongs_to_group]

def get_bounding_box_side(df_subset):
    """Return the longer side of the axis-aligned box enclosing all placed trees.

    Every row places one copy of the ``TREE_VERTICES`` polygon: it is rotated
    by the row's 'deg' about the origin and then shifted by ('x', 'y').

    Args:
        df_subset: DataFrame with 'x', 'y' and 'deg' columns, one row per tree.

    Returns:
        ``0`` when ``df_subset`` has no rows; otherwise the larger of the
        x-extent and y-extent over all transformed exterior coordinates.
    """
    if not len(df_subset):
        return 0

    # The untransformed outline is the same for every row, so build it once.
    template = Polygon(TREE_VERTICES)

    outline_points = []
    for _, tree in df_subset.iterrows():
        turned = rotate(template, tree['deg'], origin=(0, 0))
        placed = translate(turned, tree['x'], tree['y'])
        outline_points.extend(placed.exterior.coords)

    cloud = np.array(outline_points)
    lower = cloud.min(axis=0)
    upper = cloud.max(axis=0)
    return max(upper - lower)

# Emit a confirmation line so the cell's output shows it ran to completion.
print("Functions defined.")

In [None]:
# Load the best submission and calculate the score breakdown.
# For each n in 1..200 the score is side**2 / n, where side comes from
# get_bounding_box_side; the total is the sum over all n.
df = load_submission('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv')

# An n with no rows in the file yields side 0 and therefore contributes 0.
sides = [get_bounding_box_side(get_trees_for_n(df, n)) for n in range(1, 201)]
scores = [
    {'n': n, 'side': side, 'score': side**2 / n}
    for n, side in enumerate(sides, start=1)
]
scores_df = pd.DataFrame(scores)

# Computed once and reused by every line of the report below.
total_score = scores_df['score'].sum()
print(f"Total score: {total_score:.6f}")
print(f"Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")
print(f"\nScore breakdown by N range:")

n_ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
for start, end in n_ranges:
    # Both range endpoints are inclusive.
    in_range = scores_df['n'].between(start, end)
    range_total = scores_df.loc[in_range, 'score'].sum()
    print(f"  N={start}-{end}: {range_total:.4f} ({100*range_total/total_score:.1f}%)")

In [None]:
# Calculate the reduction each N range must deliver to reach the target,
# assuming the total gap is split in proportion to each range's current score.
target = 68.919154
current = scores_df['score'].sum()
gap = current - target

print(f"Total gap to close: {gap:.6f}")
print(f"\nIf we improve each N range proportionally:")

n_ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
for start, end in n_ranges:
    # Both range endpoints are inclusive.
    in_range = scores_df['n'].between(start, end)
    range_score = scores_df.loc[in_range, 'score'].sum()
    # Share of the total gap assigned to this range.
    required_improvement = gap * (range_score / current)
    # With a proportional split this percentage works out to 100 * gap / current
    # for every range (up to floating-point rounding).
    print(f"  N={start}-{end}: need to reduce by {required_improvement:.4f} ({100*required_improvement/range_score:.2f}%)")

In [None]:
# Rank the N values two ways: by score contribution and by side / sqrt(n).
# NOTE(review): the earlier note here cited 0.8132 as the side of a single tree
# at 45 degrees as a theoretical minimum; nothing below compares against it.
print("Top 20 N values by score contribution:")
scores_df_sorted = scores_df.sort_values(by='score', ascending=False)
top_by_score = scores_df_sorted.head(20)
print(top_by_score.to_string())

print("\n\nN values with highest side/sqrt(n) ratio (potential for improvement):")
# score is side**2 / n, so this ratio is the square root of score and orders
# the rows the same way as the score ranking above (ties aside).
root_n = np.sqrt(scores_df['n'])
scores_df['efficiency'] = scores_df['side'] / root_n
top_by_ratio = scores_df.sort_values(by='efficiency', ascending=False).head(20)
print(top_by_ratio.to_string())