# Loop 2 LB Feedback Analysis

## Situation
- Both submissions scored exactly 70.7343 on the leaderboard, matching the local CV score (CV = LB)
- Extended optimization (45 min) found only 3 tiny improvements, too small to show up in the rounded score
- The santa-2025.csv starting submission is already at a very strong local optimum
- Gap to target: 1.803 points (2.55%)

In [None]:
import pandas as pd
import numpy as np
import os

# Read the current best submission from its fixed location on disk and
# show its size plus the first few rows as a sanity check.
submission_path = '/home/code/datasets/santa-2025-csv/santa-2025.csv'
df = pd.read_csv(submission_path)
print("Loaded {} rows".format(len(df)))
print(df.head(5))

In [None]:
# Calculate per-N scores
def strip_s(val):
    """Convert a submission value to float, dropping a leading 's' if present.

    String values that start with 's' have that single character removed
    before conversion; every other value is handed to float() unchanged.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    numeric_part = val[1:] if has_prefix else val
    return float(numeric_part)

# Outline of a single tree as 15 (x, y) vertices in the tree's local frame,
# listed from the tip (0, 0.8) down the right-hand side, across the trunk
# bottom and back up the left-hand side.
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
])


def get_tree_bbox(x, y, deg):
    """Return the axis-aligned bounding box of one placed tree.

    The tree outline (TX, TY) is rotated by `deg` degrees about its local
    origin using the standard 2-D rotation, then shifted by (x, y).

    Returns a tuple (min_x, min_y, max_x, max_y) over the 15 vertices.
    """
    theta = np.radians(deg)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    xs = TX * cos_t - TY * sin_t + x
    ys = TX * sin_t + TY * cos_t + y
    return xs.min(), ys.min(), xs.max(), ys.max()

def calc_n_score(group):
    """Return the bounding-square side and score of one N-tree configuration.

    `group` supplies one row per tree through its 'x', 'y' and 'deg'
    columns; every value is converted with strip_s before use.

    Returns (side, side**2 / n), where n is the number of rows in `group`
    and side is the longer edge of the box enclosing every tree's bounding
    box.
    """
    tree_count = len(group)
    lo_x = lo_y = np.inf
    hi_x = hi_y = -np.inf
    # Walk the three columns in lockstep instead of materialising row objects.
    for raw_x, raw_y, raw_deg in zip(group['x'], group['y'], group['deg']):
        cx = strip_s(raw_x)
        cy = strip_s(raw_y)
        angle = strip_s(raw_deg)
        left, bottom, right, top = get_tree_bbox(cx, cy, angle)
        lo_x = min(lo_x, left)
        lo_y = min(lo_y, bottom)
        hi_x = max(hi_x, right)
        hi_y = max(hi_y, top)
    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side, side ** 2 / tree_count

# Score every configuration separately. N is taken from the part of 'id'
# before the first underscore.
df['N'] = df['id'].str.split('_').str.get(0).astype(int)
scores = []
for n, group in df.groupby('N'):
    side, score = calc_n_score(group)
    scores.append(dict(N=n, side=side, score=score))

scores_df = pd.DataFrame(scores)
print("Total score: {:.6f}".format(scores_df['score'].sum()))
print()
print("Worst 20 N values by score contribution:")
print(scores_df[['N', 'side', 'score']].nlargest(20, 'score'))

In [None]:
# Packing efficiency: how close each bounding square is to the area bound.
# N non-overlapping trees cover N * tree_area, so no enclosing square can
# have a side shorter than sqrt(N * tree_area).
#
# tree_area is the exact area of the tree polygon, computed from its
# vertices (TX, TY) with the shoelace formula; it evaluates to 0.245625.
# A rough estimate such as 0.35 * 0.8 = 0.28 overstates the area by ~14%,
# which inflates every efficiency value and breaks the "max 1.0" reading.
tree_area = 0.5 * abs(np.dot(TX, np.roll(TY, -1)) - np.dot(TY, np.roll(TX, -1)))

scores_df['theoretical_side'] = np.sqrt(scores_df['N'] * tree_area)
# Ratio of side lengths, i.e. the square root of the area packing density.
scores_df['efficiency'] = scores_df['theoretical_side'] / scores_df['side']

print("Efficiency analysis (higher is better, max 1.0):")
print(f"Mean efficiency: {scores_df['efficiency'].mean():.4f}")
print(f"Min efficiency: {scores_df['efficiency'].min():.4f} at N={scores_df.loc[scores_df['efficiency'].idxmin(), 'N']}")
print(f"Max efficiency: {scores_df['efficiency'].max():.4f} at N={scores_df.loc[scores_df['efficiency'].idxmax(), 'N']}")

print("\nWorst 20 N values by efficiency:")
print(scores_df.nsmallest(20, 'efficiency')[['N', 'side', 'score', 'efficiency']])

In [None]:
# Gap analysis: how far the current total is from the target score.
target = 68.931058
current = scores_df['score'].sum()
gap = current - target

print(f"Current score: {current:.6f}")
print(f"Target score: {target:.6f}")
print(f"Gap: {gap:.6f} ({gap/current*100:.2f}%)")

# How much improvement is needed per N on average? Divide by the number of
# N values actually scored rather than assuming there are exactly 200.
print(f"\nAverage improvement needed per N: {gap/len(scores_df):.6f}")
print(f"Average % improvement needed: {gap/current*100:.2f}%")

# If only the 20 largest contributors were improved, how much would they
# have to shrink to close the whole gap?
worst_20_score = scores_df.nlargest(20, 'score')['score'].sum()
print(f"\nWorst 20 N values contribute: {worst_20_score:.4f} ({worst_20_score/current*100:.2f}% of total)")
print(f"To close gap by improving worst 20: need {gap/worst_20_score*100:.2f}% improvement on worst 20")

In [None]:
# Notes on how the egortrushin kernel differs from the current approach:
# for large N (>= 58) it builds periodic/crystalline layouts by tiling a
# unit cell of trees on a regular grid. The text is emitted in one call.
print("\n".join([
    "Key insight from egortrushin kernel:",
    "=" * 50,
    "For N >= 58, use PERIODIC STRUCTURES:",
    "- Define a 'unit cell' of trees",
    "- Tile the unit cell to create the full configuration",
    "- Optimize the unit cell parameters (translations, rotations)",
    "- This can find configurations that standard SA cannot reach",
    "",
    "The approach uses:",
    "1. nt = [rows, cols] - number of unit cells in each direction",
    "2. Translations determined via SA updates",
    "3. All trees in unit cell rotated by same angle",
    "4. Option to translate only one tree during last translation",
    "",
    "This is fundamentally different from our current approach!",
    "Current approach: optimize each tree independently",
    "Periodic approach: optimize unit cell, then tile",
]))

In [None]:
# Which configurations are candidates for periodic structures? Select
# every N at or above the 58 threshold and report its share of the total.
print("N values that might benefit from periodic structures (N >= 58):")
large_n = scores_df.loc[scores_df['N'].ge(58)]
print("Number of large N values: {}".format(len(large_n)))
print(f"Score contribution from large N: {large_n['score'].sum():.4f} ({large_n['score'].sum()/current*100:.2f}%)")

# List every (rows, cols) factor pair with rows <= cols for each N in
# 58..200; N values with no such pair (primes) are not printed.
print()
print("N values with nice factorizations (for periodic structures):")
for n in range(58, 201):
    upper = int(np.sqrt(n)) + 1
    factors = [(d, n // d) for d in range(2, upper) if n % d == 0]
    if factors:
        print(f"N={n}: {factors}")

In [None]:
# Summary of key findings. The score, gap and percentage are taken from
# `current` and `gap` (computed in the gap analysis) rather than typed in by
# hand, so the summary cannot drift from the numbers reported above it.
print("="*60)
print("KEY FINDINGS FOR NEXT EXPERIMENT")
print("="*60)
print()
print("1. CURRENT STATE:")
print(f"   - Score: {current:.4f} (gap to target: {gap:.3f} = {gap/current*100:.2f}%)")
print("   - Standard SA/local search cannot improve the score")
print("   - The starting submission is at a very strong local optimum")
print()
print("2. STRATEGIC PIVOT REQUIRED:")
print("   - Implement PERIODIC STRUCTURE optimization (egortrushin approach)")
print("   - For N >= 58, use unit cell tiling instead of individual tree optimization")
print("   - This explores a fundamentally different part of the solution space")
print()
print("3. SPECIFIC ACTIONS:")
print("   a) Implement periodic SA from egortrushin kernel")
print("   b) Focus on N values with nice factorizations (60, 64, 72, 80, etc.)")
print("   c) Run for MUCH longer (hours, not minutes)")
print("   d) Try multiple random seeds to escape local optima")
print()
print("4. EXPECTED IMPACT:")
# large_n is the N >= 58 subset selected in the periodic-structure check.
print(f"   - Large N (>=58) contributes {large_n['score'].sum():.2f} to total score")
print(f"   - Even 5% improvement on large N would save {large_n['score'].sum()*0.05:.2f} points")
print("   - Combined with small improvements elsewhere, target is achievable")