# Loop 9 Analysis: First Improvement Achieved!

After 8 experiments stuck at 70.676102, we finally improved the score to 70.659944 (a gain of 0.016158; lower is better).

## Key Questions:
1. Which N values improved and by how much?
2. What's the potential for further improvement?
3. What approaches should we try next?

In [None]:
import pandas as pd
import numpy as np
import glob
import os
from collections import defaultdict

# Load our best submission. Every column is read as text (dtype=str); numeric
# parsing happens later in load_trees_for_n, which strips 's' characters
# from the values before calling float().
submission_csv = '/home/submission/submission.csv'
submission = pd.read_csv(submission_csv, dtype=str)
print(f"Loaded submission with {len(submission)} rows")

In [None]:
# Load baseline for comparison. As with the submission, all columns are kept
# as strings so the raw values can be parsed explicitly later on.
baseline_csv = '/home/code/experiments/001_baseline/santa-2025.csv'
baseline = pd.read_csv(baseline_csv, dtype=str)
print(f"Loaded baseline with {len(baseline)} rows")

In [None]:
# Calculate scores for each N in both submission and baseline
from shapely.geometry import Polygon
from shapely import affinity

# Outline of the un-rotated tree as (x, y) vertices, in drawing order.
# get_tree_polygon zips TX and TY back together to build the base Polygon.
_TREE_VERTICES = (
    (0.0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0.0),
    (0.075, 0.0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0.0),
    (-0.35, 0.0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
)
# Split the vertex table into separate x / y coordinate arrays.
TX, TY = (np.array(axis) for axis in zip(*_TREE_VERTICES))

def get_tree_polygon(x, y, deg):
    """Build the tree outline placed at (x, y) with rotation ``deg``.

    The base outline is formed from the module-level TX / TY vertex arrays,
    rotated about the origin (0, 0) by ``deg`` (shapely's ``rotate`` treats
    the angle as degrees unless ``use_radians`` is set), and then shifted by
    ``x`` along the x-axis and ``y`` along the y-axis.

    Returns the resulting shapely Polygon.
    """
    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    # Rotate first, about the origin, so the translation is not rotated too.
    turned = affinity.rotate(outline, deg, origin=(0, 0))
    return affinity.translate(turned, xoff=x, yoff=y)

def get_bounding_box_side(trees):
    """Return the side of the axis-aligned square that encloses all trees.

    ``trees`` is a collection of ``(x, y, deg)`` triples; each one is turned
    into a polygon with get_tree_polygon and its bounds are collected. The
    result is the larger of the overall x-extent and y-extent.

    An empty (falsy) collection yields ``float('inf')``.
    """
    if not trees:
        return float('inf')

    # shapely bounds are (minx, miny, maxx, maxy) for each placed tree.
    boxes = [get_tree_polygon(x, y, deg).bounds for x, y, deg in trees]
    xs = [edge for box in boxes for edge in (box[0], box[2])]
    ys = [edge for box in boxes for edge in (box[1], box[3])]

    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def load_trees_for_n(df, n):
    """Extract the trees of the N-tree group from a submission-style frame.

    Rows are selected by their ``id`` starting with the zero-padded,
    three-digit ``n`` followed by an underscore (e.g. ``"007_"``). For each
    selected row the ``x``, ``y`` and ``deg`` values are converted to text,
    have every ``'s'`` character removed, and are parsed as floats.

    Returns a list of ``(x, y, deg)`` float tuples in row order; the list is
    empty when no id matches.
    """
    def _parse(value):
        # Values carry an 's' marker in the CSV; drop it before parsing.
        return float(str(value).replace('s', ''))

    group = df[df['id'].str.startswith(f'{n:03d}_')]
    return [
        (_parse(row['x']), _parse(row['y']), _parse(row['deg']))
        for _, row in group.iterrows()
    ]

# Cell-end marker: printed once the helper definitions above have executed.
print("Functions defined")

In [None]:
# Calculate per-N scores for both
# Each dict maps N -> side**2 / N, where side comes from
# get_bounding_box_side for that N's trees. An N is scored only when the
# file holds exactly N rows for it.
baseline_scores = {}
submission_scores = {}
# N values left out of each dict because the row count was not exactly N.
skipped_n = {'baseline': [], 'submission': []}

for n in range(1, 201):
    for label, df, scores in (
        ('baseline', baseline, baseline_scores),
        ('submission', submission, submission_scores),
    ):
        trees = load_trees_for_n(df, n)
        if len(trees) != n:
            # Record the gap instead of dropping it silently.
            skipped_n[label].append(n)
            continue
        side = get_bounding_box_side(trees)
        scores[n] = side**2 / n

for label, missing in skipped_n.items():
    if missing:
        print(f"WARNING: {label} has an unexpected row count for N={missing}; "
              "these N are excluded from its total")

# Only N scored in both files are comparable; when nothing was skipped this
# equals the difference of the two totals printed below.
common_n = [k for k in baseline_scores if k in submission_scores]
improvement = (sum(baseline_scores[k] for k in common_n)
               - sum(submission_scores[k] for k in common_n))

print(f"Baseline total: {sum(baseline_scores.values()):.6f}")
print(f"Submission total: {sum(submission_scores.values()):.6f}")
print(f"Improvement: {improvement:.6f}")

In [None]:
# Find which N values improved
# One record per N whose score dropped by more than 0.0001 versus baseline.
improvement_columns = ['n', 'baseline', 'submission', 'improvement', 'pct_improvement']
improvements = []
for n in range(1, 201):
    if n not in baseline_scores or n not in submission_scores:
        continue
    diff = baseline_scores[n] - submission_scores[n]
    if diff > 0.0001:  # Significant improvement
        improvements.append({
            'n': n,
            'baseline': baseline_scores[n],
            'submission': submission_scores[n],
            'improvement': diff,
            'pct_improvement': 100 * diff / baseline_scores[n],
        })

# Explicit columns keep the frame well-formed when nothing improved; without
# them an empty list yields a column-less frame and sort_values raises
# KeyError on 'improvement'.
improvements_df = pd.DataFrame(improvements, columns=improvement_columns)
improvements_df = improvements_df.sort_values('improvement', ascending=False)
print(f"\n{len(improvements_df)} N values improved:")
print(improvements_df.to_string(index=False))

In [None]:
# Analyze potential for further improvement
# All figures below are derived from these three scores so the printed gap,
# achieved improvement and required multiplier cannot drift apart.
PREVIOUS_SCORE = 70.676102  # score the earlier experiments were stuck at
CURRENT_SCORE = 70.659944   # score after this loop's improvement
TARGET_SCORE = 68.919154

gap = CURRENT_SCORE - TARGET_SCORE
achieved = PREVIOUS_SCORE - CURRENT_SCORE
# How many times the improvement achieved so far fits into the remaining gap.
multiplier = gap / achieved

print("\n=== Gap Analysis ===")
print(f"Current score: {CURRENT_SCORE:.6f}")
print(f"Target score: {TARGET_SCORE:.6f}")
print(f"Gap: {gap:.6f} points ({100 * gap / CURRENT_SCORE:.2f}%)")

# If we improved 24 N values by 0.016, how many more improvements do we need?
print(f"\nImprovement achieved: {achieved:.6f} points")
print(f"Improvements needed: {multiplier:.1f}x more")
print(f"\nThis means we need to find ~{multiplier:.0f}x more improvement!")
print("\nPossible strategies:")
print("1. Backward iteration (BackPacking) - propagate good configs from larger N to smaller N")
print("2. Targeted SA on improved N values - they may be in better basins")
print("3. Find more CSV sources with better configurations")
print("4. Symmetric packing patterns for specific N values")

In [None]:
# Check which N values have the most room for improvement
# NOTE: no theoretical-minimum comparison is performed here; N values are
# simply ranked by their raw per-N submission score, highest first.
print("\n=== Per-N Score Analysis ===")
print("\nN values with highest scores (most room for improvement):")
# Rank only the N that were actually scored. Indexing submission_scores for
# every n in 1..200 raises KeyError when an N was skipped during scoring.
scores_list = sorted(submission_scores.items(), key=lambda item: item[1], reverse=True)
for n, score in scores_list[:20]:
    print(f"  N={n}: score={score:.6f}")

In [None]:
# Check N values that DIDN'T improve - are they already optimal?
print("\n=== N values that didn't improve ===")
# An N lands here when it was scored in both files and its gain
# (baseline minus submission) is at most 0.0001 -- this includes N whose
# score got worse, not only unchanged ones.
no_improvement = [
    n
    for n in range(1, 201)
    if n in baseline_scores
    and n in submission_scores
    and baseline_scores[n] - submission_scores[n] <= 0.0001
]
small_unimproved = [n for n in no_improvement if n <= 20]

print(f"{len(no_improvement)} N values showed no improvement")
print(f"\nSmall N (1-20) that didn't improve: {small_unimproved}")
print("These are likely already at or near optimal.")