# Loop 23 Analysis: Per-N Efficiency and Improvement Potential

After 24 experiments, all approaches converge to a total score of ~70.627 (target: 68.919154). The evaluator recommends:
1. Analyze per-N efficiency to find worst performers
2. Target specific N values with highest improvement potential
3. Manual inspection of worst N values

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import matplotlib.pyplot as plt

# Outline of a single unrotated tree as (x, y) vertices around the origin.
# The shape is mirror-symmetric about x = 0: a 0.15-wide trunk spanning
# y = -0.2 .. 0.0, topped by three tiers that narrow to the tip at (0.0, 0.8).
TREE_TEMPLATE = [
    # Tip
    (0.0, 0.8),
    # Right-hand side, from the top tier down to the trunk
    (0.125, 0.5), (0.0625, 0.5),
    (0.2, 0.25), (0.1, 0.25),
    (0.35, 0.0), (0.075, 0.0),
    # Trunk base
    (0.075, -0.2), (-0.075, -0.2),
    # Left-hand side, from the trunk back up to the top tier
    (-0.075, 0.0), (-0.35, 0.0),
    (-0.1, 0.25), (-0.2, 0.25),
    (-0.0625, 0.5), (-0.125, 0.5),
]

def parse_s_value(val):
    """Convert a submission cell to ``float``.

    String cells that begin with the letter ``s`` have that first character
    dropped before conversion; every other value is passed to ``float`` as is.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_marker else val)

def create_tree_polygon(x, y, angle):
    """Build the tree outline placed at ``(x, y)`` and turned by ``angle``.

    The template polygon is first rotated about the origin by ``angle``
    degrees and then shifted by ``x`` along the x-axis and ``y`` along the
    y-axis. Returns the resulting shapely polygon.
    """
    upright = Polygon(TREE_TEMPLATE)
    turned = rotate(upright, angle, origin=(0, 0), use_radians=False)
    return translate(turned, x, y)

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square covering ``trees``.

    Each item must expose ``bounds`` as ``(minx, miny, maxx, maxy)``. The
    result is the larger of the combined extent along x and along y.
    """
    xs, ys = [], []
    for left, bottom, right, top in (tree.bounds for tree in trees):
        xs += (left, right)
        ys += (bottom, top)
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

# Load the baseline submission and decode its columns
df = pd.read_csv('/home/submission/submission.csv')

# Position and angle cells go through parse_s_value, which drops a leading 's'
for column in ('x', 'y', 'deg'):
    df[column] = df[column].apply(parse_s_value)

# The puzzle size N is the part of the id before the first underscore
df['n'] = df['id'].map(lambda row_id: int(row_id.split('_', 1)[0]))

# Sanity check: row and tree counts against 1 + 2 + ... + 200
n_column = df['n']
print(f"Loaded {len(df)} rows")
print(f"N range: {n_column.min()} to {n_column.max()}")
print(f"Total trees: {df.groupby('n').size().sum()}")
print(f"Expected: {sum(range(1, 201))} = {200*201//2}")
print()

In [None]:
# Calculate per-N scores and efficiency.
#
# score(N)      = side² / N, where side is the edge of the axis-aligned square
#                 enclosing all N trees of that configuration.
# efficiency(N) = tree_area / score(N); higher is better, 1.0 would mean the
#                 square is completely filled by trees.

# The reference tree does not depend on N, so measure it once up front.
single_tree = create_tree_polygon(0, 0, 0)
tree_width = single_tree.bounds[2] - single_tree.bounds[0]  # 0.7
tree_height = single_tree.bounds[3] - single_tree.bounds[1]  # 1.0
tree_area = single_tree.area  # 0.245625 for TREE_TEMPLATE

# Lower bound on the score: a gap-free packing would need a square of area
# n * tree_area, i.e. score = n * tree_area / n = tree_area for every N.
# Real packings leave gaps, so this bound is optimistic rather than attainable.
theoretical_min_score = tree_area

results = []

for n in range(1, 201):
    group = df[df['n'] == n]
    trees = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]
    side = get_bounding_box_side(trees)
    score = (side ** 2) / n

    # Efficiency: how close this N is to the lower bound (higher is better)
    efficiency = theoretical_min_score / score

    results.append({
        'n': n,
        'side': side,
        'score': score,
        'theoretical_min': theoretical_min_score,
        'efficiency': efficiency,
        'gap': score - theoretical_min_score
    })

results_df = pd.DataFrame(results)
print(f"Tree area: {tree_area:.6f}")
print(f"Tree dimensions: {tree_width:.3f} x {tree_height:.3f}")
print()
print("Per-N Score Statistics:")
print(results_df[['n', 'score', 'efficiency', 'gap']].describe())

In [None]:
# Rank N values by efficiency and list both ends of the ranking
def _print_ranking(heading, ranked):
    """Print a banner with ``heading`` followed by one line per ranked row."""
    print("\n" + "="*70)
    print(heading)
    print("="*70)
    for _, entry in ranked.iterrows():
        print(f"N={entry['n']:3.0f}: score={entry['score']:.6f}, efficiency={entry['efficiency']:.4f}, gap={entry['gap']:.6f}")


# Lowest efficiency first: these N values are the furthest from the lower bound
worst = results_df.nsmallest(20, 'efficiency')
_print_ranking("WORST PERFORMERS (lowest efficiency - highest improvement potential)", worst)

# Highest efficiency first
best = results_df.nlargest(20, 'efficiency')
_print_ranking("BEST PERFORMERS (highest efficiency)", best)

In [None]:
# Plot per-N score (left) and per-N efficiency (right), each with its mean line
fig, (score_ax, efficiency_ax) = plt.subplots(1, 2, figsize=(14, 5))

panels = [
    (score_ax, 'score', 'Score (S²/N)', 'Per-N Score'),
    (efficiency_ax, 'efficiency', 'Efficiency (theoretical_min / score)', 'Per-N Efficiency'),
]
for ax, column, y_label, title in panels:
    values = results_df[column]
    ax.scatter(results_df['n'], values, s=10, alpha=0.7)
    ax.set_xlabel('N')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.axhline(y=values.mean(), color='r', linestyle='--', label=f'Mean: {values.mean():.4f}')
    ax.legend()

fig.tight_layout()
fig.savefig('/home/code/exploration/per_n_analysis.png', dpi=150)
plt.show()

# Compare the summed per-N score with the target
total_score = results_df['score'].sum()
print(f"\nTotal score: {total_score:.6f}")
print("Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")

In [None]:
# Break the total score down by bands of N
band_bounds = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]
ranges = [(low, high, f'N={low}-{high}') for low, high in band_bounds]

print("\nScore contribution by N range:")
print("="*60)
for start, end, label in ranges:
    # Both ends of the band are inclusive
    mask = results_df['n'].between(start, end)
    band = results_df.loc[mask]
    range_score = band['score'].sum()
    range_count = mask.sum()
    avg_efficiency = band['efficiency'].mean()
    print(f"{label:15s}: score={range_score:8.4f} ({range_count:3d} N values), avg_efficiency={avg_efficiency:.4f}")

print(f"\nTotal: {results_df['score'].sum():.6f}")

In [None]:
# Report N values from number families that might admit symmetric layouts
# (from discussions: perfect squares, triangular numbers, etc.)
def _report_family(heading, k_values, n_for_k):
    """Print score and efficiency for each ``n_for_k(k)`` that is at most 200."""
    print(heading)
    for k in k_values:
        n = n_for_k(k)
        if n > 200:
            continue
        entry = results_df.loc[results_df['n'] == n].iloc[0]
        print(f"  N={n:3d} (k={k}): score={entry['score']:.6f}, efficiency={entry['efficiency']:.4f}")


print("\nAnalysis of special N values:")
print("="*60)

_report_family("\nPerfect squares (N = k²):", range(1, 15), lambda k: k * k)
_report_family("\nTriangular numbers (N = k(k+1)/2):", range(1, 20), lambda k: k * (k + 1) // 2)

In [None]:
# Visualize a few worst-performing N values
from shapely.geometry import box

def visualize_n(n, ax, title_suffix=''):
    """Draw the layout for ``n`` trees and its bounding square on ``ax``.

    Rows with this ``n`` are read from the module-level ``df``. Trees are
    filled in green; the dashed red square has the side used for scoring and
    is centered on the layout's bounding box. The axis title shows S²/N
    followed by ``title_suffix``.
    """
    layout = df[df['n'] == n]
    trees = [
        create_tree_polygon(rec['x'], rec['y'], rec['deg'])
        for _, rec in layout.iterrows()
    ]

    # Overall extent of the layout along each axis
    xs, ys = [], []
    for shape in trees:
        left, bottom, right, top = shape.bounds
        xs += (left, right)
        ys += (bottom, top)
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    side = max(max_x - min_x, max_y - min_y)

    # Tree outlines
    for shape in trees:
        outline_x, outline_y = shape.exterior.xy
        ax.fill(outline_x, outline_y, alpha=0.5, fc='green', ec='darkgreen')

    # Bounding square, centered on the bounding box
    half = side / 2
    center_x = (min_x + max_x) / 2
    center_y = (min_y + max_y) / 2
    square = box(center_x - half, center_y - half, center_x + half, center_y + half)
    square_x, square_y = square.exterior.xy
    ax.plot(square_x, square_y, 'r--', linewidth=2)

    ax.set_aspect('equal')
    ax.set_title(f'N={n}, S²/N={(side ** 2) / n:.4f}{title_suffix}')

# Draw the six least efficient N values on a 2x3 grid, filled row by row
worst_ns = worst['n'].values[:6].astype(int)
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
for panel, n in zip(axes.flat, worst_ns):
    visualize_n(n, panel, ' (WORST)')
plt.tight_layout()
plt.savefig('/home/code/exploration/worst_n_visualization.png', dpi=150)
plt.show()

In [None]:
# Count the distinct rotation angles (mod 360, rounded to 0.1°) used for a
# sample of N values, as a hint of where the baseline might not be optimal

print("\nAngle distribution analysis:")
print("="*60)

for n in [1, 2, 3, 4, 5, 10, 15, 20, 50, 100, 200]:
    angles = df.loc[df['n'] == n, 'deg'].values
    unique_angles = np.unique(np.round(angles % 360, 1))
    n_unique = len(unique_angles)
    ordered = sorted(unique_angles)
    if n_unique > 5:
        # Long lists are cut to their first five angles
        print(f"N={n:3d}: {n_unique:2d} unique angles: {ordered[:5]}...")
    else:
        print(f"N={n:3d}: {n_unique:2d} unique angles: {ordered}")

print("\nThis shows whether solutions use symmetric (few angles) or asymmetric (many angles) configurations.")

In [None]:
# How far the current total is from the target, and how much a 10% gain on
# the 20 least efficient N values would contribute
target = 68.919154
current_total = results_df['score'].sum()
gap = current_total - target

print(f"\nImprovement needed:")
for caption, amount in (
    ("Current total", current_total),
    ("Target", target),
    ("Gap", gap),
    ("Gap per N (if uniform)", gap/200),
):
    print(f"  {caption}: {amount:.6f}")

# Hypothetical: shave 10% off each of the worst 20 N values
worst_20_score = worst['score'].sum()
saving = worst_20_score * 0.1
print(f"\nWorst 20 N values total score: {worst_20_score:.6f}")
print(f"If improved by 10%: {worst_20_score * 0.9:.6f}")
print(f"Improvement: {saving:.6f}")
print(f"Would close gap by: {saving / gap * 100:.1f}%")