# Loop 32 Strategic Analysis

## Key Questions:
1. What is our per-N score breakdown?
2. Which N values have the largest gaps to theoretical minimum?
3. What approaches have NOT been tried?
4. What would it take to close the 1.705-point gap?

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import json

# Tree shape: (x, y) outline of a single unrotated tree, 15 vertices in all.
# The list starts at the apex, walks down the positive-x side, crosses the
# narrow base section at y = -0.2, and climbs back up the negative-x side.
# The negative-x half mirrors the positive-x half about x = 0.
TREE_VERTICES = np.array(
    [
        [0.0, 0.8],       # apex
        [0.125, 0.5],     # y = 0.5, positive-x side
        [0.0625, 0.5],
        [0.2, 0.25],      # y = 0.25, positive-x side
        [0.1, 0.25],
        [0.35, 0.0],      # y = 0.0, widest point
        [0.075, 0.0],
        [0.075, -0.2],    # bottom edge at y = -0.2
        [-0.075, -0.2],
        [-0.075, 0.0],    # y = 0.0, negative-x side
        [-0.35, 0.0],
        [-0.1, 0.25],     # y = 0.25, negative-x side
        [-0.2, 0.25],
        [-0.0625, 0.5],   # y = 0.5, negative-x side
        [-0.125, 0.5],
    ],
    dtype=np.float64,
)

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` and then shifted by ``(x, y)``.

    The polygon is created from ``TREE_VERTICES``, rotated about the origin
    ``(0, 0)`` with shapely's ``rotate`` (``deg`` is passed through unchanged),
    and finally moved with shapely's ``translate`` using ``x`` and ``y`` as
    the x/y offsets.

    Returns the resulting shapely ``Polygon``.
    """
    outline = Polygon(TREE_VERTICES)
    # Rotate first so the rotation pivot stays at the outline's own origin.
    return translate(rotate(outline, deg, origin=(0, 0)), x, y)

def calculate_score(trees):
    """Return the bounding-square score for a collection of placed trees.

    Each element of ``trees`` must expose ``.bounds`` as
    ``(min_x, min_y, max_x, max_y)``. The score is the squared side of the
    smallest axis-aligned square covering every bounding box, divided by the
    number of trees.

    Raises ``ValueError`` when ``trees`` is empty.
    """
    # Transpose the per-tree bounds into four parallel tuples.
    lows_x, lows_y, highs_x, highs_y = zip(*(t.bounds for t in trees))
    width = max(highs_x) - min(lows_x)
    height = max(highs_y) - min(lows_y)
    side = max(width, height)
    return side * side / len(trees)

def parse_value(v):
    """Convert ``v`` to ``float``, dropping a leading ``'s'`` from strings.

    Strings that start with ``'s'`` have that first character removed before
    conversion; every other value is handed to ``float`` as-is.
    """
    has_marker = isinstance(v, str) and v.startswith('s')
    return float(v[1:] if has_marker else v)

# Calculate tree area.
# The area of the unrotated outline is reported twice: once as the raw area
# and once as the value this analysis uses as the per-tree theoretical
# minimum score.
tree = Polygon(TREE_VERTICES)
TREE_AREA = tree.area
print(
    f"Tree area: {TREE_AREA:.6f}\n"
    f"Theoretical minimum score per tree: {TREE_AREA:.6f}"
)

In [None]:
# Load current best submission
df = pd.read_csv('/home/submission/submission.csv')

# Calculate per-N scores.
# Rows for a given N are selected by their zero-padded id prefix ("001_",
# "002_", ...). Each row's x / y / deg goes through parse_value, which
# accepts both plain numbers and 's'-prefixed strings.
per_n_scores = {}
for n in range(1, 201):
    prefix = f"{n:03d}_"
    group = df[df["id"].str.startswith(prefix)].sort_values("id")
    if group.empty:
        # Without this guard calculate_score fails on an empty list with an
        # error that does not say which N is missing from the submission.
        raise ValueError(
            f"submission has no rows for N={n} (expected ids starting with {prefix!r})"
        )
    # Iterate the three columns in lockstep instead of building a Series per
    # row with iterrows().
    trees = [
        create_tree_polygon(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(group["x"], group["y"], group["deg"])
    ]
    per_n_scores[n] = calculate_score(trees)

total_score = sum(per_n_scores.values())
print(f"Total score: {total_score:.6f}")
print("Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")

In [None]:
# Analyze per-N efficiency.
# TREE_AREA is used as the theoretical minimum score for every N, so
# efficiency = TREE_AREA / score and gap = score - TREE_AREA.
# "contribution" is each N's share of the total score, in percent.
results = [
    {
        'N': n,
        'score': score,
        'theoretical_min': TREE_AREA,
        'efficiency': TREE_AREA / score,
        'gap': score - TREE_AREA,
        'contribution': score / total_score * 100,
    }
    for n, score in per_n_scores.items()
]

results_df = pd.DataFrame(results)
largest_gaps = results_df.sort_values('gap', ascending=False).head(20)
print("\nTop 20 N values with LARGEST gaps (most room for improvement):")
print(largest_gaps.to_string())

In [None]:
# Calculate how much improvement we need from each N
TARGET = 68.919154
CURRENT = total_score
GAP = CURRENT - TARGET

# Scenario A: spread the gap evenly, as a percentage of the current total.
required_improvement_pct = GAP / CURRENT * 100

# Scenario B: take the whole gap out of the 20 N values with the largest gap.
top20_worst = results_df.sort_values('gap', ascending=False).head(20)
top20_score = top20_worst['score'].sum()
top20_share_pct = top20_score / CURRENT * 100
top20_required_pct = GAP / top20_score * 100

print(f"\nTotal gap to close: {GAP:.6f}")
print("\nIf we improved EVERY N by the same percentage:")
print(f"  Required improvement: {required_improvement_pct:.2f}%")

print("\nIf we only improved the TOP 20 worst N values:")
print(f"  Top 20 worst N values contribute: {top20_score:.4f} ({top20_share_pct:.1f}% of total)")
print(f"  Required improvement in these 20: {top20_required_pct:.2f}%")

In [None]:
# Analyze efficiency distribution
efficiency_col = results_df['efficiency']
# N values sitting at the two extremes of the efficiency column.
n_at_min = results_df.loc[efficiency_col.idxmin(), 'N']
n_at_max = results_df.loc[efficiency_col.idxmax(), 'N']

print("\nEfficiency distribution:")
print(f"  Min efficiency: {efficiency_col.min():.4f} (N={n_at_min})")
print(f"  Max efficiency: {efficiency_col.max():.4f} (N={n_at_max})")
print(f"  Mean efficiency: {efficiency_col.mean():.4f}")
print(f"  Median efficiency: {efficiency_col.median():.4f}")

# Rows whose efficiency is under one half, least efficient first.
print("\nN values with efficiency < 0.50 (worst performers):")
worst = results_df[efficiency_col < 0.50].sort_values('efficiency')
print(worst[['N', 'score', 'efficiency', 'gap']].to_string())

In [None]:
# What would target efficiency look like?
banner = "=" * 60
print("\n" + banner)
print("TARGET ANALYSIS")
print(banner)

# Efficiency every one of the 200 N values would need if all shared the same
# efficiency and the total landed exactly on TARGET.
target_avg_efficiency = (TREE_AREA * 200) / TARGET
print(f"\nTarget score: {TARGET:.6f}")
print(f"If target achieved with uniform efficiency: {target_avg_efficiency:.4f}")
print(f"Current average efficiency: {results_df['efficiency'].mean():.4f}")

# Split the table at the target efficiency.
at_or_above = results_df['efficiency'] >= target_avg_efficiency
strictly_below = results_df['efficiency'] < target_avg_efficiency

above_target = results_df[at_or_above]
print(f"\nN values already at target efficiency: {len(above_target)} / 200")

# The reported gap is each row's 'gap' column (score minus TREE_AREA), summed.
below_target = results_df[strictly_below]
print(f"N values below target efficiency: {len(below_target)} / 200")
print(f"Total gap from these N values: {below_target['gap'].sum():.4f}")

In [None]:
# Identify specific N values that need the most improvement
banner = "=" * 60
print("\n" + banner)
print("PRIORITY N VALUES FOR IMPROVEMENT")
print(banner)

# Rank by gap (absolute improvement potential) and keep the first 30 rows.
priority = results_df.sort_values('gap', ascending=False).head(30)
print("\nTop 30 N values by gap (most improvement potential):")
priority_columns = zip(
    priority['N'], priority['score'], priority['efficiency'], priority['gap']
)
for n, score, eff, gap in priority_columns:
    # int() keeps the ':3d' format spec valid whatever dtype the N column has.
    print(f"  N={int(n):3d}: score={score:.6f}, efficiency={eff:.4f}, gap={gap:.6f}")

In [None]:
# Summary of what we need
banner = "=" * 60
print("\n" + banner)
print("SUMMARY: WHAT WE NEED TO DO")
print(banner)

print(f"\nCurrent score: {CURRENT:.6f}")
print(f"Target score: {TARGET:.6f}")
print(f"Gap: {GAP:.6f} ({GAP/CURRENT*100:.2f}%)")

# 71.191 is the public leaderboard figure quoted in the message itself.
print(f"\nOur score is BETTER than public LB leader (71.191) by {71.191 - CURRENT:.3f} points")
print(f"But we need {GAP:.3f} MORE points to reach target")

efficiency_col = results_df['efficiency']
worst_n = results_df.loc[efficiency_col.idxmin(), 'N']
best_n = results_df.loc[efficiency_col.idxmax(), 'N']

print("\nKey insights:")
print(f"  - Tree area (theoretical min): {TREE_AREA:.6f}")
print(f"  - Average efficiency: {efficiency_col.mean():.4f}")
print(f"  - Worst efficiency: {efficiency_col.min():.4f} (N={worst_n})")
print(f"  - Best efficiency: {efficiency_col.max():.4f} (N={best_n})")

print("\nTo reach target, we need to either:")
options = (
    f"  1. Improve ALL N values by {required_improvement_pct:.2f}% uniformly",
    "  2. Focus on worst N values and improve them significantly",
    "  3. Find fundamentally better packing patterns (asymmetric?)",
    "  4. Use professional nesting software (sparroWASM?)",
    "  5. Manual optimization of specific N values",
)
for option in options:
    print(option)