# Loop 1 LB Feedback Analysis

**LB Score: 70.6600** (matches CV exactly - perfect calibration!)

**Gap to target (68.919154): 1.74 points (2.5%)**

This notebook analyzes:
1. Per-N score contributions to identify improvement opportunities
2. Which N values contribute most to total score
3. Comparison of the actual packing efficiency with the area-based theoretical lower bound (trees filling the square with no gaps)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Polygon

# Tree outline as parallel vertex coordinate lists (x in TX, y in TY),
# consumed in order as the polygon's outer ring.
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]

# Area of one unrotated tree; later cells use it for the area-based lower bound
tree_poly = Polygon(list(zip(TX, TY)))
TREE_AREA = tree_poly.area
print(f"Single tree area: {TREE_AREA:.6f}")

# Axis-aligned extents of the unrotated outline
tree_width = max(TX) - min(TX)
tree_height = max(TY) - min(TY)
print(f"Tree bounding box: width={tree_width:.3f}, height={tree_height:.3f}")

In [None]:
# Read the best-known submission CSV into `df` and report how many rows it has
submission_path = '/home/code/external_data/saspav/santa-2025.csv'
df = pd.read_csv(submission_path)
print(f"Loaded {len(df)} rows")

def parse_value(s):
    """Convert a submission field to float, dropping a leading 's' if present.

    A string that starts with 's' has that first character removed before
    conversion; any other value is handed to float() unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by `deg` degrees and shifted by (x, y).

    Each (TX, TY) vertex is rotated about the origin with the standard 2-D
    rotation matrix, then translated by (x, y). Returns a shapely Polygon.
    """
    theta = np.radians(deg)
    c, s = np.cos(theta), np.sin(theta)
    return Polygon([
        (px * c - py * s + x, px * s + py * c + y)
        for px, py in zip(TX, TY)
    ])

def compute_bounding_side(polygons):
    """Return the side of the smallest axis-aligned square covering all polygons.

    Uses the exterior vertices of every polygon. Returns 0 when `polygons`
    is empty.
    """
    if not polygons:
        return 0
    # Gather every exterior vertex into one (num_points, 2) array
    coords = np.array([pt for shape in polygons for pt in shape.exterior.coords])
    lower = coords.min(axis=0)
    upper = coords.max(axis=0)
    extent_x, extent_y = upper - lower
    # The square must span the larger of the two extents
    return max(extent_x, extent_y)

In [None]:
# Compute per-N scores
TARGET_SCORE = 68.919154  # total score we are trying to reach

per_n_scores = []
per_n_sides = []
per_n_efficiency = []  # How close to theoretical optimal

for n in range(1, 201):
    prefix = f"{n:03d}_"
    trees = df[df['id'].str.startswith(prefix)]

    # The score and efficiency formulas below both assume group N holds exactly
    # N trees. Fail loudly instead of silently producing a wrong score (or a
    # zero side and a division by zero) when the file is incomplete.
    if len(trees) != n:
        raise ValueError(
            f"Expected {n} trees for id prefix {prefix!r}, found {len(trees)}"
        )

    # Iterate the three columns directly; avoids building a Series per row
    polygons = [
        create_tree_polygon(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(trees['x'], trees['y'], trees['deg'])
    ]

    side = compute_bounding_side(polygons)
    score_n = side**2 / n

    # Theoretical minimum: if trees packed perfectly (no gaps)
    # Total tree area = n * TREE_AREA
    # Minimum square side = sqrt(n * TREE_AREA)
    theoretical_min_side = np.sqrt(n * TREE_AREA)
    efficiency = (theoretical_min_side / side) ** 2  # Packing efficiency

    per_n_scores.append(score_n)
    per_n_sides.append(side)
    per_n_efficiency.append(efficiency)

total_score = sum(per_n_scores)
print(f"Total score: {total_score:.6f}")
print(f"Target score: {TARGET_SCORE:.6f}")
print(f"Gap: {total_score - TARGET_SCORE:.6f}")

In [None]:
# Analyze score distribution: four panels sharing the same N axis
n_values = np.arange(1, 201)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Score per N
ax1 = axes[0, 0]
ax1.plot(n_values, per_n_scores, 'b-', alpha=0.7)
ax1.set_xlabel('N (number of trees)')
# Mathtext keeps the superscript ASCII-only, so it cannot be mangled by a
# file-encoding round trip the way a literal superscript-two character can.
ax1.set_ylabel('Score contribution (side$^2$/n)')
ax1.set_title('Score Contribution per N')
ax1.grid(True, alpha=0.3)

# Cumulative score
ax2 = axes[0, 1]
cumulative = np.cumsum(per_n_scores)
ax2.plot(n_values, cumulative, 'g-')
ax2.axhline(y=68.919154, color='r', linestyle='--', label='Target')
ax2.set_xlabel('N')
ax2.set_ylabel('Cumulative Score')
ax2.set_title('Cumulative Score vs Target')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Packing efficiency
ax3 = axes[1, 0]
ax3.plot(n_values, per_n_efficiency, 'purple', alpha=0.7)
ax3.set_xlabel('N')
ax3.set_ylabel('Packing Efficiency')
ax3.set_title('Packing Efficiency (theoretical_area / actual_area)')
ax3.grid(True, alpha=0.3)

# Side length vs sqrt(N): actual sides against the area-only lower bound
ax4 = axes[1, 1]
ax4.scatter(n_values, per_n_sides, s=10, alpha=0.5, label='Actual side')
ax4.plot(n_values, np.sqrt(n_values * TREE_AREA), 'r-', label='Theoretical min')
ax4.set_xlabel('N')
ax4.set_ylabel('Bounding box side')
ax4.set_title('Bounding Box Side vs N')
ax4.legend()
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('/home/code/exploration/per_n_analysis.png', dpi=100)
plt.show()
print("Saved analysis plot")

In [None]:
# Rank every N by packing efficiency to see where the packing is weakest
# (most room for improvement) and where it is strongest.

results = pd.DataFrame({
    'N': range(1, 201),
    'score': per_n_scores,
    'side': per_n_sides,
    'efficiency': per_n_efficiency
})

# Ascending sort: the head holds the least efficient N, the tail the most efficient
results_sorted = results.sort_values('efficiency')
worst_packed = results_sorted.head(20)
best_packed = results_sorted.tail(10)

print("N values with LOWEST efficiency (most room for improvement):")
print(worst_packed.to_string())

print("\n" + "="*60)
print("N values with HIGHEST efficiency (best packed):")
print(best_packed.to_string())

In [None]:
# Project the total score under the assumption that every N reaches the
# best packing efficiency observed for any single N.
best_efficiency = results['efficiency'].max()
best_n = results.loc[results['efficiency'].idxmax(), 'N']
print(f"Best efficiency achieved: {best_efficiency:.4f} at N={best_n}")

# Side at the best efficiency = area-based minimum side / sqrt(efficiency);
# the per-N score is then side^2 / n.
efficiency_scale = np.sqrt(best_efficiency)
theoretical_best_scores = [
    (np.sqrt(n * TREE_AREA) / efficiency_scale) ** 2 / n
    for n in range(1, 201)
]

projected_total = sum(theoretical_best_scores)
current_total = sum(per_n_scores)

print(f"\nIf all N matched best efficiency:")
print(f"  Theoretical score: {projected_total:.6f}")
print(f"  Current score: {current_total:.6f}")
print(f"  Potential improvement: {current_total - projected_total:.6f}")

In [None]:
# Break the total score down by buckets of N (bounds are inclusive).
# NOTE(review): the buckets have unequal widths (20, 30, 50, 50, 50 values of N),
# so their totals are not directly comparable per N, and the closing
# "key insight" lines are printed unconditionally - confirm they match the table.
ranges = [(1, 20), (21, 50), (51, 100), (101, 150), (151, 200)]
total_score = sum(per_n_scores)

print("Score contribution by N range:")
print("="*60)
for lo, hi in ranges:
    # N is 1-based while the lists are 0-based, hence the lo-1 offset
    bucket = slice(lo - 1, hi)
    range_score = sum(per_n_scores[bucket])
    range_pct = range_score / total_score * 100
    avg_efficiency = np.mean(per_n_efficiency[bucket])
    print(f"N={lo:3d}-{hi:3d}: Score={range_score:6.3f} ({range_pct:5.1f}%), Avg Efficiency={avg_efficiency:.4f}")

print("\nKey insight: Higher N values contribute more to total score")
print("Focus optimization on N > 100 for maximum impact")

In [None]:
# Summary: headline numbers for the current submission versus the target
TARGET_SCORE = 68.919154

current_score = sum(per_n_scores)
gap = current_score - TARGET_SCORE
gap_pct = gap / TARGET_SCORE * 100  # gap expressed relative to the target

best_eff = max(per_n_efficiency)
worst_eff = min(per_n_efficiency)
# Lists are 0-based while N is 1-based, hence the +1
best_eff_n = per_n_efficiency.index(best_eff) + 1
worst_eff_n = per_n_efficiency.index(worst_eff) + 1

print("="*60)
print("SUMMARY")
print("="*60)
print(f"Current score: {current_score:.6f}")
print(f"Target score: {TARGET_SCORE:.6f}")
print(f"Gap: {gap:.6f} ({gap_pct:.2f}%)")
print(f"\nAverage packing efficiency: {np.mean(per_n_efficiency):.4f}")
print(f"Best packing efficiency: {best_eff:.4f} at N={best_eff_n}")
print(f"Worst packing efficiency: {worst_eff:.4f} at N={worst_eff_n}")
# Derive the percentage from the computed gap so this line cannot drift out of
# sync with the "Gap" line above when the submission or the target changes.
print(f"\nTo reach target, need ~{gap_pct:.1f}% improvement across all N values")
print("This requires fundamentally better packing configurations, not just optimization")