# Loop 3 Analysis: Understanding the Gap to Target

## Key Questions:
1. What is the per-N breakdown of our current solution vs target?
2. Which N values have the most room for improvement?
3. What techniques haven't been tried yet?

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import warnings
warnings.filterwarnings('ignore')

# Set the Decimal context to 25 significant digits
# (no Decimal arithmetic appears in the cells visible here).
getcontext().prec = 25

# Read the current best submission from disk
df = pd.read_csv('/home/submission/submission.csv')

# The numeric columns are stored as text wrapped in 's' characters;
# strip those characters before converting to float.
for column in ('x', 'y', 'deg'):
    df[column] = df[column].str.strip('s').astype(float)

# The part of the id before the first underscore is the tree count N
# of the configuration the row belongs to.
id_prefix = df['id'].str.split('_').str[0]
df['n'] = id_prefix.astype(int)

print(f"Loaded {len(df)} rows")
print(f"N values: {df['n'].min()} to {df['n'].max()}")

In [None]:
# Tree outline as (x, y) vertex pairs, listed clockwise starting at the apex.
# The shape is symmetric about the y axis and spans y = -0.2 .. 0.8.
_TREE_OUTLINE = [
    (0, 0.8),                                    # apex
    (0.125, 0.5), (0.0625, 0.5),                 # right side, upper notch
    (0.2, 0.25), (0.1, 0.25),                    # right side, middle notch
    (0.35, 0), (0.075, 0),                       # right side, widest point
    (0.075, -0.2), (-0.075, -0.2),               # bottom edge
    (-0.075, 0), (-0.35, 0),                     # left side, widest point
    (-0.1, 0.25), (-0.2, 0.25),                  # left side, middle notch
    (-0.0625, 0.5), (-0.125, 0.5),               # left side, upper notch
]

# Parallel coordinate lists used when building the polygon
TX = [x for x, _ in _TREE_OUTLINE]
TY = [y for _, y in _TREE_OUTLINE]

def get_tree_polygon(cx, cy, angle_deg):
    """Build the tree outline rotated by ``angle_deg`` and moved to (cx, cy).

    Each (TX, TY) vertex is rotated counter-clockwise about the origin by
    ``angle_deg`` degrees and then translated by (cx, cy).

    Returns:
        A shapely ``Polygon`` made from the transformed vertices.
    """
    theta = np.radians(angle_deg)
    cos_a = np.cos(theta)
    sin_a = np.sin(theta)

    # 2-D rotation matrix applied to every vertex, followed by the shift
    vertices = [
        (tx * cos_a - ty * sin_a + cx, tx * sin_a + ty * cos_a + cy)
        for tx, ty in zip(TX, TY)
    ]

    return Polygon(vertices)

def get_side_length(trees_df):
    """Return the side of the axis-aligned bounding square of a group of trees.

    Args:
        trees_df: DataFrame with float columns ``x``, ``y`` and ``deg``,
            one row per tree.

    Returns:
        The larger of the width and height of the box enclosing every tree
        polygon, as a float. NaN when ``trees_df`` has no rows, because no
        box exists.
    """
    # The bounding box of a set of shapes is the envelope of their individual
    # bounding boxes, so there is no need to build a geometric union first.
    boxes = [
        get_tree_polygon(x, y, deg).bounds
        for x, y, deg in zip(trees_df['x'], trees_df['y'], trees_df['deg'])
    ]
    if not boxes:
        return float('nan')

    # Each bounds tuple is (minx, miny, maxx, maxy)
    min_xs, min_ys, max_xs, max_ys = zip(*boxes)
    width = max(max_xs) - min(min_xs)
    height = max(max_ys) - min(min_ys)
    return max(width, height)

# Calculate per-N scores: score(N) = side^2 / N, where side is the bounding
# square side of the N-tree configuration.
per_n_scores = []
skipped_n = []  # N values whose group does not hold exactly N rows
for n in range(1, 201):
    trees = df[df['n'] == n]
    if len(trees) != n:
        # An incomplete group cannot be scored; remember it so the
        # omission is reported instead of silently shrinking the total.
        skipped_n.append(n)
        continue
    side = get_side_length(trees)
    score = side**2 / n
    per_n_scores.append({'n': n, 'side': side, 'score': score})

# Explicit columns keep the frame usable even when nothing was scored
per_n_df = pd.DataFrame(per_n_scores, columns=['n', 'side', 'score'])
if skipped_n:
    print(f"WARNING: skipped {len(skipped_n)} N values with a wrong row count "
          f"(total below is understated): {skipped_n}")
print(f"Total score: {per_n_df['score'].sum():.6f}")

In [None]:
# Rank the N values from largest to smallest score contribution
per_n_df_sorted = per_n_df.sort_values(by='score', ascending=False)
top_20 = per_n_df_sorted.head(20)
print("Top 20 N values by score contribution:")
print(top_20.to_string(index=False))

# Cumulative weight of the heaviest contributors compared with the full total
print(f"\nSum of top 20: {per_n_df_sorted.head(20)['score'].sum():.6f}")
print(f"Sum of top 50: {per_n_df_sorted.head(50)['score'].sum():.6f}")
print(f"Total: {per_n_df['score'].sum():.6f}")

In [None]:
# Measure how far the current total is from the target total
current_total = per_n_df['score'].sum()
target_total = 68.919154
gap = current_total - target_total

print(f"Current total: {current_total:.6f}")
print(f"Target total: {target_total:.6f}")
print(f"Gap: {gap:.6f}")
print(f"Gap as % of current: {100*gap/current_total:.2f}%")

# If the gap were spread evenly over every scored N, how much per N?
# Divide by the number of N values actually present in per_n_df rather
# than a fixed 200, so the average stays consistent with current_total.
avg_improvement_per_n = gap / len(per_n_df)
print(f"\nAverage improvement needed per N: {avg_improvement_per_n:.6f}")

In [None]:
# Isolate the rows for N = 1..20 and report their share of the total score
is_small = per_n_df['n'] <= 20
small_n = per_n_df.loc[is_small]
print("Small N values (1-20):")
print(small_n.to_string(index=False))
print(f"\nSum of N=1-20: {small_n['score'].sum():.6f}")
print(f"This is {100*small_n['score'].sum()/current_total:.1f}% of total score")

In [None]:
# Probe a candidate optimum for N=1: a single tree rotated by 45 degrees.
# Unrotated tree extents: height 1.0 (y from -0.2 to 0.8), width 0.7 (x from -0.35 to 0.35).

import math

# Bounding square of one tree at the origin, rotated by 45 degrees
angle = 45
polygon = get_tree_polygon(0, 0, angle)
bounds = polygon.bounds
width_45 = bounds[2] - bounds[0]
height_45 = bounds[3] - bounds[1]
side_45 = max(width_45, height_45)
print(f"N=1 at 45 degrees: side={side_45:.6f}, score={side_45**2:.6f}")

# Compare with the N=1 placement stored in the submission
n1_trees = df[df['n'] == 1]
print(f"\nCurrent N=1: x={n1_trees['x'].values[0]:.6f}, y={n1_trees['y'].values[0]:.6f}, deg={n1_trees['deg'].values[0]:.6f}")
current_n1_side = get_side_length(n1_trees)
print(f"Current N=1 side: {current_n1_side:.6f}, score: {current_n1_side**2:.6f}")

In [None]:
# Check N=2 - two trees
# Select the rows of the two-tree configuration from the submission.
n2_trees = df[df['n'] == 2]
print("Current N=2:")
# List the position and rotation of each tree in the configuration.
for _, row in n2_trees.iterrows():
    print(f"  x={row['x']:.6f}, y={row['y']:.6f}, deg={row['deg']:.6f}")
# Bounding-square side of the pair; the score divides side^2 by N (= 2).
current_n2_side = get_side_length(n2_trees)
print(f"Current N=2 side: {current_n2_side:.6f}, score: {current_n2_side**2/2:.6f}")

In [None]:
# Packing efficiency per N:
#     efficiency = (N * tree_area) / side^2
# i.e. the share of the bounding square taken up by tree area (assuming the
# trees do not overlap). A higher value means a tighter packing.

# Area of one tree, taken from the unrotated polygon at the origin
single_tree = get_tree_polygon(0, 0, 0)
tree_area = single_tree.area
print(f"Single tree area: {tree_area:.6f}")

total_tree_area = per_n_df['n'] * tree_area
square_area = per_n_df['side']**2
per_n_df['efficiency'] = total_tree_area / square_area
print("\nPacking efficiency by N:")
print(per_n_df[['n', 'side', 'score', 'efficiency']].head(30).to_string(index=False))

In [None]:
# Order the N values from least to most efficient packing; the least
# efficient ones are the candidates with the most room for improvement.
per_n_df_eff = per_n_df.sort_values(by='efficiency', ascending=True)
print("N values with LOWEST efficiency (most room for improvement):")
lowest_20 = per_n_df_eff.head(20)
print(lowest_20.to_string(index=False))

In [None]:
# Summary statistics: recap the totals, the small-N share and the ten least
# efficient N values computed in the earlier cells.
print("\n=== SUMMARY ===")
print(f"Current score: {current_total:.6f}")
print(f"Target score: {target_total:.6f}")
print(f"Gap: {gap:.6f} ({100*gap/current_total:.2f}%)")
print(f"\nSmall N (1-20) contribution: {small_n['score'].sum():.6f} ({100*small_n['score'].sum()/current_total:.1f}%)")
print(f"\nLowest efficiency N values (most improvable):")
for _, row in per_n_df_eff.head(10).iterrows():
    # iterrows yields float values, so cast N back to int for display
    print(f"  N={int(row['n'])}: efficiency={row['efficiency']:.4f}, score={row['score']:.6f}")

print(f"\n=== KEY INSIGHT ===")
# Report the computed percentage instead of a fixed figure so this line can
# never disagree with the "Gap" line printed in the summary.
print(f"The gap of {gap:.6f} points is {100*gap/current_total:.2f}% of the total score.")
print(f"This is a HUGE gap that cannot be closed by local optimization.")
print(f"The target solution likely has fundamentally different packing patterns.")