# Loop 20 Analysis: Understanding the Gap to Target

**Current best**: 70.630370
**Target**: 68.919154
**Gap**: 1.711 (2.48% of the target score)

After 21 experiments, all SA-based approaches converge to ~70.630. We need to understand:
1. WHERE does the gap come from? Which N values have room for improvement?
2. What is the theoretical lower bound for each N?
3. Are there N values where asymmetric solutions could beat symmetric ones?

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import matplotlib.pyplot as plt

# Outline of one tree as (x, y) vertices. The outline is mirror-symmetric
# about x = 0, so only the right-hand edge is written out (from just below
# the tip down to the bottom of the trunk); the left-hand edge is produced by
# reflecting those points and walking them in reverse order.
_TREE_RIGHT_EDGE = [
    (0.125, 0.5), (0.0625, 0.5),   # y = 0.5 step
    (0.2, 0.25), (0.1, 0.25),      # y = 0.25 step
    (0.35, 0.0), (0.075, 0.0),     # y = 0.0 step (widest point, then trunk)
    (0.075, -0.2),                 # bottom of the trunk
]

TREE_TEMPLATE = (
    [(0.0, 0.8)]  # tip of the tree
    + _TREE_RIGHT_EDGE
    + [(-x, y) for x, y in reversed(_TREE_RIGHT_EDGE)]
)

def parse_s_value(val):
    """Convert a submission cell to a float.

    String values may carry a leading ``'s'`` marker (e.g. ``'s1.25'``); the
    marker is dropped before conversion. Anything else is handed straight to
    ``float``.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_marker else val)

def create_tree_polygon(x, y, angle):
    """Build the tree polygon rotated by ``angle`` degrees and moved to (x, y).

    The template outline is rotated about the template origin (0, 0) first
    and translated afterwards, so (x, y) is where that origin ends up.
    """
    outline = Polygon(TREE_TEMPLATE)
    turned = rotate(outline, angle, origin=(0, 0), use_radians=False)
    return translate(turned, x, y)

def get_n_side(df, n):
    """Return the side of the axis-aligned square enclosing configuration ``n``.

    Every row of ``df`` whose ``'n'`` column equals ``n`` is turned into a
    tree polygon; the result is the larger of the overall x-extent and
    y-extent of those polygons. Returns 0 when no row matches.
    """
    members = df[df['n'] == n]
    boxes = [
        create_tree_polygon(rec['x'], rec['y'], rec['deg']).bounds
        for _, rec in members.iterrows()
    ]
    if not boxes:
        return 0

    # Each box is (minx, miny, maxx, maxy); merge them into one overall extent.
    left = min(box[0] for box in boxes)
    bottom = min(box[1] for box in boxes)
    right = max(box[2] for box in boxes)
    top = max(box[3] for box in boxes)
    return max(right - left, top - bottom)

# Load the current best submission and normalise its columns.
df = pd.read_csv('/home/submission/submission.csv')

# Position and angle cells are run through parse_s_value to get plain floats.
for column in ('x', 'y', 'deg'):
    df[column] = df[column].apply(parse_s_value)

# The configuration size N is the integer before the first '_' in the id.
df['n'] = df['id'].apply(lambda row_id: int(row_id.split('_')[0]))

n_column = df['n']
print(f"Loaded {len(df)} rows")
print(f"N values: {n_column.nunique()}")
print(f"N range: {n_column.min()} to {n_column.max()}")


In [None]:
# Per-N score breakdown: each N contributes side^2 / N to the total.
def _score_entry(n):
    """Return the side length and score contribution for configuration n."""
    side = get_n_side(df, n)
    return {'n': n, 'side': side, 'score': (side ** 2) / n}


scores = [_score_entry(n) for n in range(1, 201)]
scores_df = pd.DataFrame(scores)

total_score = scores_df['score'].sum()
print(f"Total score: {total_score:.6f}")
print(f"Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")

# The 20 N values that contribute most to the total
print("\nTop 20 N values by score contribution:")
top_contributors = scores_df.nlargest(20, 'score')
print(top_contributors[['n', 'side', 'score']].to_string())

In [None]:
# Calculate theoretical lower bound
# Trees may not overlap, so N trees cover at least N * tree_area, where
# tree_area is the area of the tree polygon itself. The enclosing square can
# therefore never have a side shorter than sqrt(N * tree_area).
#
# NOTE: the axis-aligned bounding box of one tree (about 0.7 x 1.0) is NOT a
# valid lower bound: rotated trees can interlock and share each other's
# bounding-box space. Using the bounding-box area here would put the
# "minimum" above scores that are actually achieved and push efficiency
# above 1. The bounding-box figures are still printed for reference only.

# Single tree dimensions
single_tree = Polygon(TREE_TEMPLATE)
minx, miny, maxx, maxy = single_tree.bounds
tree_width = maxx - minx
tree_height = maxy - miny
tree_area = single_tree.area
tree_bbox_area = tree_width * tree_height

print(f"Single tree dimensions:")
print(f"  Width: {tree_width:.4f}")
print(f"  Height: {tree_height:.4f}")
print(f"  Area: {tree_area:.4f}")
print(f"  Bounding box area: {tree_bbox_area:.4f}")

# Theoretical lower bound: sqrt(N * tree_area)
# This assumes perfect packing with no wasted space, so it is a bound that
# real packings approach from above rather than a value they can reach.
scores_df['theoretical_side'] = np.sqrt(scores_df['n'] * tree_area)
scores_df['theoretical_score'] = (scores_df['theoretical_side'] ** 2) / scores_df['n']
# Efficiency is a ratio of side lengths, so it is <= 1 for any valid packing.
scores_df['efficiency'] = scores_df['theoretical_side'] / scores_df['side']
scores_df['gap_to_theoretical'] = scores_df['score'] - scores_df['theoretical_score']

print(f"\nTheoretical minimum total score: {scores_df['theoretical_score'].sum():.6f}")
print(f"Current total score: {scores_df['score'].sum():.6f}")
print(f"Gap to theoretical: {scores_df['score'].sum() - scores_df['theoretical_score'].sum():.6f}")

# Show N values with largest gap to theoretical
print("\nN values with largest gap to theoretical:")
print(scores_df.nlargest(20, 'gap_to_theoretical')[['n', 'side', 'theoretical_side', 'score', 'theoretical_score', 'gap_to_theoretical', 'efficiency']].to_string())

In [None]:
# Two side-by-side scatter plots: packing efficiency and score gap, both vs N.
output_path = '/home/code/exploration/efficiency_analysis.png'
fig, (ax_eff, ax_gap) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: efficiency, with a reference line at the perfect-packing value.
ax_eff.scatter(scores_df['n'], scores_df['efficiency'], alpha=0.5, s=10)
ax_eff.set_xlabel('N')
ax_eff.set_ylabel('Efficiency (theoretical/actual side)')
ax_eff.set_title('Packing Efficiency by N')
ax_eff.axhline(y=1.0, color='r', linestyle='--', label='Perfect packing')
ax_eff.legend()

# Right panel: absolute score gap to the theoretical value.
ax_gap.scatter(scores_df['n'], scores_df['gap_to_theoretical'], alpha=0.5, s=10)
ax_gap.set_xlabel('N')
ax_gap.set_ylabel('Gap to theoretical score')
ax_gap.set_title('Gap to Theoretical by N')

fig.tight_layout()
fig.savefig(output_path, dpi=100)
plt.show()

print(f"Saved to {output_path}")

In [None]:
# Find the N values furthest from the theoretical side length; these are the
# ones where an improvement is most plausible.
report_columns = ['n', 'side', 'theoretical_side', 'efficiency', 'score', 'gap_to_theoretical']

print("N values with lowest efficiency (most room for improvement):")
low_efficiency = scores_df.nsmallest(30, 'efficiency')
print(low_efficiency[report_columns].to_string())

# What each N would score if its side were theoretical_side / 0.9,
# i.e. if every configuration reached 90% side efficiency.
side_at_90pct = scores_df['theoretical_side'] / 0.9
scores_df['score_at_90pct'] = side_at_90pct ** 2 / scores_df['n']
scores_df['potential_improvement'] = scores_df['score'] - scores_df['score_at_90pct']

current_sum = scores_df['score'].sum()
sum_at_90pct = scores_df['score_at_90pct'].sum()
print(f"\nIf we could reach 90% efficiency for all N:")
print(f"  Current total: {current_sum:.6f}")
print(f"  At 90% efficiency: {sum_at_90pct:.6f}")
print(f"  Potential improvement: {current_sum - sum_at_90pct:.6f}")

In [None]:
# Look for structure in which N values pack well: compare efficiency on
# perfect squares and on triangular numbers.

perfect_squares = [k * k for k in range(1, 15)]  # 1, 4, 9, 16, 25, ..., 196
triangular = [k * (k + 1) // 2 for k in range(1, 20)]  # 1, 3, 6, 10, 15, ...


def _print_efficiency(heading, candidates):
    """Print efficiency and score for each candidate N that is at most 200."""
    print(heading)
    for n in candidates:
        if n > 200:
            continue
        row = scores_df[scores_df['n'] == n].iloc[0]
        print(f"  N={n:3d}: efficiency={row['efficiency']:.4f}, score={row['score']:.6f}")


_print_efficiency("Efficiency for perfect squares:", perfect_squares)
_print_efficiency("\nEfficiency for triangular numbers:", triangular)

In [None]:
# Work out the per-N scores that would be needed to hit the target total.
target = 68.919154
current_total = scores_df['score'].sum()
gap = current_total - target

for label, value in (("Current total", current_total), ("Target", target), ("Gap", gap)):
    print(f"{label}: {value:.6f}")

# Scenario: every N shrinks its score by the same factor target / current.
scale = target / current_total
scores_df['target_score'] = scores_df['score'] * scale
scores_df['needed_improvement'] = scores_df['score'] - scores_df['target_score']

print(f"\nTo reach target with proportional improvement:")
print(f"  Need {(1 - scale)*100:.2f}% improvement across all N")

# Under that scenario, list the N values that must give up the most score.
print("\nN values needing most improvement (absolute):")
most_needed = scores_df.nlargest(20, 'needed_improvement')
print(most_needed[['n', 'score', 'target_score', 'needed_improvement']].to_string())

In [None]:
# Summary: What would it take to reach the target?
# Relies on `target`, `current_total`, `gap` and the 'theoretical_score' /
# 'efficiency' columns of scores_df computed in the earlier cells.
theoretical_total = scores_df['theoretical_score'].sum()
mean_efficiency = scores_df['efficiency'].mean()

# Efficiency is a ratio of SIDE lengths while the score scales with side^2.
# Shrinking every score by target / current_total therefore shrinks every
# side by sqrt(target / current_total), which RAISES efficiency by the
# inverse factor sqrt(current_total / target).
required_efficiency = mean_efficiency * np.sqrt(current_total / target)

print("="*60)
print("SUMMARY: What would it take to reach target 68.919154?")
print("="*60)

print(f"\nCurrent score: {current_total:.6f}")
print(f"Target: {target:.6f}")
print(f"Gap: {gap:.6f} ({gap/current_total*100:.2f}%)")

print(f"\nTheoretical minimum (perfect packing): {theoretical_total:.6f}")
print(f"Gap to theoretical: {current_total - theoretical_total:.6f}")

print(f"\nAverage efficiency: {mean_efficiency:.4f}")
print(f"Min efficiency: {scores_df['efficiency'].min():.4f} (N={scores_df.loc[scores_df['efficiency'].idxmin(), 'n']})")
print(f"Max efficiency: {scores_df['efficiency'].max():.4f} (N={scores_df.loc[scores_df['efficiency'].idxmax(), 'n']})")

print(f"\nTo reach target, we need to improve by {gap:.6f} points.")
print(f"This is equivalent to improving efficiency from {mean_efficiency:.4f} to {required_efficiency:.4f}")

# Check if target is achievable
if target < theoretical_total:
    print(f"\n WARNING: Target {target} is BELOW theoretical minimum {theoretical_total:.6f}!")
    print("This target may be impossible to achieve.")
else:
    print(f"\n Target {target} is above theoretical minimum {theoretical_total:.6f}")
    print("Target is theoretically achievable.")