# Loop 10 Analysis: Understanding the Gap to Target

After 10 experiments with 8 consecutive zero-improvement results, we need to understand:
1. Where does the 1.74-point gap to the target score come from?
2. Which N values have the most room for improvement?
3. What techniques haven't been tried?

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.ops import unary_union
import matplotlib.pyplot as plt

# Tree geometry: the 15 outline vertices of one tree in its local frame,
# listed starting from the tip. TX holds the x-coordinates and TY the matching
# y-coordinates.
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
])


def get_tree_polygon(cx, cy, angle_deg):
    """Return the tree outline rotated by ``angle_deg`` and moved to (cx, cy).

    The template vertices (TX, TY) are rotated about the local origin with the
    standard 2-D rotation matrix for ``angle_deg`` degrees, then translated by
    (cx, cy). The transformed vertices are wrapped in a shapely ``Polygon``.
    """
    theta = np.deg2rad(angle_deg)
    c, s = np.cos(theta), np.sin(theta)
    xs = TX * c - TY * s + cx
    ys = TX * s + TY * c + cy
    return Polygon(zip(xs, ys))

# Read the current submission into `df` and show a quick sanity check
# (row count plus the first few rows).
submission_path = '/home/submission/submission.csv'
df = pd.read_csv(submission_path)
n_rows = len(df)
print(f'Total rows: {n_rows}')
print(df.head())

In [None]:
# Calculate score per N
#
# For each group size N the score is side**2 / N, where `side` is the longer
# edge of the axis-aligned bounding box around all N trees of that group.
# `efficiency` is the reciprocal, N / side**2.
TARGET_SCORE = 68.919154
SCORE_COLUMNS = ['n', 'side', 'score', 'efficiency']

scores = []
skipped = []
for n in range(1, 201):
    # Rows of group N carry ids that start with the zero-padded N and '_'.
    rows = df[df['id'].str.startswith(f'{n:03d}_')]
    if len(rows) != n:
        print(f'N={n}: Expected {n} rows, got {len(rows)}')
        skipped.append(n)
        continue

    # x / y / deg are stored as strings with a one-character prefix, which is
    # dropped before converting to float.
    # The bounding box of the whole group is the envelope of the per-tree
    # bounding boxes, so there is no need to build a polygon union first.
    tree_bounds = np.array([
        get_tree_polygon(float(x[1:]), float(y[1:]), float(deg[1:])).bounds
        for x, y, deg in zip(rows['x'], rows['y'], rows['deg'])
    ])
    min_x, min_y = tree_bounds[:, :2].min(axis=0)
    max_x, max_y = tree_bounds[:, 2:].max(axis=0)
    side = float(max(max_x - min_x, max_y - min_y))
    score = side**2 / n
    scores.append({'n': n, 'side': side, 'score': score, 'efficiency': n / side**2})

# Passing the column list keeps the frame well-formed even if every N was
# skipped (otherwise the 'score' lookup below would raise KeyError).
scores_df = pd.DataFrame(scores, columns=SCORE_COLUMNS)
total_score = scores_df['score'].sum()
print(f'Total score: {total_score:.6f}')
print(f'Target score: {TARGET_SCORE:.6f}')
print(f'Gap: {total_score - TARGET_SCORE:.6f}')
if skipped:
    # A partial total is lower than the real one; make that visible next to
    # the gap instead of only in the per-N messages above.
    print(f'WARNING: {len(skipped)} N value(s) skipped; total and gap are partial')

In [None]:
# Break the total score down into buckets of 20 consecutive N values and
# report each bucket's summed score and mean efficiency.
print('\nScore distribution by N range:')
bucket_size = 20
for start in range(1, 201, bucket_size):
    end = min(start + bucket_size - 1, 200)
    in_bucket = scores_df['n'].between(start, end)  # inclusive on both ends
    subset = scores_df[in_bucket]
    bucket_score = subset['score'].sum()
    bucket_efficiency = subset['efficiency'].mean()
    print(f'N={start:3d}-{end:3d}: score={bucket_score:.4f}, avg_efficiency={bucket_efficiency:.4f}')

grand_total = scores_df['score'].sum()
print(f'\nTotal: {grand_total:.6f}')

In [None]:
# List the 20 N values whose packing efficiency (N / side**2) is lowest;
# these are the groups with the most room for improvement.
# The rank column is stored on scores_df as well.
report_columns = ['n', 'side', 'score', 'efficiency']
scores_df['efficiency_rank'] = scores_df['efficiency'].rank()
least_efficient = scores_df.nsmallest(20, 'efficiency')
print('\nN values with LOWEST efficiency (most room for improvement):')
print(least_efficient[report_columns])

In [None]:
# Estimate how far each N is from a density-based reference score.
#
# If N trees of area A filled a square at packing density d, the square would
# have side sqrt(N * A / d). The corresponding score, side**2 / N = A / d, is
# the same for every N, so `improvement_potential` is each N's score minus
# that constant. The tree polygon's area is about 0.2456.
PACKING_DENSITY = 0.9  # assumed packing density (90%), not a proven bound

tree_poly = get_tree_polygon(0, 0, 0)
tree_area = tree_poly.area
print(f'Tree area: {tree_area:.6f}')

# Reference side and score for N trees at the assumed packing density
scores_df['theoretical_min_side'] = np.sqrt(scores_df['n'] * tree_area / PACKING_DENSITY)
scores_df['theoretical_min_score'] = scores_df['theoretical_min_side']**2 / scores_df['n']
scores_df['improvement_potential'] = scores_df['score'] - scores_df['theoretical_min_score']

print('\nN values with HIGHEST improvement potential:')
# nlargest, not nsmallest: the heading promises the biggest gaps between the
# current score and the reference score.
print(scores_df.nlargest(20, 'improvement_potential')[['n', 'score', 'theoretical_min_score', 'improvement_potential']])

In [None]:
# Visualize score vs N: a 2x2 grid of diagnostic plots, saved to disk and
# then shown.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(score_ax, efficiency_ax), (cumulative_ax, side_ax) = axes
n_values = scores_df['n']


def _label_axes(ax, ylabel, title):
    """Apply the shared x-label and grid plus the panel-specific labels."""
    ax.set_xlabel('N')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)


# Top-left: score per N
score_ax.plot(n_values, scores_df['score'], 'b-', linewidth=0.5)
_label_axes(score_ax, 'Score (side^2/N)', 'Score per N')

# Top-right: efficiency per N
efficiency_ax.plot(n_values, scores_df['efficiency'], 'g-', linewidth=0.5)
_label_axes(efficiency_ax, 'Efficiency (N/side^2)', 'Packing Efficiency per N')

# Bottom-left: running total of the score, with the target as a dashed line
cumulative_ax.plot(n_values, scores_df['score'].cumsum(), 'r-')
cumulative_ax.axhline(y=68.919154, color='k', linestyle='--', label='Target')
cumulative_ax.legend()
_label_axes(cumulative_ax, 'Cumulative Score', 'Cumulative Score vs N')

# Bottom-right: bounding-square side length, with sqrt(N) for comparison
side_ax.plot(n_values, scores_df['side'], 'b-', linewidth=0.5)
side_ax.plot(n_values, np.sqrt(n_values), 'r--', alpha=0.5, label='sqrt(N)')
side_ax.legend()
_label_axes(side_ax, 'Side Length', 'Side Length vs N')

plt.tight_layout()
plt.savefig('/home/code/exploration/loop10_score_analysis.png', dpi=100)
plt.show()

In [None]:
# Work out how much improvement is needed to reach the target score.
target = 68.919154
current = scores_df['score'].sum()
gap = current - target
gap_fraction = gap / current

print(f'Current score: {current:.6f}')
print(f'Target score: {target:.6f}')
print(f'Gap: {gap:.6f}')
print(f'Gap percentage: {gap_fraction*100:.2f}%')

# Scenario: the gap is spread evenly over all 200 N values.
per_n_improvement = gap / 200
# Score scales with side**2, so cutting the score by `gap_fraction` means
# scaling every side by sqrt(1 - gap_fraction).
side_reduction = 1 - np.sqrt(1 - gap_fraction)
print(f'\nRequired uniform improvement: {per_n_improvement:.6f} per N')
print(f'Required side reduction: {side_reduction*100:.4f}%')

In [None]:
# Check if there are any public datasets we haven't tried
import os


def list_candidate_csvs(base_dir):
    """Return ``(path, row_count)`` for each submission-like CSV under ``base_dir``.

    Walks ``base_dir`` recursively and loads every ``*.csv`` file. A file is
    reported when it has both an ``id`` and an ``x`` column. Files that cannot
    be parsed are skipped with a printed notice; the scan is best-effort and
    never raises because of a single bad file. A missing ``base_dir`` yields
    an empty list.
    """
    found = []
    for root, _dirs, files in os.walk(base_dir):
        for f in files:
            if not f.endswith('.csv'):
                continue
            path = os.path.join(root, f)
            try:
                df_check = pd.read_csv(path)
            except Exception as exc:
                # `Exception` rather than a bare `except`, so Ctrl-C and
                # SystemExit still stop the scan.
                print(f'  (skipped {path}: {type(exc).__name__})')
                continue
            if 'id' in df_check.columns and 'x' in df_check.columns:
                found.append((path, len(df_check)))
    return found


print('Available datasets:')
for base_dir in ('/home/code/data', '/home/code/research/kernels'):
    for path, n_rows in list_candidate_csvs(base_dir):
        print(f'  {path}: {n_rows} rows')