# Loop 2 Analysis: Strategic Assessment

## Current Status
- Best CV: 70.659493 (exp_001 - rebuild_from_corners)
- Best LB: 70.659958 (exp_000 - baseline)
- Target: 68.919154
- Gap: 1.74 points (best CV minus target, ≈2.5%)

## Key Questions
1. What is the per-N score distribution? Where is the most room for improvement?
2. What techniques haven't been tried yet?
3. Can we run the eazy-optimizer C++ code?

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import matplotlib.pyplot as plt

# Set the precision of the current decimal context to 25 significant digits.
decimal_ctx = getcontext()
decimal_ctx.prec = 25

print('Libraries loaded')

In [None]:
# Load the current best solution and turn its string columns into numbers.
df = pd.read_csv('/home/submission/submission.csv')

# x, y and deg are read as strings; remove any 's' characters from both ends
# of each value, then convert to float.
for coord_col in ('x', 'y', 'deg'):
    df[coord_col] = df[coord_col].str.strip('s').astype(float)

# Split 'id' on '_' into the group size N and the index within that group,
# and store both as integers.
df[['N', 'idx']] = df['id'].str.split('_', expand=True)
for id_col in ('N', 'idx'):
    df[id_col] = df[id_col].astype(int)

print(f'Loaded {len(df)} rows')
print(f'N ranges from {df["N"].min()} to {df["N"].max()}')
df.head()

In [None]:
# Calculate per-N scores (side^2 / N)
# Note: this is approximate - it uses only the tree centers plus a fixed padding for tree size, ignoring each tree's actual shape and rotation

def get_side_length_approx(group, tree_width=0.7, tree_height=1.0):
    """Approximate the bounding-square side length for one group of trees.

    Only the tree centers are used: the extent of the centers along each axis
    is padded by a nominal tree footprint and the larger of the two padded
    extents is returned. Rotation and the actual tree outline are ignored, so
    the result is an estimate, not an exact side length.

    Parameters
    ----------
    group : DataFrame-like
        Must provide numeric 'x' and 'y' columns supporting ``.max()`` and
        ``.min()``.
    tree_width : float, optional
        Padding added to the x extent (default 0.7, the approximate tree
        width).
    tree_height : float, optional
        Padding added to the y extent (default 1.0, the approximate tree
        height).

    Returns
    -------
    float
        ``max(x_extent + tree_width, y_extent + tree_height)``.
    """
    x_range = group['x'].max() - group['x'].min()
    y_range = group['y'].max() - group['y'].min()
    # The enclosing square must cover the larger of the two padded extents.
    return max(x_range + tree_width, y_range + tree_height)

# One record per N: the approximate side length and the score side^2 / N.
per_n_scores = [
    {'N': n, 'side': side, 'score': (side ** 2) / n}
    for n, side in (
        (n, get_side_length_approx(group)) for n, group in df.groupby('N')
    )
]

scores_df = pd.DataFrame(per_n_scores)
print(f'Total approximate score: {scores_df["score"].sum():.4f}')
print(f'\nTop 10 highest per-N scores (most room for improvement):')
worst_scores = scores_df.nlargest(10, 'score')
print(worst_scores)

In [None]:
# Plot per-N scores (left panel) and side lengths (right panel) against N.
fig, (score_ax, side_ax) = plt.subplots(1, 2, figsize=(14, 5))

score_ax.plot(scores_df['N'], scores_df['score'], 'b-', alpha=0.7)
score_ax.set_xlabel('N')
score_ax.set_ylabel('Score (side^2 / N)')
score_ax.set_title('Per-N Score Distribution')
score_ax.grid(True, alpha=0.3)

side_ax.plot(scores_df['N'], scores_df['side'], 'g-', alpha=0.7)
side_ax.set_xlabel('N')
side_ax.set_ylabel('Side Length')
side_ax.set_title('Side Length vs N')
side_ax.grid(True, alpha=0.3)

# Write the figure to disk before showing it.
fig.tight_layout()
fig.savefig('/home/code/exploration/per_n_analysis.png', dpi=100)
plt.show()
print('Saved to /home/code/exploration/per_n_analysis.png')

In [None]:
# Express each N's score as a percentage of the score summed over all N,
# then list the ten largest contributors.
total_score = scores_df['score'].sum()
scores_df['contribution'] = scores_df['score'] / total_score * 100
print('Top 10 N values by contribution to total score:')
top_contributors = scores_df.nlargest(10, 'contribution')
print(top_contributors[['N', 'score', 'contribution']])

In [None]:
# Area-based theoretical minimum score for each N.
# For a single tree: side = 1.0 (height), score = 1.0
# For N trees in perfect packing: side ~ sqrt(N * tree_area)
# Tree area ~ 0.35 (approximate)
tree_area = 0.35  # approximate

# Side of a square whose area equals the combined area of N trees.
ideal_side = np.sqrt(scores_df['N'] * tree_area)
# side^2 / N. Algebraically this reduces to tree_area for every N (including
# N = 1), so the bound is the same constant in each row.
scores_df['theoretical_min'] = (ideal_side ** 2) / scores_df['N']
scores_df['gap_to_theoretical'] = scores_df['score'] - scores_df['theoretical_min']

print('N values with largest gap to theoretical minimum:')
gap_columns = ['N', 'score', 'theoretical_min', 'gap_to_theoretical']
print(scores_df.nlargest(10, 'gap_to_theoretical')[gap_columns])

In [None]:
# Check if eazy optimizer is ready by listing the contents of its directory.
import subprocess

ls_command = ['ls', '-la', '/home/code/eazy']
result = subprocess.run(ls_command, capture_output=True, text=True)
# Print the captured stdout first, then the captured stderr.
for captured in (result.stdout, result.stderr):
    print(captured)