# Loop 1 Analysis: Per-N Score Analysis and Improvement Opportunities

Goal: Understand which N values contribute most to the score gap and identify improvement opportunities.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import matplotlib.pyplot as plt

# Tree polygon vertices
# TX[i] / TY[i] are the x / y coordinates of the i-th vertex of the base tree
# outline (15 vertices). The outline is mirror-symmetric about x = 0, spans
# x in [-0.35, 0.35] and y in [-0.2, 0.8], and has its tip at (0, 0.8).
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
# Unrotated, untranslated tree shape built from the vertex lists above.
BASE_TREE = Polygon(zip(TX, TY))

def parse_value(val):
    """Convert a submission cell to ``float``.

    String values may carry a leading ``'s'`` marker (e.g. ``'s0.25'``);
    the marker is dropped before conversion. Every other value is passed
    to ``float()`` unchanged.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_marker else val)

def create_tree(x, y, deg):
    """Return the base tree rotated by ``deg`` about (0, 0), then shifted by (x, y)."""
    rotated = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing ``trees``.

    Each item must expose ``bounds`` as ``(min_x, min_y, max_x, max_y)``.
    The result is the larger of the combined width and height; an empty
    input yields 0.
    """
    if not trees:
        return 0

    pos_inf, neg_inf = float('inf'), float('-inf')
    boxes = [tree.bounds for tree in trees]

    # Seeding each fold with +/-inf keeps the same comparison order as a
    # running min()/max() update.
    lo_x = min([pos_inf] + [box[0] for box in boxes])
    lo_y = min([pos_inf] + [box[1] for box in boxes])
    hi_x = max([neg_inf] + [box[2] for box in boxes])
    hi_y = max([neg_inf] + [box[3] for box in boxes])

    width, height = hi_x - lo_x, hi_y - lo_y
    return max(width, height)

# Confirmation message printed once the cell's definitions have executed.
print('Functions loaded')

In [None]:
# Load baseline submission
# Read the submission CSV into `df` and report how many rows it holds.
submission_path = '/home/submission/submission.csv'
df = pd.read_csv(submission_path)
print(f'Loaded {df.shape[0]} rows')
df.head()

In [None]:
# Calculate per-N scores
# For each N in 1..200: select the rows whose id starts with the zero-padded
# prefix "NNN_", rebuild their tree polygons, measure the side of the
# axis-aligned square that bounds them, and score the group as side**2 / N.
per_n_scores = []

for n in range(1, 201):
    prefix = f"{n:03d}_"
    n_rows = df[df['id'].str.startswith(prefix)]

    if n_rows.empty:
        # A group with no rows gets side 0 and therefore score 0, which would
        # silently lower the reported total; make the gap visible instead.
        print(f'WARNING: no rows found for N={n} (id prefix {prefix!r}); scored as 0')

    # Iterate the three columns in lockstep rather than building a Series
    # per row with iterrows().
    trees = [
        create_tree(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(n_rows['x'], n_rows['y'], n_rows['deg'])
    ]

    side = get_bounding_box_side(trees)
    side_sq = side ** 2
    per_n_scores.append({'N': n, 'side': side, 'score': side_sq / n, 'side_sq': side_sq})

per_n_df = pd.DataFrame(per_n_scores)
print(f'Total score: {per_n_df["score"].sum():.6f}')
per_n_df.head(20)

In [None]:
# Analyze score distribution
# Inclusive (start, end) ranges of N to summarise. Listing them explicitly
# replaces a computed default that was overridden for every range but the first.
n_ranges = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]

print('Score statistics by N range:')
for start, end in n_ranges:
    # between() is inclusive on both ends by default.
    subset = per_n_df[per_n_df['N'].between(start, end)]
    print(f'N={start}-{end}: total_score={subset["score"].sum():.4f}, avg_score={subset["score"].mean():.4f}, count={len(subset)}')

print(f'\nOverall total: {per_n_df["score"].sum():.6f}')

In [None]:
# Plot per-N scores
# 2x2 overview figure: per-N score, bounding-box side, cumulative score
# against the target, and side / sqrt(N). Also adds the 'cumsum' and
# 'efficiency' columns to per_n_df and saves the figure to disk.
target_score = 68.919154  # reference total drawn as the dashed red line

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Score per N
ax1 = axes[0, 0]
ax1.bar(per_n_df['N'], per_n_df['score'], alpha=0.7)
ax1.set_xlabel('N')
# Superscript two written as an escape so the label survives re-encoding
# (the previous literal had decayed into mojibake).
ax1.set_ylabel('Score (side\u00b2/N)')
ax1.set_title('Score Contribution by N')

# Side length per N
ax2 = axes[0, 1]
ax2.plot(per_n_df['N'], per_n_df['side'], 'b-', alpha=0.7)
ax2.set_xlabel('N')
ax2.set_ylabel('Side Length')
ax2.set_title('Bounding Box Side Length by N')

# Cumulative score
ax3 = axes[1, 0]
per_n_df['cumsum'] = per_n_df['score'].cumsum()
ax3.plot(per_n_df['N'], per_n_df['cumsum'], 'g-')
ax3.axhline(y=target_score, color='r', linestyle='--', label='Target')
ax3.set_xlabel('N')
ax3.set_ylabel('Cumulative Score')
ax3.set_title('Cumulative Score vs Target')
ax3.legend()

# Score efficiency (side/sqrt(N))
ax4 = axes[1, 1]
per_n_df['efficiency'] = per_n_df['side'] / np.sqrt(per_n_df['N'])
ax4.plot(per_n_df['N'], per_n_df['efficiency'], 'm-', alpha=0.7)
ax4.set_xlabel('N')
ax4.set_ylabel('Side / sqrt(N)')
ax4.set_title('Packing Efficiency (lower is better)')

plt.tight_layout()
plt.savefig('/home/code/exploration/per_n_analysis.png', dpi=100)
plt.show()

In [None]:
# Find N values with highest improvement potential
# Area-based reference: if the trees could be packed perfectly, the square
# would need side ~ sqrt(N * tree_area).
tree_area = BASE_TREE.area
print(f'Single tree area: {tree_area:.6f}')

n_col = per_n_df['N']
score_col = per_n_df['score']

# Derive each quantity once, then attach the columns in a fixed order.
min_side = np.sqrt(n_col * tree_area)
min_score = min_side**2 / n_col
gap = score_col - min_score

per_n_df['theoretical_min_side'] = min_side
per_n_df['theoretical_min_score'] = min_score
per_n_df['gap_to_theoretical'] = gap
per_n_df['gap_pct'] = gap / score_col * 100

report_cols = ['N', 'score', 'theoretical_min_score', 'gap_to_theoretical', 'gap_pct']
print('\nTop 20 N values with highest gap to theoretical minimum:')
print(per_n_df.nlargest(20, 'gap_to_theoretical')[report_cols])

In [None]:
# Analyze angle distribution in baseline
# Rotation of every row in the submission, reduced modulo 360.
angles = np.array([parse_value(raw_deg) % 360 for raw_deg in df['deg']])

print('Angle statistics:')
# Each statistic is evaluated right before it is printed.
for label, stat_name in (('Mean', 'mean'), ('Std', 'std'), ('Min', 'min'), ('Max', 'max')):
    print(f'  {label}: {getattr(angles, stat_name)():.2f}')

# Histogram of angles
plt.figure(figsize=(10, 4))
hist_ax = plt.gca()
hist_ax.hist(angles, bins=36, edgecolor='black', alpha=0.7)
hist_ax.set_xlabel('Angle (degrees)')
hist_ax.set_ylabel('Count')
hist_ax.set_title('Distribution of Tree Angles in Baseline')
plt.savefig('/home/code/exploration/angle_distribution.png', dpi=100)
plt.show()

In [None]:
# Check for common angle patterns
# Round to whole degrees, then wrap modulo 360: values in [359.5, 360) round
# up to 360, which is the same orientation as 0 and must share its bucket.
rounded_angles = pd.Series(angles).round(0) % 360
angle_counts = rounded_angles.value_counts().head(20)
print('Most common angles (rounded to nearest degree):')
print(angle_counts)

In [None]:
# Summary of findings
# Prints the headline numbers: total vs target, share of the total per N
# range, and the area-based theoretical minimum.
target_score = 68.919154  # reference total the baseline is compared against
total_score = per_n_df["score"].sum()  # computed once, reused below

print('='*60)
print('KEY FINDINGS')
print('='*60)
print(f'\n1. Total baseline score: {total_score:.6f}')
print(f'   Target score: {target_score:.6f}')
print(f'   Gap: {total_score - target_score:.6f}')

print(f'\n2. Score distribution by N range:')
for start, end in [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]:
    subset = per_n_df[(per_n_df['N'] >= start) & (per_n_df['N'] <= end)]
    range_score = subset["score"].sum()
    print(f'   N={start:3d}-{end:3d}: {range_score:.4f} ({range_score/total_score*100:.1f}%)')

print(f'\n3. Theoretical minimum total score: {per_n_df["theoretical_min_score"].sum():.6f}')
print(f'   This represents the absolute lower bound if perfect packing were possible.')

print(f'\n4. Angle distribution shows diverse angles, not just 0/90/180/270.')