# Loop 1 Analysis: Understanding Current State and Path Forward

## Key Questions:
1. What is the per-N score breakdown? Which N values contribute most to the total?
2. Can we run tree_packer_v21 with more aggressive settings?
3. What is the gap between our current score (70.73) and the target (68.93)?

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import matplotlib.pyplot as plt

# Work with 30 significant digits in Decimal arithmetic.
decimal_context = getcontext()
decimal_context.prec = 30

# Coordinates are multiplied by this factor before being converted to float.
scale_factor = Decimal('1e18')

# Read the current best submission and show its size and first rows.
df = pd.read_csv('/home/code/experiments/001_baseline/submission.csv')
print(f'Loaded {len(df)} rows')
print(df.iloc[:10])

In [None]:
# Define tree shape
class ChristmasTree:
    """A 15-vertex tree outline placed at a given centre and rotation.

    The constructor stores the pose as ``Decimal`` values and builds
    ``self.polygon``, a shapely ``Polygon`` whose coordinates are multiplied
    by the module-level ``scale_factor``. The outline is rotated about the
    origin first and translated to the centre afterwards.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the scaled, rotated and translated polygon.

        Args:
            center_x: x offset of the tree (anything ``str()`` can render as a
                Decimal literal).
            center_y: y offset of the tree.
            angle: rotation passed to ``shapely.affinity.rotate`` (which
                interprets it in degrees by default).
        """
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Widths and heights of the trunk and the three tiers.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights at which the outline changes direction.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        two = Decimal('2')
        four = Decimal('4')

        # Unscaled outline: tip first, down the right-hand side, across the
        # trunk bottom, then back up the left-hand side.
        outline = [
            (Decimal('0.0'), tip_y),
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
            (-(trunk_w / two), trunk_bottom_y),
            (-(trunk_w / two), base_y),
            (-(base_w / two), base_y),
            (-(mid_w / four), tier_2_y),
            (-(mid_w / two), tier_2_y),
            (-(top_w / four), tier_1_y),
            (-(top_w / two), tier_1_y),
        ]

        # Scale in Decimal, then convert each coordinate to float for shapely.
        initial_polygon = Polygon([
            (float(vx * scale_factor), float(vy * scale_factor))
            for vx, vy in outline
        ])

        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(rotated, xoff=shift_x, yoff=shift_y)

# Cell-completion marker so the notebook output shows the class was defined.
print('ChristmasTree class defined')

In [None]:
def load_configuration_from_df(n, df):
    """Build the ChristmasTree objects of group ``n`` from a submission frame.

    Rows are selected by an ``id`` that starts with the zero-padded,
    three-digit ``n`` followed by an underscore. A leading ``'s'`` on the
    ``x``, ``y`` and ``deg`` values is dropped; a row is skipped when any of
    the three values is empty after that.

    Args:
        n: group number used to build the id prefix.
        df: DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        List of ChristmasTree instances, one per usable row.
    """
    def _without_marker(value):
        # Values may carry a leading 's'; strip exactly one if present.
        text = str(value)
        return text[1:] if text.startswith('s') else text

    id_prefix = f'{n:03d}_'
    selected = df[df['id'].str.startswith(id_prefix)]

    trees = []
    for _, record in selected.iterrows():
        pose = [_without_marker(record[column]) for column in ('x', 'y', 'deg')]
        if all(pose):
            trees.append(ChristmasTree(*pose))
    return trees

def get_score(trees, n):
    """Return ``(score, side_length)`` for a configuration of trees.

    ``side_length`` is the longer side of the axis-aligned bounding box around
    every exterior vertex of every tree polygon, after dividing the
    coordinates by the module-level ``scale_factor``. ``score`` is
    ``side_length**2 / n``.

    Args:
        trees: objects exposing a shapely ``polygon`` attribute.
        n: divisor for the score (the group size).

    Returns:
        A ``(score, side_length)`` tuple; ``(0.0, 0.0)`` when ``trees`` is
        empty.
    """
    if not trees:
        # Keep the return shape identical to the non-empty case so callers
        # can always unpack two values.
        return 0.0, 0.0
    xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T / float(scale_factor) for t in trees])
    min_x, min_y = xys.min(axis=0)
    max_x, max_y = xys.max(axis=0)
    side_length = max(max_x - min_x, max_y - min_y)
    return side_length**2 / n, side_length

# Cell-completion marker so the notebook output shows the helpers were defined.
print('Helper functions defined')

In [None]:
# Score every group N = 1..200 and collect the per-group results.
per_n_scores = []
per_n_sides = []

for n in range(1, 201):
    trees = load_configuration_from_df(n, df)
    # Groups without any trees are recorded with zero score and zero side.
    score, side = get_score(trees, n) if trees else (0, 0)
    per_n_scores.append({'n': n, 'score': score, 'side': side, 'trees': len(trees)})
    per_n_sides.append(side)

scores_df = pd.DataFrame(per_n_scores)
total_score = scores_df['score'].sum()

# Report the total next to the target and the remaining gap.
print(f'Total score: {total_score:.10f}')
print(f'Target: 68.931058')
print(f'Gap: {total_score - 68.931058:.6f} ({(total_score - 68.931058) / 68.931058 * 100:.2f}%)')

In [None]:
# Rank groups by score contribution, largest first.
scores_df_sorted = scores_df.sort_values(by='score', ascending=False)

# The first 20 rows of the ranking contribute the most to the total.
print('Top 20 worst-performing N values (highest score contribution):')
worst_twenty = scores_df_sorted.iloc[:20]
print(worst_twenty.to_string())

# The last 20 rows of the ranking contribute the least.
print('\nTop 20 best-performing N values (lowest score contribution):')
best_twenty = scores_df_sorted.iloc[-20:]
print(best_twenty.to_string())

In [None]:
# Four-panel overview of the per-N results.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax1, ax2), (ax3, ax4) = axes

# Derived columns used by the two bottom panels.
scores_df['cumulative'] = scores_df['score'].cumsum()
scores_df['efficiency'] = scores_df['side'] / np.sqrt(scores_df['n'])

# Top-left: score per N with the mean marked as a dashed line.
ax1.bar(scores_df['n'], scores_df['score'], alpha=0.7)
ax1.axhline(y=scores_df['score'].mean(), color='r', linestyle='--', label=f'Mean: {scores_df["score"].mean():.4f}')
ax1.set(xlabel='N', ylabel='Score (S²/N)', title='Score Contribution by N')
ax1.legend()

# Top-right: bounding-box side length per N.
ax2.plot(scores_df['n'], scores_df['side'], 'b-', alpha=0.7)
ax2.set(xlabel='N', ylabel='Side Length', title='Bounding Box Side Length by N')

# Bottom-left: running total of the score against the target line.
ax3.plot(scores_df['n'], scores_df['cumulative'], 'g-')
ax3.axhline(y=68.931058, color='r', linestyle='--', label='Target: 68.93')
ax3.set(xlabel='N', ylabel='Cumulative Score', title='Cumulative Score by N')
ax3.legend()

# Bottom-right: side length normalised by sqrt(N).
ax4.plot(scores_df['n'], scores_df['efficiency'], 'm-', alpha=0.7)
ax4.set(xlabel='N', ylabel='Side / √N', title='Packing Efficiency by N')

plt.tight_layout()
plt.savefig('/home/code/exploration/per_n_analysis.png', dpi=100)
plt.show()
print('Saved analysis plot')

In [None]:
# Analyze where the biggest improvements could come from
print('\n=== IMPROVEMENT OPPORTUNITY ANALYSIS ===')
print(f'\nCurrent total: {total_score:.6f}')
print(f'Target: 68.931058')
print(f'Need to reduce by: {total_score - 68.931058:.6f}')

# If we could improve each N by X%, what would the total be?
for pct in [1, 2, 3, 5, 10]:
    improved = total_score * (1 - pct/100)
    print(f'  {pct}% improvement across all N: {improved:.6f} (gap: {improved - 68.931058:.6f})')

# Focus on high-score N values
print('\n=== TOP 10 N VALUES TO FOCUS ON ===')
top10 = scores_df_sorted.head(10)
print(f'These 10 N values contribute {top10["score"].sum():.4f} ({top10["score"].sum()/total_score*100:.1f}% of total)')
# itertuples keeps each column's dtype, so ``n`` stays an integer and the
# ``3d`` format spec is valid. iterrows would upcast the whole row to float
# and make ``:3d`` raise ValueError.
for row in top10.itertuples(index=False):
    print(f'  N={row.n:3d}: score={row.score:.6f}, side={row.side:.6f}')

In [None]:
# Check if tree_packer_v21 can be run with more aggressive settings
import subprocess
import os

# Switch to the experiment directory; the relative paths below (and the
# ./bbox3 check in the following cell) are resolved against it.
os.chdir('/home/code/experiments/001_baseline')

# Probe tree_packer_v21 by asking for its help text. The binary may exist but
# not be runnable, or may ignore --help and keep running, so both failure
# modes are reported instead of aborting the cell with a traceback.
if os.path.exists('./tree_packer_v21'):
    try:
        result = subprocess.run(['./tree_packer_v21', '--help'], capture_output=True, text=True, timeout=5)
    except subprocess.TimeoutExpired:
        print('tree_packer_v21 --help did not finish within 5 seconds')
    except OSError as exc:
        # Raised e.g. when the file lacks execute permission or has the wrong format.
        print(f'tree_packer_v21 could not be executed: {exc}')
    else:
        print('tree_packer_v21 help:')
        # Some tools print usage on stderr, so fall back to it when stdout is empty.
        print(result.stdout if result.stdout else result.stderr)
else:
    print('tree_packer_v21 not found')

In [None]:
# Check GLIBC version and bbox3 compatibility
# NOTE: the `!` lines are IPython shell escapes, so this cell only runs inside
# a notebook / IPython session, not as plain Python.
print('System GLIBC version:')
# Only the first line of the `ldd --version` output is shown.
!ldd --version | head -1

print('\nbbox3 requirements:')
# ./bbox3 is resolved against the current working directory; stderr is merged
# into stdout so ldd error messages are displayed as well (first 10 lines).
!ldd ./bbox3 2>&1 | head -10