# Loop 3 Analysis: Understanding the Gap to Target

We need to understand where the 1.73-point gap between our current score (70.647306) and the target (68.919154) comes from, and which approaches might close it.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the best submission produced so far
df = pd.read_csv('/home/code/experiments/003_full_ensemble_bbox3/submission.csv')
print(f'Loaded {len(df)} rows')

# x, y and deg are read as strings: drop every 's' character, then convert to float
for coord_col in ('x', 'y', 'deg'):
    df[coord_col] = df[coord_col].str.replace('s', '').astype(float)

# The tree count n is the integer in front of the first '_' of the id
df['n'] = df['id'].str.split('_').str[0].astype(int)

print(df.head())

In [None]:
# Calculate score per N
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union

# Carry 30 significant digits in all Decimal arithmetic below
getcontext().prec = 30
# Multiplier applied to every coordinate when ChristmasTree builds its polygon
scale_factor = 1

class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    The constructor stores ``center_x``, ``center_y`` and ``angle`` as
    ``Decimal`` values and builds ``self.polygon``: the 15-vertex tree
    outline, rotated by ``angle`` about the origin and then translated to
    the centre. All coordinates are multiplied by the module-level
    ``scale_factor`` before being handed to shapely as floats.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        def to_float(value):
            # Scale in Decimal first, convert to float last
            return float(value * scale_factor)

        two = Decimal('2')
        four = Decimal('4')

        # Widths of the trunk and of the three foliage tiers
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights at which the outline changes direction
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        half_trunk = trunk_w / two

        # Right-hand half of the outline, walking from below the tip down to
        # the bottom-right corner of the trunk
        right_half = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (half_trunk, base_y),
            (half_trunk, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up to the tip
        left_half = [(-x, y) for x, y in reversed(right_half)]

        outline = [(Decimal('0.0'), tip_y)] + right_half + left_half
        initial_polygon = Polygon(
            [(to_float(x), to_float(y)) for x, y in outline]
        )

        rotated = affinity.rotate(
            initial_polygon, float(self.angle), origin=(0, 0)
        )
        self.polygon = affinity.translate(
            rotated,
            xoff=to_float(self.center_x),
            yoff=to_float(self.center_y),
        )

def get_tree_list_side_length(tree_list):
    """Return the longer side of the bounding box around all trees.

    The ``polygon`` of every item in ``tree_list`` is merged with
    ``unary_union`` and the bounds of the result are measured.
    """
    box = unary_union([tree.polygon for tree in tree_list]).bounds
    width = box[2] - box[0]
    height = box[3] - box[1]
    return max(width, height)

def score_n(n, group_df):
    """Return the score of one group: bounding side length squared over ``n``.

    ``group_df`` supplies one tree per row through its ``x``, ``y`` and
    ``deg`` columns.
    """
    trees = []
    for x, y, deg in zip(group_df['x'], group_df['y'], group_df['deg']):
        # ChristmasTree parses its arguments as Decimal, so pass them as text
        trees.append(ChristmasTree(str(x), str(y), str(deg)))
    side = get_tree_list_side_length(trees)
    return side ** 2 / n

# Progress message only; nothing is computed in this cell
print('Calculating per-N scores...')

In [None]:
# Score every tree count from 1 to 200, keyed by N
scores = {}
for n in range(1, 201):
    rows_for_n = df.loc[df['n'] == n]
    scores[n] = score_n(n, rows_for_n)
    # Only echo the first ten values and every fiftieth one
    is_sampled = n <= 10 or n % 50 == 0
    if is_sampled:
        print(f'N={n:3d}: score={scores[n]:.6f}')

# The overall score is the sum of the per-N scores
total_score = sum(scores.values())
print(f'\nTotal score: {total_score:.6f}')
print(f'Target: 68.919154')
print(f'Gap: {total_score - 68.919154:.6f}')

In [None]:
# Analyze the gap - where does it come from?
# The target is 68.919154, we have 70.647306
# Gap = 1.728152

# Calculate theoretical minimum (perfect packing efficiency)
# The tree area is measured from the polygon that ChristmasTree actually builds
# instead of being hard-coded. For the unscaled outline it is
#   0.0375 (tip triangle) + 0.065625 + 0.1125 (two tier trapezoids)
#   + 0.03 (trunk) = 0.245625
# The previous literal 0.3175 overstated the area and therefore inflated every
# efficiency figure derived from it.
# Polygon coordinates are multiplied by scale_factor, so the area is divided by
# its square to get back to unscaled units.
tree_area = ChristmasTree().polygon.area / float(scale_factor) ** 2

theoretical_min = []
for n in range(1, 201):
    # Minimum side length if trees could be packed with 100% efficiency
    min_side = np.sqrt(n * tree_area)
    # side^2 / n reduces to tree_area, so this bound is the same for every N
    theoretical_min.append(min_side ** 2 / n)

print(f'Theoretical minimum (100% efficiency): {sum(theoretical_min):.6f}')
print(f'Current score: {total_score:.6f}')
print(f'Efficiency: {sum(theoretical_min) / total_score * 100:.2f}%')

In [None]:
# Plot score vs N: a 2x2 grid of diagnostic charts, saved to disk and shown
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Score per N, with the perfect-packing bound for reference
ax1 = axes[0, 0]
ax1.plot(range(1, 201), [scores[n] for n in range(1, 201)], 'b-', label='Current')
ax1.plot(range(1, 201), theoretical_min, 'g--', alpha=0.5, label='Theoretical min')
ax1.set_xlabel('N')
# Label previously contained a mis-encoded superscript ('SÂ²/N')
ax1.set_ylabel('Score (S²/N)')
ax1.set_title('Score per N')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Efficiency per N: the perfect-packing bound as a percentage of the actual score
ax2 = axes[0, 1]
# theoretical_min is 0-indexed while scores is keyed by N starting at 1
efficiency = [t_min / scores[n] * 100 for n, t_min in enumerate(theoretical_min, start=1)]
ax2.plot(range(1, 201), efficiency, 'r-')
ax2.set_xlabel('N')
ax2.set_ylabel('Efficiency (%)')
ax2.set_title('Packing Efficiency per N')
ax2.axhline(y=np.mean(efficiency), color='k', linestyle='--', label=f'Mean: {np.mean(efficiency):.1f}%')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Cumulative score, with the target total drawn as a horizontal line
ax3 = axes[1, 0]
cumulative = np.cumsum([scores[n] for n in range(1, 201)])
ax3.plot(range(1, 201), cumulative, 'b-')
ax3.axhline(y=68.919154, color='r', linestyle='--', label='Target')
ax3.set_xlabel('N')
ax3.set_ylabel('Cumulative Score')
ax3.set_title('Cumulative Score vs N')
ax3.legend()
ax3.grid(True, alpha=0.3)

# Gap contribution per N (how much each N contributes to the gap)
ax4 = axes[1, 1]
# The per-N target scores are unknown, so the gap is split in proportion to
# each N's share of the total: gap_per_n = gap * (score_n / total_score).
# NOTE: by construction this bar chart has the same shape as the score curve.
gap = total_score - 68.919154
gap_contribution = [(scores[n] / total_score) * gap for n in range(1, 201)]
ax4.bar(range(1, 201), gap_contribution, alpha=0.7)
ax4.set_xlabel('N')
ax4.set_ylabel('Gap Contribution')
ax4.set_title('Estimated Gap Contribution per N')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('/home/code/exploration/loop3_analysis.png', dpi=100)
plt.show()

print(f'\nTop 10 N values by gap contribution:')
# One row per N, ordered from the largest estimated gap share to the smallest
gap_df = pd.DataFrame({
    'N': range(1, 201),
    'gap_contribution': gap_contribution,
    'score': [scores[n] for n in range(1, 201)],
}).sort_values('gap_contribution', ascending=False)
print(gap_df.head(10))

In [None]:
# Tabulate score, lower bound and efficiency for every N, then rank by efficiency
per_n_scores = [scores[n] for n in range(1, 201)]
eff_df = pd.DataFrame({
    'N': range(1, 201),
    'score': per_n_scores,
    'theoretical_min': theoretical_min,
    'efficiency': efficiency,
})

lowest_first = eff_df.sort_values('efficiency')
print('Top 10 N values with WORST efficiency (most room for improvement):')
print(lowest_first.head(10))

highest_first = eff_df.sort_values('efficiency', ascending=False)
print('\nTop 10 N values with BEST efficiency:')
print(highest_first.head(10))

In [None]:
# Key insight: how much must each N improve for the total to reach the target?
# Spreading the gap evenly over the 200 values of N gives the absolute
# reduction needed per N.
avg_reduction_needed = gap / 200
print(f'Average reduction needed per N: {avg_reduction_needed:.6f}')
print(f'As percentage of average score: {avg_reduction_needed / (total_score/200) * 100:.2f}%')

# The same requirement expressed relative to the total score
print(f'\nTo reach target, we need {gap/total_score*100:.2f}% improvement overall')
print(f'This means each N needs ~{gap/total_score*100:.2f}% improvement')

# What-if: bring the ten least efficient N values up to 50% efficiency
worst_10 = eff_df.sort_values('efficiency').head(10)
print(f'\nIf we improved the 10 worst efficiency N values to 50% efficiency:')
for raw_n, current, bound in zip(worst_10['N'], worst_10['score'], worst_10['theoretical_min']):
    n = int(raw_n)
    # Score that corresponds to 50% efficiency for this N
    target_50 = bound / 0.50
    improvement = current - target_50
    print(f'  N={n:3d}: {current:.6f} -> {target_50:.6f} (improvement: {improvement:.6f})')