# Loop 12 Analysis: Understanding the Gap to Target

**Current Best**: 70.630478
**Target**: 68.919154
**Gap**: 1.711 points (2.48% of the target, i.e. a 2.42% reduction of the current score)

This analysis will:
1. Analyze per-N score breakdown to find where improvements are possible
2. Compare our solutions to theoretical bounds
3. Identify which N values have the most room for improvement

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
import matplotlib.pyplot as plt

# Use 25 significant digits for every Decimal operation in this notebook.
getcontext().prec = 25
# Multiplier applied to tree coordinates before they are handed to shapely;
# calculate_score divides by the same magnitude (1e15) to undo it.
scale_factor = Decimal("1e15")

class ChristmasTree:
    """A tree-shaped polygon placed at a given centre and rotation.

    The outline has 15 vertices (tip, three foliage tiers and a trunk). It is
    built from Decimal coordinates multiplied by the module-level
    ``scale_factor``, rotated by ``angle`` about the origin and finally
    translated by the scaled centre. The result is stored in ``self.polygon``.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        """Build the placed polygon.

        Args:
            center_x: x offset of the tree; anything ``Decimal`` accepts.
            center_y: y offset of the tree; anything ``Decimal`` accepts.
            angle: rotation handed to ``shapely.affinity.rotate`` as a float
                (shapely treats it as degrees unless told otherwise).
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal("2")
        four = Decimal("4")

        # Full widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal("0.15")
        base_w = Decimal("0.7")
        mid_w = Decimal("0.4")
        top_w = Decimal("0.25")

        # Heights of the horizontal levels, from the tip down to the trunk.
        tip_y = Decimal("0.8")
        tier_1_y = Decimal("0.5")
        tier_2_y = Decimal("0.25")
        base_y = Decimal("0.0")
        trunk_bottom_y = -Decimal("0.2")

        # Right-hand half of the outline, walking down from just below the tip.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up to the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal("0.0"), tip_y), *right_side, *left_side]

        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        turned = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

def calculate_score(trees):
    """Return ``(score, side)`` for a group of placed trees.

    ``side`` is the longer edge of the axis-aligned bounding box around every
    exterior vertex of every tree, measured after dividing the stored
    coordinates by 1e15. ``score`` is ``side ** 2 / len(trees)``.

    Args:
        trees: non-empty sequence of objects exposing ``polygon.exterior.xy``.

    Returns:
        Tuple ``(score, side)``.

    Raises:
        ValueError: if ``trees`` is empty (no bounding box exists and the
            score would divide by zero).
    """
    if len(trees) == 0:
        # Fail with a message that names the real problem instead of letting
        # np.concatenate complain about an empty sequence of arrays.
        raise ValueError("calculate_score needs at least one tree, got an empty group")

    # One (num_vertices, 2) array per tree, stacked and rescaled back to
    # unscaled units.
    vertices = np.concatenate(
        [np.asarray(t.polygon.exterior.xy).T / 1e15 for t in trees]
    )
    min_x, min_y = vertices.min(axis=0)
    max_x, max_y = vertices.max(axis=0)
    side = max(max_x - min_x, max_y - min_y)
    score = side ** 2 / len(trees)
    return score, side

def load_trees(n, df):
    """Build the ChristmasTree objects of group ``n`` from a solution frame.

    Rows are selected by an ``id`` starting with the zero-padded group number
    followed by an underscore (e.g. ``"007_"``). The ``x``, ``y`` and ``deg``
    values are converted to strings and stripped of leading ``'s'``
    characters before being passed to ``ChristmasTree``.

    Args:
        n: group number, formatted as three digits.
        df: DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        List of ChristmasTree instances, in row order.
    """
    prefix = f"{n:03d}_"
    subset = df[df["id"].str.startswith(prefix)]
    return [
        ChristmasTree(
            str(x_val).lstrip('s'),
            str(y_val).lstrip('s'),
            str(deg_val).lstrip('s'),
        )
        for x_val, y_val, deg_val in zip(subset["x"], subset["y"], subset["deg"])
    ]

# Signal that the cell ran to completion and the helpers above are defined.
print("Functions defined successfully")

In [None]:
# Read the current best submission from disk.
df = pd.read_csv('/home/code/exploration/datasets/saspav_best.csv')
print(f"Loaded {len(df)} rows")

# Score every group N = 1..200 and keep one record per group.
scores = []
for n in range(1, 201):
    trees = load_trees(n, df)
    score, side = calculate_score(trees)
    scores.append(dict(n=n, score=score, side=side))

scores_df = pd.DataFrame(scores)

# Overall score and its distance from the target.
total_score = scores_df['score'].sum()
print(f"\nTotal score: {total_score:.6f}")
print("Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")

In [None]:
# Packing efficiency against an area-based lower bound.
#
# For a single tree, the minimum bounding box side is approximately 0.813
# (at 45 degrees).
#
# N non-overlapping trees need a square of area at least N * tree_area, so
#   side >= sqrt(N * tree_area)   and   score = side^2 / N >= tree_area
# (a constant, independent of N). The bound ignores the irregular shape and
# cannot be reached, but the ratio N * tree_area / side^2 tells how much of
# the bounding square is actually covered by trees.
#
# The area comes from the polygon itself rather than a rough guess:
#   trunk 0.03 + tiers 0.1125 + 0.065625 + 0.0375 = 0.245625
# (an estimate of 0.3 would overstate every efficiency by about 22%).
# Polygon coordinates are multiplied by scale_factor, so the polygon area is
# multiplied by scale_factor squared.
tree_area = ChristmasTree().polygon.area / float(scale_factor) ** 2

# Percentage of the bounding square covered by trees, for every N at once.
scores_df['efficiency'] = scores_df['n'] * tree_area / scores_df['side'] ** 2 * 100

print("Efficiency analysis (higher = better packing):")
print(scores_df.describe())

In [None]:
# Rank the groups by packing efficiency: the least efficient ones have the
# most room for improvement, the most efficient ones the least.
report_columns = ['n', 'score', 'side', 'efficiency']

lowest_eff = scores_df.nsmallest(20, 'efficiency')
highest_eff = scores_df.nlargest(20, 'efficiency')

print("\nN values with LOWEST efficiency (most room for improvement):")
print(lowest_eff[report_columns].to_string())

print("\nN values with HIGHEST efficiency (already well-optimized):")
print(highest_eff[report_columns].to_string())

In [None]:
# How much improvement is needed to reach the target, overall and per N range.
target = 68.919154
current_total = scores_df['score'].sum()
gap = current_total - target

print(f"Current total: {current_total:.6f}")
print(f"Target: {target}")
print(f"Gap to close: {gap:.6f}")
print(f"Average improvement needed per N: {gap / len(scores_df):.6f}")

# Relative reduction of the CURRENT total that closes the gap. This is the
# percentage to apply to current scores; expressing the gap relative to the
# target instead gives a slightly larger number that does not sum to the gap.
improvement_pct = gap / current_total * 100
print(f"\nRequired improvement: {improvement_pct:.2f}%")

# Score contributed by each N range (bounds inclusive), computed once and
# reused by both reports below.
n_ranges = [(1, 20), (21, 50), (51, 100), (101, 150), (151, 200)]
range_scores = {
    (start, end): scores_df.loc[scores_df['n'].between(start, end), 'score'].sum()
    for start, end in n_ranges
}

print("\nScore contribution by N range:")
for (start, end), range_score in range_scores.items():
    print(f"  N={start:3d}-{end:3d}: {range_score:.4f} ({range_score/current_total*100:.1f}%)")

# Uniform relative improvement: these per-range amounts add up to the gap.
print(f"\nIf we improve each range by {improvement_pct:.2f}%:")
for (start, end), range_score in range_scores.items():
    improvement = range_score * improvement_pct / 100
    print(f"  N={start:3d}-{end:3d}: improvement = {improvement:.4f}")

In [None]:
# Visualize the score distribution in a 2x2 grid and save it to disk.
plot_path = '/home/code/exploration/loop12_score_analysis.png'
avg_needed = gap / len(scores_df)  # average per-N improvement required

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Score per N, with the average required improvement as a reference line.
ax1 = axes[0, 0]
ax1.bar(scores_df['n'], scores_df['score'], alpha=0.7)
ax1.set_xlabel('N')
# Mathtext keeps the superscript ASCII-only; the previous literal had been
# corrupted by an encoding round-trip.
ax1.set_ylabel('Score ($S^2$/N)')
ax1.set_title('Score per N')
ax1.axhline(y=avg_needed, color='r', linestyle='--', label=f'Avg improvement needed: {avg_needed:.4f}')
ax1.legend()

# Efficiency per N
ax2 = axes[0, 1]
ax2.scatter(scores_df['n'], scores_df['efficiency'], alpha=0.5, s=10)
ax2.set_xlabel('N')
ax2.set_ylabel('Packing Efficiency (%)')
ax2.set_title('Packing Efficiency per N')

# Cumulative score against the target
ax3 = axes[1, 0]
scores_df['cumulative'] = scores_df['score'].cumsum()
ax3.plot(scores_df['n'], scores_df['cumulative'])
ax3.axhline(y=target, color='r', linestyle='--', label=f'Target: {target}')
ax3.set_xlabel('N')
ax3.set_ylabel('Cumulative Score')
ax3.set_title('Cumulative Score vs Target')
ax3.legend()

# Side length per N against the area-based lower bound. The bound uses the
# same tree_area as the efficiency column so both views agree.
ax4 = axes[1, 1]
ax4.scatter(scores_df['n'], scores_df['side'], alpha=0.5, s=10)
ax4.plot(
    scores_df['n'],
    np.sqrt(scores_df['n'] * tree_area),
    'r--',
    label=f'Theoretical min (sqrt(N*{tree_area:.3g}))',
)
ax4.set_xlabel('N')
ax4.set_ylabel('Side Length')
ax4.set_title('Side Length per N')
ax4.legend()

plt.tight_layout()
plt.savefig(plot_path, dpi=100)
plt.show()
print(f"\nPlot saved to {plot_path}")

In [None]:
# Key insight: what would it take to reach the target?
# Each option below estimates how many points a given strategy could recover.
# The gap and the required percentage are taken from the values computed
# earlier (gap, improvement_pct) so the text cannot drift from the data.

print("=" * 60)
print(f"KEY INSIGHT: Where can we find {gap:.3f} points of improvement?")
print("=" * 60)

# Option 1: improve every N by the same relative amount
print(f"\nOption 1: Uniform {improvement_pct:.2f}% improvement across all N")
print("  This requires finding better solutions for EVERY N value")
print("  Unlikely given that public solutions are already well-optimized")

# Option 2: focus on low-efficiency N values
print("\nOption 2: Focus on N values with lowest efficiency")
low_eff = scores_df[scores_df['efficiency'] < 50]
print(f"  Found {len(low_eff)} N values with efficiency < 50%")
if not low_eff.empty:
    potential_improvement = low_eff['score'].sum() * 0.1  # 10% improvement on these
    print(f"  If we improve these by 10%: {potential_improvement:.4f} points")

# Option 3: focus on small N (where SA works best)
print("\nOption 3: Focus on small N (1-20) where SA is most effective")
small_n_score = scores_df[scores_df['n'] <= 20]['score'].sum()
print(f"  Current small N score: {small_n_score:.4f}")
print(f"  If we improve by 10%: {small_n_score * 0.1:.4f} points")

# Option 4: focus on large N (where crystalline packing helps)
print("\nOption 4: Focus on large N (100-200) where crystalline packing helps")
large_n_score = scores_df[scores_df['n'] >= 100]['score'].sum()
print(f"  Current large N score: {large_n_score:.4f}")
print(f"  If we improve by 5%: {large_n_score * 0.05:.4f} points")

print("\n" + "=" * 60)
print(f"CONCLUSION: The gap of {gap:.3f} points is SIGNIFICANT")
print("We need fundamentally different solutions, not micro-optimizations")
print("=" * 60)