# Evolver Loop 11 - LB Feedback Analysis

## Submission Result
- **CV Score**: 70.6305
- **LB Score**: 70.6305 (PERFECT CALIBRATION!)
- **Target**: 68.919154
- **Gap**: 1.711 points (2.42%)

## Key Insight from Web Research
Top teams achieving sub-69 scores use THREE different packers:
1. **Chebyshev-distance square-packing** - regular grid
2. **Smart Scanline linear packer** - sweep line, insert at first feasible position
3. **2-tree alternating lattice** - pairs of trees alternating up/down

They run ALL THREE packers for each N = 1–200 and keep whichever result has the smallest `s_n` (the bounding-square side length for that N).

In [None]:
import pandas as pd
import numpy as np
import json

# Read the current best submission into a DataFrame and show a quick preview.
best_solution_path = '/home/code/exploration/datasets/saspav_best.csv'
current_best = pd.read_csv(best_solution_path)

row_count = len(current_best)
print(f"Current best solution: {row_count} rows")
print(current_best.head())

In [None]:
# Analyze per-N scores to find where we have the most room for improvement
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union

# Carry 25 significant digits in every Decimal operation performed below.
getcontext().prec = 25
# Tree coordinates are multiplied by this factor before the shapely polygons
# are built, and divided back out (as 1e15) when scores and areas are computed.
# NOTE(review): presumably done to limit float precision loss — confirm.
scale_factor = Decimal('1e15')

class ChristmasTree:
    """A single tree: a fixed 15-vertex outline placed at a position and angle.

    The outline is built in local coordinates (tip at y=0.8, tier breaks at
    y=0.5 and y=0.25, base at y=0, trunk bottom at y=-0.2), scaled by the
    module-level ``scale_factor``, rotated about the local origin and then
    translated to the requested centre.

    Attributes:
        center_x: Decimal x position of the tree's local origin.
        center_y: Decimal y position of the tree's local origin.
        angle: Decimal rotation passed to ``shapely.affinity.rotate``
            (which interprets it as degrees by default).
        polygon: The resulting shapely ``Polygon`` in scaled coordinates.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the placed polygon from Decimal-compatible inputs."""
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Outline dimensions in unscaled local units.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        def scaled(value):
            return value * scale_factor

        two = Decimal('2')
        four = Decimal('4')

        # Right-hand half of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (scaled(top_w / two), scaled(tier_1_y)),
            (scaled(top_w / four), scaled(tier_1_y)),
            (scaled(mid_w / two), scaled(tier_2_y)),
            (scaled(mid_w / four), scaled(tier_2_y)),
            (scaled(base_w / two), scaled(base_y)),
            (scaled(trunk_w / two), scaled(base_y)),
            (scaled(trunk_w / two), scaled(trunk_bottom_y)),
        ]
        # The left-hand half is the mirror image, walked back up to the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]

        tip = (scaled(Decimal('0.0')), scaled(tip_y))
        initial_polygon = Polygon([tip, *right_side, *left_side])

        # Rotate about the local origin first, then move to the centre.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(rotated, xoff=shift_x, yoff=shift_y)

def load_trees(n, df):
    """Build the ChristmasTree objects for puzzle size ``n``.

    Rows are selected by their ``id`` starting with the zero-padded
    three-digit ``n`` followed by an underscore (e.g. ``007_``).

    Args:
        n: Puzzle size whose rows should be loaded.
        df: DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns. The
            three value columns may carry a single leading ``'s'`` marker,
            which is removed before parsing.

    Returns:
        A list of ChristmasTree instances, one per matching row, in the
        DataFrame's row order. Empty when no row matches.
    """
    def _strip_marker(value):
        # str.lstrip takes a *set of characters*, not a prefix, so the
        # previous lstrip('sx') / lstrip('sy') / lstrip('sd') calls only
        # worked by accident. Remove exactly one leading 's' instead.
        text = str(value)
        return text[1:] if text.startswith('s') else text

    group_data = df[df['id'].str.startswith(f'{n:03d}_')]
    return [
        ChristmasTree(_strip_marker(x), _strip_marker(y), _strip_marker(deg))
        for x, y, deg in zip(group_data['x'], group_data['y'], group_data['deg'])
    ]

def calculate_score(trees):
    """Return the per-N score of a group of placed trees.

    The score is the squared side of the smallest axis-aligned square that
    contains every exterior vertex of every tree, divided by the number of
    trees. Polygon coordinates are divided by 1e15 to undo the scaling
    applied when the polygons were built.

    Args:
        trees: Non-empty sequence of objects exposing
            ``polygon.exterior.xy`` (x and y coordinate sequences).

    Returns:
        ``max(width, height) ** 2 / len(trees)`` as a float.

    Raises:
        ValueError: If ``trees`` is empty.
    """
    # Fail with a clear message instead of numpy's opaque
    # "need at least one array to concatenate".
    if len(trees) == 0:
        raise ValueError("calculate_score requires at least one tree")

    xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T / 1e15 for t in trees])
    min_x, min_y = xys.min(axis=0)
    max_x, max_y = xys.max(axis=0)
    side = max(max_x - min_x, max_y - min_y)
    return side ** 2 / len(trees)

print("Functions defined")

In [None]:
# Score every puzzle size N = 1..200 of the current best submission.
per_n_scores = {}
for n in range(1, 201):
    trees = load_trees(n, current_best)
    per_n_scores.update({n: calculate_score(trees)})

# Rank puzzle sizes from highest to lowest score; the highest-scoring sizes
# are the ones with the most room for improvement.
sorted_scores = sorted(
    per_n_scores.items(),
    key=lambda item: item[1],
    reverse=True,
)

print("Top 20 N values with highest per-N scores (most room for improvement):")
print("="*60)
worst_twenty = sorted_scores[:20]
for n, score in worst_twenty:
    print(f"N={n:3d}: score = {score:.6f}")

print("\nTotal score:", sum(per_n_scores.values()))

In [None]:
# Break the total score down by bands of N to see which band dominates.
# Each entry is (first N, last N inclusive, label).
ranges = [
    (1, 10, "Small (1-10)"),
    (11, 50, "Medium (11-50)"),
    (51, 100, "Large (51-100)"),
    (101, 150, "Very Large (101-150)"),
    (151, 200, "Huge (151-200)")
]

# The grand total does not change between bands, so compute it once
# rather than re-summing all per-N scores on every loop iteration.
total_score = sum(per_n_scores.values())

print("Score contribution by N range:")
print("="*60)
for start, end, name in ranges:
    range_score = sum(per_n_scores[n] for n in range(start, end + 1))
    range_pct = range_score / total_score * 100
    print(f"{name:20s}: {range_score:.4f} ({range_pct:.1f}%)")

In [None]:
# Estimate a lower bound on the per-N score from the tree's area.
# Tree bounding box at angle=45: ~0.813 x 0.813 (roughly square).
# Tree polygon area: ~0.2456 (trunk 0.03 + tiers 0.1125 + 0.065625 + 0.0375);
# the area does not depend on the rotation angle.

tree = ChristmasTree('0', '0', '45')
# Polygon coordinates are scaled by 1e15, so areas are scaled by its square.
tree_area = tree.polygon.area / (1e15 ** 2)
print(f"Tree polygon area at angle=45: {tree_area:.6f}")

# For N perfectly packed trees: minimum side = sqrt(N * tree_area / packing_efficiency).
# Typical packing efficiency for irregular polygons: 60-70%; the optimistic
# end of that range is used here.
packing_efficiency = 0.70
sample_sizes = [1, 10, 50, 100, 150, 200]

print("\nTheoretical analysis:")
print("="*60)
for n in sample_sizes:
    current = per_n_scores[n]
    # Side of the bounding square implied by the current score.
    current_side = np.sqrt(current * n)

    # Bounding-square side and score at the assumed packing efficiency.
    theoretical_side = np.sqrt(n * tree_area / packing_efficiency)
    theoretical_score = theoretical_side ** 2 / n

    # Absolute and relative distance between the current and estimated score.
    gap = current - theoretical_score
    gap_pct = gap / current * 100

    print(f"N={n:3d}: current={current:.4f}, theoretical={theoretical_score:.4f}, gap={gap:.4f} ({gap_pct:.1f}%)")

In [None]:
# Key insight: the gap to target (1.711 points) is 2.42% of the current score.
# A gap this large cannot be closed by micro-optimization.

# Approaches already tried, none of which helped:
#   1. Random restart SA - NO improvements
#   2. Exhaustive search for N=1,2 - baseline already optimal
#   3. Genetic algorithm - NO improvements
#   4. Tessellation SA - WORSE than baseline
#   5. Deletion cascade - NO improvements
#   6. Ensemble from 25 sources - hit ceiling at 70.630478
tried_approaches = (
    "Random restart SA",
    "Exhaustive search N=1,2",
    "Genetic algorithm",
    "Tessellation SA",
    "Deletion cascade",
    "Ensemble from 25 sources (hit ceiling)",
)

# Approaches not yet tried:
#   1. Chebyshev-distance square-packing
#   2. Smart Scanline linear packer
#   3. 2-tree alternating lattice
#   4. Running C++ optimizer with PER-N parameters (not total iterations)
untried_approaches = (
    "Chebyshev-distance square-packing",
    "Smart Scanline linear packer",
    "2-tree alternating lattice",
    "Per-N optimization with C++ optimizer",
)

print("Summary of approaches:")
print("="*60)
print("TRIED (no improvement):")
for approach in tried_approaches:
    print(f"  - {approach}")
print()
print("NOT TRIED:")
for approach in untried_approaches:
    print(f"  - {approach}")

In [None]:
# The key insight from web research:
# top teams run THREE different packers for each N and pick the best.
# This is fundamentally different from what we've been doing.

# Our current approach: ensemble from public sources.
# Top team approach: generate NEW solutions with multiple algorithms.

# Closing the gap (1.711 points) requires either:
# - an average improvement of 0.00856 per N value, or
# - significant improvements on a few high-impact N values.

# Derive the gap from the two scores it depends on, so the figures printed
# below cannot drift apart if either score is updated.
current_total = 70.630478
target_score = 68.919154
score_gap = current_total - target_score

print("Gap analysis:")
print("="*60)
print(f"Current total: {current_total:.6f}")
print(f"Target: {target_score:.6f}")
print(f"Gap: {score_gap:.6f}")
print(f"Average improvement needed per N: {score_gap / 200:.6f}")
print()
print("If we improve only the top 20 N values:")
print(f"  Average improvement needed: {score_gap / 20:.6f} per N")
print()
print("If we improve only the top 50 N values:")
print(f"  Average improvement needed: {score_gap / 50:.6f} per N")