# Evolver Loop 12 - LB Feedback Analysis

## Submission Result
- **CV Score**: 70.6600
- **LB Score**: 70.6600
- **Gap**: 0.0000 (perfect match)

## Key Observations

1. **CV-LB Gap is ZERO** - This is an optimization problem, not a prediction problem. CV = LB exactly.

2. **12 experiments, all valid ones converging to ~70.66** (exp_004 scored 51.42 but was invalid because of overlaps) - We are stuck at a strong local optimum.

3. **Target is 68.92** - We need to improve by 1.74 points (2.5%).

4. **Local optimization has failed** - bbox3, SA, rotation optimization, tree removal - all show negligible improvement.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Experiment log: (experiment id, short description, resulting score).
experiments = [
    ('exp_000', 'Baseline', 70.659959),
    ('exp_001', 'C++ SA', 70.659959),
    ('exp_002', 'Lattice Construction', 70.659959),
    ('exp_003', 'Lattice + SA', 70.659959),
    ('exp_004', 'Comprehensive Ensemble', 51.423527),  # Invalid - overlaps
    ('exp_005', 'Valid Ensemble', 70.659959),
    ('exp_006', 'Eazy Optimizer', 70.659944),
    ('exp_007', 'Rotation Optimization', 70.659959),
    ('exp_008', 'Multi-Seed bbox3', 70.659958666),
    ('exp_009', 'Tree Removal', 70.659959),
    ('exp_010', 'bbox3 with Repair', 70.659958437),
    ('exp_011', 'Long bbox3 Multi-Phase', 70.659958593),
]

df = pd.DataFrame(experiments, columns=['id', 'name', 'score'])
print("Experiment History:")
print(df.to_string(index=False))

# Drop the invalid ensemble: its score (51.42) is the only one at or below 60,
# so a simple threshold separates it from the valid runs.
valid_df = df.loc[df['score'] > 60]

# Lower is better, so the best valid run is the minimum score.
best_valid_score = valid_df['score'].min()
gap_to_target = best_valid_score - 68.919154
print(f"\nBest valid score: {best_valid_score:.9f}")
print("Target: 68.919154")
print(f"Gap: {gap_to_target:.6f} ({gap_to_target / 68.919154 * 100:.2f}%)")

# A near-zero variance shows every valid experiment landed on the same score.
score_variance = valid_df['score'].var()
print(f"\nScore variance: {score_variance:.12f}")
print("All experiments converge to ~70.66 - we are at a LOCAL OPTIMUM")

In [None]:
# Analyze per-N contribution to understand where improvements are possible
import os
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity

# Carry 30 significant digits in Decimal arithmetic for the current context.
getcontext().prec = 30
# Multiplier applied to every polygon coordinate in ChristmasTree; get_score
# divides by the same factor to return to original units.
scale_factor = Decimal("1e18")

class ChristmasTree:
    """A single tree polygon placed at a given centre and rotation.

    The 15-vertex outline (tip, three tiers, trunk) is built in Decimal
    arithmetic, multiplied by the module-level ``scale_factor``, rotated
    about the origin and then translated to the requested centre.

    Attributes:
        center_x, center_y: Placement offsets as ``Decimal`` (unscaled).
        angle: Rotation as ``Decimal``; handed to ``affinity.rotate``,
            which treats it as degrees by default.
        polygon: The resulting shapely polygon in *scaled* coordinates.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        # Route through str() so floats and strings both become Decimals.
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        two = Decimal("2")
        four = Decimal("4")

        # Widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal("0.15")
        base_w = Decimal("0.7")
        mid_w = Decimal("0.4")
        top_w = Decimal("0.25")

        # Heights of the outline's horizontal levels.
        tip_y = Decimal("0.8")
        tier_1_y = Decimal("0.5")
        tier_2_y = Decimal("0.25")
        base_y = Decimal("0.0")
        trunk_bottom_y = -Decimal("0.2")

        # Right-hand half of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left half is the mirror image, traversed bottom-up so the ring
        # continues around the outline in one direction.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal("0.0"), tip_y)] + right_side + left_side

        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        # Rotate about the tree's own origin first, then move it into place.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

def load_configuration_from_df(n, df):
    """Build the trees of the ``n``-tree group from a submission frame.

    Args:
        n: Group size; rows whose ``id`` starts with the zero-padded
            three-digit prefix ``f"{n:03d}_"`` are selected.
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``.

    Returns:
        A list of ``ChristmasTree`` objects, one per selected row. A row is
        skipped when any of its three values is empty after removing a
        leading ``'s'``.
    """
    def _unprefixed(value):
        # Values may carry a leading 's' marker; remove it if present.
        text = str(value)
        return text[1:] if text.startswith('s') else text

    prefix = f"{n:03d}_"
    rows = df[df["id"].str.startswith(prefix)]

    trees = []
    for _, row in rows.iterrows():
        x = _unprefixed(row["x"])
        y = _unprefixed(row["y"])
        deg = _unprefixed(row["deg"])
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees

def get_score(trees, n):
    """Return the score of a configuration: bounding-square area divided by ``n``.

    Args:
        trees: Sequence of objects exposing ``polygon.exterior.xy`` in
            coordinates scaled by the module-level ``scale_factor``.
        n: Divisor applied to the squared side length.

    Returns:
        ``side_length**2 / n`` where ``side_length`` is the longer side of the
        axis-aligned bounding box of all outlines, or ``float('inf')`` when
        ``trees`` is empty.
    """
    if not trees:
        return float('inf')

    unscale = float(scale_factor)
    # Stack every outline vertex into one (num_points, 2) array in real units.
    points = np.vstack(
        [np.asarray(tree.polygon.exterior.xy).T / unscale for tree in trees]
    )
    lower = points.min(axis=0)
    upper = points.max(axis=0)
    width, height = upper - lower
    side_length = max(width, height)
    return side_length**2 / n

# Progress marker: confirms the helper definitions in this cell executed.
print("Functions defined")

In [None]:
# Read the baseline submission and score each group size N = 1..200.
df_baseline = pd.read_csv('/home/code/external_data/saspav/santa-2025.csv')

per_n_scores = []
for n in range(1, 201):
    trees = load_configuration_from_df(n, df_baseline)
    if not trees:
        # No usable rows for this N: leave it out of the table.
        continue
    score = get_score(trees, n)
    # 'contribution' mirrors 'score': each N adds its own score to the total.
    per_n_scores.append(dict(n=n, score=score, contribution=score))

per_n_df = pd.DataFrame(per_n_scores)
total_score = per_n_df['score'].sum()
print(f"Total score: {total_score:.9f}")
print("\nPer-N score distribution:")
print(per_n_df.describe())

In [None]:
# Compute packing efficiency = (total tree area) / (bounding-square area).
# The n trees cover n * tree_area, so no square smaller than that area can
# hold them: side >= sqrt(n * tree_area). Efficiency is therefore at most 1.0
# and equals tree_area / score for each N.

# Overall extents of a single unrotated tree.
tree_width = 0.7  # base width
tree_height = 1.0  # from trunk bottom to tip

# Exact area of one tree, summed piece by piece from the outline used by
# ChristmasTree. A 0.5 * width * height triangle estimate gives 0.35, which
# overstates the true area (0.245625) by ~42% and would push efficiencies to
# or above the 1.0 ceiling.
tree_area = (
    0.5 * 0.25 * 0.3              # top triangle: base 0.25, from y=0.5 to y=0.8
    + 0.5 * (0.125 + 0.4) * 0.25  # middle tier: widths 0.125 -> 0.4 over 0.25
    + 0.5 * (0.2 + 0.7) * 0.25    # bottom tier: widths 0.2 -> 0.7 over 0.25
    + 0.15 * 0.2                  # trunk rectangle
)

# Area lower bound on the side vs. the side actually achieved (score = side^2 / n).
per_n_df['theoretical_min_side'] = np.sqrt(per_n_df['n'] * tree_area)
per_n_df['actual_side'] = np.sqrt(per_n_df['score'] * per_n_df['n'])
per_n_df['efficiency'] = per_n_df['theoretical_min_side']**2 / (per_n_df['actual_side']**2)

print("Efficiency analysis (higher is better):")
print(per_n_df[['n', 'score', 'efficiency']].head(20))

# Find N values with worst efficiency
worst_efficiency = per_n_df.nsmallest(20, 'efficiency')
print(f"\nN values with WORST efficiency (most room for improvement):")
print(worst_efficiency[['n', 'score', 'efficiency']])

In [None]:
# Plot per-N score (left) and packing efficiency (right), then save the figure.
plt.figure(figsize=(14, 6))

plt.subplot(1, 2, 1)
plt.plot(per_n_df['n'], per_n_df['score'], 'b-', alpha=0.7)
plt.xlabel('N (number of trees)')
# ASCII mathtext label: the previous literal held a mis-encoded superscript
# two and rendered as garbage on the axis.
plt.ylabel('Score ($s^2/n$)')
plt.title('Per-N Score Distribution')
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
plt.plot(per_n_df['n'], per_n_df['efficiency'], 'g-', alpha=0.7)
plt.xlabel('N (number of trees)')
plt.ylabel('Efficiency')
plt.title('Packing Efficiency by N')
plt.axhline(y=1.0, color='r', linestyle='--', label='Theoretical max')
plt.grid(True, alpha=0.3)
plt.legend()

plt.tight_layout()
plt.savefig('/home/code/exploration/per_n_analysis.png', dpi=100)
plt.show()

# NOTE(review): the "key insight" text below is hard-coded, not derived from
# the data; confirm it against the worst-efficiency table before relying on it.
print("\nKey insight: Small N values (1-10) have the WORST efficiency.")
print("These contribute disproportionately to the total score.")

# Share of the total score that comes from the ten smallest group sizes.
small_n_total = per_n_df.loc[per_n_df['n'] <= 10, 'score'].sum()
print(f"N=1-10 total contribution: {small_n_total:.4f}")
print(f"N=1-10 as % of total: {small_n_total / per_n_df['score'].sum() * 100:.1f}%")

In [None]:
# How far is the current total from the target, and how much could N<=50 give?
target = 68.919154
current = per_n_df['score'].sum()
gap = current - target

# Uniform relative improvement every N would need for the total to hit target.
required_improvement_pct = gap / current

print(f"Current score: {current:.6f}")
print(f"Target score: {target:.6f}")
print(f"Gap to close: {gap:.6f}")
print(f"Gap as %: {required_improvement_pct * 100:.2f}%")

print(f"\nRequired improvement per N: {required_improvement_pct * 100:.2f}%")

# Alternative: concentrate the effort on the small group sizes N = 1..50
# and see what a 10 / 20 / 30 % improvement there would save.
small_n_contribution = per_n_df.loc[per_n_df['n'] <= 50, 'score'].sum()
small_n_share = small_n_contribution / current * 100
print(f"\nN=1-50 contribution: {small_n_contribution:.4f} ({small_n_share:.1f}% of total)")
for pct in (10, 20, 30):
    print(f"If we improve N=1-50 by {pct}%: saves {small_n_contribution * (pct / 100):.4f}")

## Strategic Analysis

### What We've Tried (All Failed)
1. **Local optimization (SA, bbox3)** - Negligible improvement (~0.000001)
2. **Rotation optimization** - No improvement (baseline already optimal)
3. **Tree removal technique** - No improvement
4. **Lattice construction** - Much worse than baseline
5. **Ensembling** - No valid sources better than baseline

### What We Haven't Tried
1. **Genetic Algorithm** - Crossover of configurations from different N values
2. **Basin Hopping** - Random perturbations followed by local optimization
3. **Constraint Programming (CP-SAT)** - Exact solver for small N
4. **Asymmetric solutions** - Discussion 666880 suggests these are better
5. **Different lattice types** - Hexagonal, FCC, HCP
6. **Focus on small N** - N=1-10 have worst efficiency

### Key Insight
The target (68.92) is 2.27 points BELOW the public LB best (71.19). This means:
- Top teams have techniques they haven't shared publicly
- We need to DISCOVER new techniques, not just optimize existing ones
- Our own gap to the target (70.66 → 68.92, about 2.5%) is significant but achievable

In [None]:
# Check what the optimal N=1 solution should be
# For N=1, the optimal solution is a single tree at the origin with optimal rotation
# The bounding box is determined by the tree's dimensions

# Tree dimensions
# Overall extents of the unrotated tree, kept for reference alongside the
# reasoning comments below; compute_tree_bbox uses its own literal vertices
# and does not read these two names.
tree_width = 0.7  # base width
tree_height = 1.0  # from -0.2 to 0.8

# For a single tree, the optimal rotation minimizes the bounding box
# At 0 degrees: width=0.7, height=1.0, side=1.0, score=1.0
# At 45 degrees: the tree is rotated, bounding box changes

import math

def compute_tree_bbox(angle_deg):
    """Return the bounding-square side and area of one tree rotated by ``angle_deg``.

    The tree is reduced to five extreme vertices (tip, both base corners,
    both trunk-bottom corners). The tier notches of the full outline lie
    inside the hull of these points, so they cannot change the bounding box.

    Args:
        angle_deg: Rotation about the origin, in degrees.

    Returns:
        A ``(side, side**2)`` tuple, where ``side`` is the longer edge of the
        axis-aligned bounding box of the rotated vertices.
    """
    theta = math.radians(angle_deg)
    cos_a, sin_a = math.cos(theta), math.sin(theta)

    # Tree vertices (simplified)
    hull = (
        (0, 0.8),        # tip
        (0.35, 0),       # right base
        (-0.35, 0),      # left base
        (0.075, -0.2),   # right trunk
        (-0.075, -0.2),  # left trunk
    )

    # Standard 2-D rotation applied to every vertex.
    xs = [x * cos_a - y * sin_a for x, y in hull]
    ys = [x * sin_a + y * cos_a for x, y in hull]

    side = max(max(xs) - min(xs), max(ys) - min(ys))
    return side, side**2

# Grid-search whole-degree rotations (0..359) for the smallest N=1 bounding
# square; the first angle that attains the minimum wins.
best_angle, best_score = 0, float('inf')
for angle in range(360):
    side, score = compute_tree_bbox(angle)
    if score < best_score:
        best_angle, best_score = angle, score

print(f"Optimal rotation for N=1: {best_angle} degrees")
print(f"Optimal score for N=1: {best_score:.6f}")

# Score the baseline submission currently achieves for N=1.
baseline_n1 = per_n_df.loc[per_n_df['n'] == 1, 'score'].values[0]
print(f"Current N=1 score: {baseline_n1:.6f}")

# A positive difference means the baseline N=1 layout can still be improved.
print(f"\nN=1 improvement potential: {baseline_n1 - best_score:.6f}")

In [None]:
# Summary of findings: a fixed text report (the figures below are hard-coded,
# not recomputed from the frames above).
banner = "=" * 60
summary_lines = [
    banner,
    "SUMMARY OF FINDINGS",
    banner,
    "",
    "Current best score: 70.659958593",
    "Target score: 68.919154",
    "Gap: 1.740804 (2.5%)",
    "",
    "Key observations:",
    "1. All 12 experiments converge to ~70.66 - STRONG LOCAL OPTIMUM",
    "2. Local optimization (SA, bbox3) provides ~0 improvement",
    "3. N=1-10 have worst efficiency but small contribution",
    "4. Target is BELOW public LB best - need novel techniques",
    "",
    "Recommended next steps:",
    "1. Implement Genetic Algorithm with crossover",
    "2. Try Basin Hopping (random perturbations + local opt)",
    "3. Focus on asymmetric solutions (per discussion 666880)",
    "4. Try CP-SAT for small N values (exact solver)",
    "5. Explore different lattice types for large N",
    "",
    "CRITICAL: Local optimization is EXHAUSTED. Must pivot to global search.",
]
# One print with newline-joined lines emits the same text as line-by-line prints.
print("\n".join(summary_lines))