# Loop 16 Analysis: Iterative Mixing Strategy

## Key Insights from Research

1. **jiweiliu kernel**: Shows iterative mixing achieves continuous improvements:
   - 71.65 → 71.46 (SA workflow)
   - 71.46 → 71.45 (guided refinement)
   - 71.45 → 71.36 (SA workflow again)

2. **sacuscreed kernel**: Guided refinement = small perturbations to squeeze improvements

3. **chistyakov kernel**: Corner-based reconstruction from large N to small N

## Current Status
- Best score: 70.630478 (saspav_best ensemble)
- Target: 68.919154
- Gap: 1.711 points (2.48% of the target; 2.42% of the current score)
- 7 consecutive experiments with NO improvement

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union

# Use 25 significant digits for all Decimal arithmetic in this notebook.
getcontext().prec = 25
# Multiplier applied to tree coordinates before they are turned into polygons;
# calculate_score divides by the same magnitude (1e15) to return to the
# original units.
scale_factor = Decimal("1e15")

class ChristmasTree:
    """A tree outline placed at a given centre and rotation.

    The centre coordinates and the angle are kept as ``Decimal`` values.
    ``polygon`` holds the 15-vertex outline with every coordinate multiplied
    by the module-level ``scale_factor``, rotated about the origin and then
    shifted to the (scaled) centre.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the placed polygon.

        Args:
            center_x: Horizontal position; anything ``Decimal`` accepts.
            center_y: Vertical position; anything ``Decimal`` accepts.
            angle: Rotation handed to ``affinity.rotate`` (which treats it as
                degrees unless told otherwise); anything ``Decimal`` accepts.
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal('2')
        four = Decimal('4')

        # Widths of the trunk and of the three foliage tiers.
        trunk_width = Decimal('0.15')
        trunk_height = Decimal('0.2')
        base_width = Decimal('0.7')
        mid_width = Decimal('0.4')
        top_width = Decimal('0.25')

        # Heights at which the outline changes direction.
        tip_level = Decimal('0.8')
        upper_level = Decimal('0.5')
        lower_level = Decimal('0.25')
        ground_level = Decimal('0.0')
        trunk_floor = -trunk_height

        # Right-hand half of the outline, walking from just below the tip
        # down to the bottom of the trunk.
        right_side = [
            (top_width / two, upper_level),
            (top_width / four, upper_level),
            (mid_width / two, lower_level),
            (mid_width / four, lower_level),
            (base_width / two, ground_level),
            (trunk_width / two, ground_level),
            (trunk_width / two, trunk_floor),
        ]
        # The left-hand half is the mirror image, walked back up to the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]

        outline = [(Decimal('0.0'), tip_level), *right_side, *left_side]
        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

    def clone(self):
        """Return a new, independent tree with the same centre and angle."""
        return ChristmasTree(str(self.center_x), str(self.center_y), str(self.angle))

def load_trees(n, df):
    """Build the trees of configuration ``n`` from a submission table.

    Rows are selected by an ``id`` that starts with ``n`` zero-padded to three
    digits followed by an underscore. The ``x``, ``y`` and ``deg`` values are
    converted to text and any leading ``s`` characters are removed before they
    are passed to ``ChristmasTree``.

    Args:
        n: Configuration number used to build the id prefix.
        df: Table with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        A list of ``ChristmasTree`` objects, one per matching row, in row order.
    """
    prefix = f"{n:03d}_"
    matching = df[df["id"].str.startswith(prefix)]

    def as_number_text(value):
        # Drop the leading 's' marker(s) from the stored value.
        return str(value).lstrip('s')

    return [
        ChristmasTree(as_number_text(x), as_number_text(y), as_number_text(deg))
        for x, y, deg in zip(matching["x"], matching["y"], matching["deg"])
    ]

def calculate_score(trees):
    """Return the squared bounding-square side per tree for a layout.

    The exterior vertices of every tree polygon are divided by 1e15, the
    axis-aligned extent of all vertices is measured, and the longer of the
    two extents is squared and divided by the number of trees.

    Args:
        trees: Objects exposing ``polygon.exterior.xy``.

    Returns:
        ``max(width, height) ** 2 / len(trees)``.
    """
    outlines = [np.asarray(tree.polygon.exterior.xy).T / 1e15 for tree in trees]
    points = np.concatenate(outlines)
    width, height = points.max(axis=0) - points.min(axis=0)
    side = max(width, height)
    return side ** 2 / len(trees)

# Visible marker in the cell output that the definitions above were executed.
print("Functions loaded")

In [None]:
# Load current best
# (This cell had been exported with literal "\n" escapes, which turned the
# whole cell into a single comment line; the statements are restored here so
# that current_best_df, scores and total_score exist for the cells below.)
import os
os.chdir('/home/code/exploration')
current_best_df = pd.read_csv('datasets/saspav_best.csv')

# Calculate per-N scores
scores = {}
for n in range(1, 201):
    trees = load_trees(n, current_best_df)
    scores[n] = calculate_score(trees)

total_score = sum(scores.values())
print(f"Current total score: {total_score:.6f}")
print(f"Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f} ({(total_score - 68.919154)/68.919154*100:.2f}%)")

# Identify worst efficiency N values
efficiencies = {}
for n, score in scores.items():
    # Efficiency = tree_area * n / bounding_box_area
    # NOTE(review): the outline built in ChristmasTree appears to enclose
    # 0.245625 (tiers 0.0375 + 0.065625 + 0.1125 plus trunk 0.03); confirm
    # before relying on the absolute percentages. The ranking is unaffected.
    tree_area = 0.2525  # approximate tree area
    bbox_area = score * n  # score = side^2 / n, so side^2 = score * n
    efficiency = (tree_area * n) / bbox_area * 100
    efficiencies[n] = efficiency

print("\nWorst efficiency N values:")
worst = sorted(efficiencies.items(), key=lambda x: x[1])[:10]
for n, eff in worst:
    print(f"  N={n}: {eff:.1f}% efficiency, score={scores[n]:.6f}")

In [None]:
# Break the total score down by bands of N and show how much each band would
# have to improve, on its own, to close the gap to the target.
ranges = [
    (1, 20, "Small N"),
    (21, 50, "Medium N"),
    (51, 100, "Large N"),
    (101, 200, "Very Large N")
]

# Sum of the per-N scores inside each band, computed once and reused below.
band_totals = [
    sum(scores[n] for n in range(first, last + 1))
    for first, last, _ in ranges
]

print("Score contribution by range:")
for (start, end, name), range_score in zip(ranges, band_totals):
    pct = range_score / total_score * 100
    print(f"  {name} ({start}-{end}): {range_score:.4f} ({pct:.1f}%)")

# Relative improvement each band would need to cover the whole gap by itself
gap = total_score - 68.919154
print(f"\nGap to close: {gap:.6f}")
print("\nIf we improve each range by X%:")
for (start, end, name), range_score in zip(ranges, band_totals):
    improvement_needed = gap / range_score * 100
    print(f"  {name}: need {improvement_needed:.1f}% improvement to close entire gap")

In [None]:
# Corner-based reconstruction analysis
# Starting from a large-N layout (N=200 below), check whether the trees nearest
# one corner form a subset that beats the current best for a smaller N

def get_corner_subsets(trees, corner_idx):
    """Return the trees ordered by increasing distance from one layout corner.

    The layout extent is the axis-aligned box enclosing every tree's bounding
    box. It is derived directly from the per-tree bounds, so no geometric
    union of the polygons has to be built just to read its extent.

    A tree's distance from the corner is the largest absolute offset, along
    either axis, between the corner and any edge of the tree's bounding box.
    Taking the first ``k`` trees of the result therefore yields the ``k``
    trees that fit in the smallest square anchored at that corner.

    Args:
        trees: Objects exposing ``polygon.bounds`` as
            ``(min_x, min_y, max_x, max_y)``.
        corner_idx: 0 = bottom-left, 1 = top-left, 2 = bottom-right,
            3 = top-right.

    Returns:
        A new list with the same tree objects, nearest first. Trees at equal
        distance keep their input order. An empty input gives an empty list.

    Raises:
        IndexError: If ``corner_idx`` does not select one of the four corners.
    """
    trees = list(trees)
    if not trees:
        return []

    tree_bounds = [tree.polygon.bounds for tree in trees]
    min_x = min(b[0] for b in tree_bounds)
    min_y = min(b[1] for b in tree_bounds)
    max_x = max(b[2] for b in tree_bounds)
    max_y = max(b[3] for b in tree_bounds)

    corners = [
        (min_x, min_y),  # bottom-left
        (min_x, max_y),  # top-left
        (max_x, min_y),  # bottom-right
        (max_x, max_y),  # top-right
    ]
    corner_x, corner_y = corners[corner_idx]

    def corner_distance(b):
        # Farthest bounding-box edge from the corner along either axis.
        return max(
            abs(b[0] - corner_x),
            abs(b[2] - corner_x),
            abs(b[1] - corner_y),
            abs(b[3] - corner_y),
        )

    # sorted() is stable, so ties stay in input order.
    order = sorted(
        range(len(trees)),
        key=lambda i: corner_distance(tree_bounds[i]),
    )
    return [trees[i] for i in order]

# Try every corner of the N=200 layout: take the nearest 2..49 trees and
# record each case where that subset scores better than the current best for
# the same number of trees.
print("Testing corner-based reconstruction from N=200...")
large_trees = load_trees(200, current_best_df)

corner_names = ['bottom-left', 'top-left', 'bottom-right', 'top-right']
improvements = []
for corner_idx, corner_name in enumerate(corner_names):
    sorted_trees = get_corner_subsets(large_trees, corner_idx)

    for target_n in range(2, 50):
        subset = [t.clone() for t in sorted_trees[:target_n]]
        subset_score = calculate_score(subset)
        current_score = scores[target_n]

        # Lower score is better; keep only strict improvements.
        if subset_score < current_score:
            improvements.append((
                target_n,
                corner_name,
                subset_score,
                current_score,
                current_score - subset_score,
            ))

if not improvements:
    print("\nNo improvements found from corner reconstruction.")
else:
    print(f"\nFound {len(improvements)} improvements from corner reconstruction!")
    # Only the first ten findings are listed.
    for n, corner, new_score, old_score, imp in improvements[:10]:
        print(f"  N={n} ({corner}): {old_score:.6f} -> {new_score:.6f} (improvement: {imp:.6f})")

## Key Finding: Need Iterative Mixing Approach

The jiweiliu kernel shows that continuous improvements come from ITERATING between:
1. SA optimization (generates new configurations)
2. Guided refinement (small perturbations)
3. Deletion cascade (propagates improvements from large N to small N)

We have NOT tried this iterative mixing approach yet. All our experiments have been single-pass.

In [None]:
# Summarise which ideas from the reference kernels have been implemented so far
divider = "=" * 60

# (heading, status lines) for each kernel, in display order.
kernel_status = [
    (
        "\n1. jiweiliu (super-fast-simulated-annealing-with-translations):",
        [
            "   - Numba-accelerated SA: NOT IMPLEMENTED",
            "   - Grid configuration exploration: NOT IMPLEMENTED",
            "   - Deletion cascade: NOT IMPLEMENTED",
            "   - Iterative mixing with guided refinement: NOT IMPLEMENTED",
        ],
    ),
    (
        "\n2. sacuscreed (guided-refinement):",
        [
            "   - Small perturbation refinement: NOT IMPLEMENTED",
        ],
    ),
    (
        "\n3. chistyakov (corner-based reconstruction):",
        [
            "   - Corner-based subset extraction: PARTIALLY TESTED (no improvements found)",
        ],
    ),
    (
        "\n4. saspav (santa-submission):",
        [
            "   - Best ensemble: IMPLEMENTED (current best 70.630478)",
        ],
    ),
]

print("Kernel Implementation Status:")
print(divider)
for heading, status_lines in kernel_status:
    print(heading)
    for status_line in status_lines:
        print(status_line)

print("\n" + divider)
print("RECOMMENDATION: Implement jiweiliu's full workflow with Numba SA + deletion cascade")