# Loop 47 Strategic Analysis

## Current Situation
- Best LB: 70.306164
- Target: 68.861114  
- Gap: 1.445 points (≈2.06% of the current best LB)
- Submissions used: 22/100

## Key Observations from 47 experiments:
1. All local optimization methods (SA, exhaustive, NFP, shake, bbox3) converge to ~70.3
2. External data mining found ~0.01 points total (exhausted)
3. Subset extraction found ~0.002 points (exhausted)
4. Last 4 experiments found ZERO improvement

## The Fundamental Problem
The baseline solutions sit at EXTREMELY strong local optima. None of the perturbation-based methods tried so far has been able to escape them.

In [None]:
import pandas as pd
import numpy as np
import json

# Load session state
with open('/home/code/session_state.json', 'r', encoding='utf-8') as f:
    state = json.load(f)

# Analyze experiment progression: list the 15 most recent experiments.
experiments = state['experiments']
print("=== EXPERIMENT PROGRESSION ===")
for exp in experiments[-15:]:
    score = exp.get('cv_score', 'N/A')
    name = exp.get('name', 'N/A')
    fallback = exp.get('used_baseline_fallback', False)
    # The ':.6f' format spec only accepts numbers. A missing cv_score (the
    # 'N/A' default) or a null one (None) would raise here, so those are
    # printed as plain text instead.
    if isinstance(score, (int, float)):
        score_text = f"{score:.6f}"
    elif score is None:
        score_text = 'N/A'
    else:
        score_text = str(score)
    print(f"{name}: {score_text} {'(FALLBACK)' if fallback else ''}")

In [None]:
# Analyze per-N scores to find where improvements are possible
import os

# Read the best submission found so far into a DataFrame.
best_path = (
    '/home/code/experiments/044_extended_subset_extraction/ensemble_044.csv'
)
df = pd.read_csv(best_path)

def parse_coord(val):
    """Return ``val`` as a float, dropping a leading 's' from string values.

    Strings that start with 's' are parsed from their second character on;
    every other value (string or not) is passed straight to ``float``.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_prefix else val)

# Convert the three placement columns to plain floats via parse_coord.
for column in ('x', 'y', 'deg'):
    df[column] = df[column].map(parse_coord)
# 'n' is the integer found before the first underscore of the row id.
df['n'] = df['id'].map(lambda ident: int(ident.split('_')[0]))

# Tree polygon vertices: 15 (x, y) pairs in the tree's local frame.
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
])


def get_tree_vertices(x, y, angle_deg):
    """Return the tree polygon rotated by ``angle_deg`` and shifted to (x, y).

    The template vertices (TX, TY) are rotated about the local origin with
    the standard 2-D rotation matrix and then translated.

    Returns:
        A pair ``(xs, ys)`` of arrays holding the transformed vertex
        coordinates, in the same order as TX / TY.
    """
    theta = np.radians(angle_deg)
    c = np.cos(theta)
    s = np.sin(theta)
    return TX * c - TY * s + x, TX * s + TY * c + y

def compute_bbox_size(trees):
    """Return the longer side of the axis-aligned box around all trees.

    Args:
        trees: iterable of ``(x, y, angle)`` placements; each one is expanded
            to its polygon vertices with ``get_tree_vertices``.

    Returns:
        ``max(x-extent, y-extent)`` taken over every vertex of every tree.
    """
    per_tree_x, per_tree_y = zip(
        *(get_tree_vertices(px, py, rot) for px, py, rot in trees)
    )
    xs = np.concatenate(per_tree_x)
    ys = np.concatenate(per_tree_y)
    span_x = xs.max() - xs.min()
    span_y = ys.max() - ys.min()
    return max(span_x, span_y)

def compute_score(trees, n):
    """Return the squared bounding-box side of ``trees`` divided by ``n``."""
    return compute_bbox_size(trees) ** 2 / n

# Score every N from 1 to 200 whose group in the submission is complete.
per_n_scores = {}
for n in range(1, 201):
    n_df = df[df['n'] == n]
    if len(n_df) != n:
        # Row count does not match N: leave this N out of the table.
        continue
    trees = list(zip(n_df['x'], n_df['y'], n_df['deg']))
    per_n_scores[n] = compute_score(trees, n)

total_score = sum(per_n_scores.values())
print(f"Total score: {total_score:.6f}")
print(f"\nTop 10 highest per-N scores (most room for improvement):")
sorted_scores = sorted(
    per_n_scores.items(), key=lambda item: item[1], reverse=True
)
for rank in range(min(10, len(sorted_scores))):
    n, score = sorted_scores[rank]
    print(f"  N={n}: {score:.6f}")

In [None]:
# Analyze the theoretical minimum for a single tree (N=1).
# The notebook treats 45 degrees as the best rotation for N=1. The figures
# below are measured on the real polygon: the earlier shortcut of taking
# diag/sqrt(2) of the 0.7 x 1.0 bounding rectangle gave a score of 0.745,
# whereas the actual tree at 45 degrees scores 0.66125.

print("=== THEORETICAL ANALYSIS ===")

import math

width = max(TX) - min(TX)  # 0.7
height = max(TY) - min(TY)  # 1.0

# Exact polygon area via the shoelace formula over consecutive vertices
# (0.245625 for this outline; the previously printed "~0.35" was too large).
tree_area = 0.5 * abs(
    np.dot(TX, np.roll(TY, -1)) - np.dot(TY, np.roll(TX, -1))
)
print(f"\nTree dimensions: width={width:.3f}, height={height:.3f}")
print(f"Tree area: {tree_area:.6f} (shoelace formula over the polygon)")

# N=1 bounding boxes measured on the actual rotated polygon.
bbox_0 = compute_bbox_size([(0.0, 0.0, 0.0)])
bbox_45 = compute_bbox_size([(0.0, 0.0, 45.0)])
print(f"\nN=1 at 0°: bbox = {bbox_0:.3f}, score = {bbox_0**2:.6f}")
print(f"N=1 at 45°: bbox = {bbox_45:.6f}, score = {bbox_45**2:.6f}")
print(f"N=1 actual: {per_n_scores[1]:.6f}")

# The gap analysis: every figure below derives from the computed gap so the
# bullet points cannot drift away from the totals printed above them.
target = 68.861114
current_total = sum(per_n_scores.values())
gap = current_total - target
# Round up so that the listed number of improvements really closes the gap.
steps_small = max(0, math.ceil(gap / 0.01))
steps_large = max(0, math.ceil(gap / 0.1))

print(f"\n=== GAP ANALYSIS ===")
print(f"Current total: {current_total:.6f}")
print(f"Target: {target:.6f}")
print(f"Gap: {gap:.6f}")
print(f"\nTo close the gap, we need to find ~{gap:.3f} points of improvement")
print(f"That's equivalent to:")
print(f"  - Improving N=1 by {gap:.3f} (impossible, N=1 is already optimal)")
print(f"  - Improving {steps_small} N values by 0.01 each")
print(f"  - Improving {steps_large} N values by 0.1 each")

In [None]:
# Summarise the structure of the best submission: rotation statistics,
# coordinate ranges, and the number of distinct angles for selected N.

print("=== SOLUTION STRUCTURE ANALYSIS ===")

# Rotation statistics over every tree in the submission.
all_angles = df['deg'].to_numpy()
print(f"\nAngle distribution:")
for label, stat in (
    ("Mean", np.mean),
    ("Std", np.std),
    ("Min", np.min),
    ("Max", np.max),
):
    print(f"  {label}: {stat(all_angles):.2f}°")

# Coordinate ranges over every tree in the submission.
all_x = df['x'].to_numpy()
all_y = df['y'].to_numpy()
print(f"\nPosition distribution:")
print(f"  X range: [{all_x.min():.2f}, {all_x.max():.2f}]")
print(f"  Y range: [{all_y.min():.2f}, {all_y.max():.2f}]")

# Distinct rotations (rounded to 0.1 degree) and score for selected N.
print(f"\nPer-N analysis for small N:")
for n in (2, 3, 4, 5, 10, 20, 50, 100, 200):
    n_df = df[df['n'] == n]
    angles = n_df['deg'].values
    unique_angles = np.unique(np.round(angles, 1)).size
    print(f"  N={n}: {unique_angles} unique angles, score={per_n_scores[n]:.6f}")

In [None]:
# Key insight: The top solutions use DIFFERENT algorithms for different N ranges
# Let's see what the theoretical limits are

print("=== THEORETICAL LIMITS ===")

# For small N, we can compute theoretical minimum
# N=1: Single tree, optimal rotation (value the notebook treats as known)
n1_theoretical = 0.661250
print(f"\nN=1: Theoretical minimum = {n1_theoretical:.6f} (at 45°)")
print(f"      Our score = {per_n_scores[1]:.6f}")
print(f"      Gap = {per_n_scores[1] - n1_theoretical:.6f}")

# For N=2: Two trees interlocked
# Theoretical minimum is when trees are perfectly interlocked
print(f"\nN=2: Our score = {per_n_scores[2]:.6f}")

# Area lower bound for any N: assuming trees may not overlap, n trees of area
# A need a bounding square of area >= n * A, so side**2 / n >= A. The bound is
# the same for every N and is very loose (it ignores all packing waste).
# The area is computed exactly with the shoelace formula; the earlier
# hard-coded 0.35 overstated it (the polygon's area is 0.245625).
tree_area = 0.5 * abs(
    np.dot(TX, np.roll(TY, -1)) - np.dot(TY, np.roll(TX, -1))
)
print(f"\nLarge N theoretical analysis:")
for n in [50, 100, 150, 200]:
    print(
        f"  N={n}: Our score = {per_n_scores[n]:.6f}, "
        f"area lower bound = {tree_area:.6f}"
    )

In [None]:
# CRITICAL INSIGHT: what separates the top solutions from ours?
# According to the kernel analysis, top teams:
#   1. run bbox3 for 24-72 hours (we run 2 hours),
#   2. merge solutions from 17+ team members,
#   3. have made 953+ submissions.
#
# In compute terms the 1.445-point gap corresponds to:
#   - us:        ~2 hours on 26 threads    = ~52 CPU-hours
#   - top teams: 24-72 hours on 24+ CPUs   = 576-1728 CPU-hours
# i.e. roughly 10-30x more compute.

compute_gap_report = (
    "=== COMPUTE GAP ANALYSIS ===",
    "\nOur compute: ~52 CPU-hours",
    "Top teams: ~576-1728 CPU-hours",
    "Compute ratio: 10-30x",
    "\nBut even with more compute, local optimization converges to same score!",
    "The issue is NOT compute time, it's the ALGORITHM.",
    "\n=== WHAT WE NEED ===",
    "1. A fundamentally different algorithm that finds DIFFERENT local optima",
    "2. Or, access to solutions from other teams (external data mining exhausted)",
    "3. Or, a way to combine solutions that creates NEW configurations",
)
for report_line in compute_gap_report:
    print(report_line)

In [None]:
# Let's analyze what specific N values have the most room for improvement
# by comparing to theoretical limits

print("=== PER-N IMPROVEMENT POTENTIAL ===")

# For each N, estimate the theoretical minimum.
# This is very rough but gives us direction: for N > 1 the estimate is
# tree_area / packing_efficiency, with an assumed efficiency of 70% for
# N > 50 and 50% otherwise.

# Exact polygon area (shoelace formula). The earlier hard-coded 0.35
# overstated it; the TX/TY outline has area 0.245625.
tree_area = 0.5 * abs(
    np.dot(TX, np.roll(TY, -1)) - np.dot(TY, np.roll(TX, -1))
)
n1_theoretical = 0.661250  # Known optimal for a single tree

improvement_potential = []
# Iterate over the N values that were actually scored. per_n_scores leaves
# out any N whose group was incomplete, and indexing it with every N in
# 1..200 would raise KeyError for those.
for n, current in sorted(per_n_scores.items()):
    if n == 1:
        theoretical = n1_theoretical
    else:
        packing_efficiency = 0.7 if n > 50 else 0.5
        theoretical = tree_area / packing_efficiency

    gap = current - theoretical
    improvement_potential.append((n, current, theoretical, gap))

# Sort by gap (most room for improvement)
improvement_potential.sort(key=lambda x: x[3], reverse=True)

print(f"\nTop 20 N values with most improvement potential:")
for n, current, theoretical, gap in improvement_potential[:20]:
    print(f"  N={n}: current={current:.4f}, theoretical={theoretical:.4f}, gap={gap:.4f}")

In [None]:
# FINAL ANALYSIS: print the recommended direction for the next experiments.

recommendations = """
1. STOP running local optimization (SA, bbox3, shake)
   - These all converge to the same local optimum (~70.3)
   - More iterations won't help

2. FOCUS on fundamentally different approaches:
   a) Constructive algorithms that build solutions from scratch
      - Bottom-left fill (BLF)
      - Lattice-based placement
      - Spiral placement
   
   b) Global optimization that can escape local optima
      - Genetic algorithm with diverse population
      - Simulated annealing with VERY high temperature
      - Basin hopping with large jumps
   
   c) Hybrid approaches
      - Use constructive to generate diverse starting points
      - Then refine with local search

3. ANALYZE top solutions more carefully:
   - What patterns do they use?
   - What angles are common?
   - What spatial arrangements work best?

4. The gap of 1.445 points is LARGE
   - This is not a fine-tuning problem
   - We need a breakthrough, not incremental improvement
"""

print("=== STRATEGIC RECOMMENDATIONS ===")
print(recommendations)

# Concrete next step, printed one line at a time.
next_experiment = (
    "\n=== NEXT EXPERIMENT RECOMMENDATION ===",
    "Implement a CONSTRUCTIVE algorithm that builds solutions from scratch.",
    "Start with N=10, N=20, N=30 to test if it can beat baseline.",
    "If it can't beat baseline on small N, the approach won't work.",
)
for message in next_experiment:
    print(message)