# Loop 5 LB Feedback Analysis

## Submission Results
- **006_corner_extraction_ensemble**: CV 70.6755 | LB 70.6755 (gap: 0.0000)

## Key Observations
1. CV-LB gap is essentially zero - scoring is accurate
2. Current best: 70.675457
3. Target: 68.919154
4. Gap to target: 1.756303 (2.55% of the target score)

## The Problem
The improvement rate is too slow:
- Improvement achieved: 0.000645 per experiment
- Gap to close: 1.756303
- At current rate: ~2,700 experiments needed (1.756303 / 0.000645 ≈ 2,723)

## Strategy Analysis
We need to run the bbox3 optimizer for HOURS, not minutes.

In [None]:
# Check which bbox3 sources/binaries are available on this machine
import os
import subprocess

# Source file of the why-not kernel's bbox3
why_not_bbox3 = '/home/code/research/kernels/jazivxt_why-not/bbox3.cpp'
print(f"why-not bbox3.cpp exists: {os.path.exists(why_not_bbox3)}")

# Pre-compiled bbox3 shipped with the snapshot
precompiled_bbox3 = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bbox3'
print(f"Pre-compiled bbox3 exists: {os.path.exists(precompiled_bbox3)}")

# The bbox3 used by the previous experiment
our_bbox3 = '/home/code/experiments/006_corner_extraction/bbox3'
print(f"Our bbox3 exists: {os.path.exists(our_bbox3)}")

# Probe our binary with '-h'. The probe is best-effort: a binary that does not
# understand '-h' may simply start running and hit the 5 s timeout, and a file
# that exists but is not executable raises OSError. Neither case should abort
# the rest of the notebook, so both are reported instead of propagated.
if os.path.exists(our_bbox3):
    try:
        result = subprocess.run([our_bbox3, '-h'], capture_output=True, text=True, timeout=5)
    except subprocess.TimeoutExpired:
        print("\nOur bbox3 did not exit within 5s when called with '-h'")
    except OSError as exc:
        print(f"\nCould not execute our bbox3: {exc}")
    else:
        print("\nOur bbox3 help output:")
        # Show whichever stream carries the text, capped at 500 characters.
        print(result.stdout[:500] if result.stdout else result.stderr[:500])

In [None]:
# Analyze the current best solution to understand where improvements are possible
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity

# Outline of a single tree as (x, y) vertex pairs, listed in the order the
# polygon ring is built from them.
TREE_VERTICES = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
# Per-axis coordinate lists derived from the vertex pairs above.
TX = [vx for vx, _ in TREE_VERTICES]
TY = [vy for _, vy in TREE_VERTICES]

def parse_s_value(s):
    """Convert a submission field to ``float``, dropping a leading 's' if present.

    String values that start with the character 's' have that one character
    removed before conversion; anything else is passed to ``float`` unchanged.
    Raises whatever ``float`` raises for unparseable input.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    numeric_part = s[1:] if has_marker else s
    return float(numeric_part)

def create_tree_polygon(x, y, deg):
    """Build the tree polygon rotated by ``deg`` about the origin, then moved by (x, y).

    The rotation is applied first, around (0, 0) of the untransformed
    TREE_VERTICES outline, and the translation second, so (x, y) is where the
    outline's local origin ends up. ``deg`` is passed straight to
    ``affinity.rotate`` with its default angle units.
    """
    rotated = affinity.rotate(Polygon(TREE_VERTICES), deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def get_bounding_box_side(polygons):
    """Return the side of the axis-aligned square that bounds all polygons.

    The result is the larger of the x-extent and the y-extent taken over every
    exterior vertex of every polygon. An empty (or falsy) ``polygons`` yields 0.
    Each element must expose ``.exterior.coords`` as an iterable of points
    indexable by 0 (x) and 1 (y).
    """
    if not polygons:
        return 0
    # Flatten all exterior rings into one list of vertices.
    points = [pt for shape in polygons for pt in shape.exterior.coords]
    x_values = [pt[0] for pt in points]
    y_values = [pt[1] for pt in points]
    width = max(x_values) - min(x_values)
    height = max(y_values) - min(y_values)
    return max(width, height)

# Load current best and decode the numeric columns
df = pd.read_csv('/home/code/experiments/006_corner_extraction/ensemble_best.csv')
for col in ('x', 'y', 'deg'):
    df[f'{col}_val'] = df[col].apply(parse_s_value)

# Area of one tree, measured from the polygon itself rather than hard-coded.
# The outline in TX/TY has area 0.245625 (three tiers 0.0375 + 0.065625 +
# 0.1125 plus the 0.15 x 0.2 trunk 0.03); the previous literal 0.2775 did not
# match it and overstated every efficiency figure by about 13%.
# NOTE(review): any efficiency threshold that was read off output produced
# with the old 0.2775 constant needs to be re-derived.
tree_area = create_tree_polygon(0, 0, 0).area

# Calculate per-N scores and efficiency
results = []
for n in range(1, 201):
    # Rows of the n-tree configuration carry ids prefixed with the zero-padded n.
    prefix = f'{n:03d}_'
    group = df[df['id'].str.startswith(prefix)]
    if group.empty:
        # A missing configuration would otherwise silently score 0 and
        # understate the total.
        print(f"WARNING: no rows found for n={n}; it contributes 0 to the total")
    polygons = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]
    side = get_bounding_box_side(polygons)
    score = side**2 / n

    # Packing efficiency: fraction of the bounding square covered by trees
    total_tree_area = n * tree_area
    bbox_area = side**2
    efficiency = total_tree_area / bbox_area if bbox_area > 0 else 0

    results.append({
        'n': n,
        'side': side,
        'score': score,
        'efficiency': efficiency,
    })

results_df = pd.DataFrame(results)
print(f"Total score: {results_df['score'].sum():.6f}")
# score = side^2 / n and efficiency = n * tree_area / side^2, so the largest
# scores are exactly the lowest efficiencies.
print("\nTop 10 score contributors (worst efficiency):")
print(results_df.nlargest(10, 'score')[['n', 'side', 'score', 'efficiency']])

In [None]:
# Estimate how far the total could drop if every N reached the best packing
# efficiency seen so far (0.86).
# NOTE(review): 0.2775 is not the area of the TX/TY outline (that is 0.245625).
# The value is kept because 0.86 looks like it was measured with this same
# constant, which keeps the ratio tree_area / best_efficiency self-consistent;
# confirm before changing either number on its own.

best_efficiency = 0.86
tree_area = 0.2775

# For each n: area needed at the target efficiency -> square side -> score.
theoretical_scores = [
    np.sqrt(n * tree_area / best_efficiency) ** 2 / n
    for n in range(1, 201)
]
theoretical_total = sum(theoretical_scores)

current_total = results_df['score'].sum()
print(f"Current score: {current_total:.6f}")
print(f"Theoretical minimum (0.86 efficiency): {theoretical_total:.6f}")
print(f"Potential improvement: {current_total - theoretical_total:.6f}")
print("Target: 68.919154")
print(f"Gap to target: {current_total - 68.919154:.6f}")

# Rank N values by how much score they could still shed
results_df['theoretical_score'] = theoretical_scores
results_df['improvement_potential'] = results_df['score'] - results_df['theoretical_score']
report_columns = ['n', 'score', 'theoretical_score', 'improvement_potential', 'efficiency']
print("\nTop 10 N values with most improvement potential:")
print(results_df.nlargest(10, 'improvement_potential')[report_columns])

In [None]:
# Summary of what the bbox3 runner kernel does: a 3-phase optimization.
#   Phase A: 2-min runs with n=1000-2000, r=30-90 to find promising parameters
#   Phase B: 10-min runs on top candidates
#   Phase C: 20-min runs on best few
# Total time: 3 hours. This is what we need to do!
#
# NOTE(review): the text below says Phase A selects the top 6 candidates but
# Phase B runs on the top 3, and the listed phases add up to 15*2 + 3*10 + 2*20
# = 100 minutes rather than 3 hours. Verify against the kernel.

strategy_lines = (
    "bbox3 runner kernel strategy:",
    "=" * 50,
    "Phase A: 2-min runs (15 combinations)",
    "  n_values: [1000, 1200, 1500, 1800, 2000]",
    "  r_values: [30, 60, 90]",
    "  Select top 6 candidates",
    "",
    "Phase B: 10-min runs on top 3 candidates",
    "",
    "Phase C: 20-min runs on top 2 candidates",
    "",
    "Total budget: 3 hours",
    "",
    "Key features:",
    "- fix_direction rotation optimization after each run",
    "- repair_overlaps_in_place to fix any overlaps",
    "- Only accept improvements that beat current best",
)
for line in strategy_lines:
    print(line)

In [None]:
# Build the why-not kernel's bbox3.cpp inside a fresh experiment directory.
# According to the kernel's own description it features:
# - Complex Number Vector Coordination
# - Fluid Dynamics simulation
# - Hinge Pivot optimization
# - Density Gradient Flow
# - Global Boundary Tension
# - Dynamic Scaling and Overlap Repair (global_squeeze)

import os
import shutil
import subprocess

# Create the experiment directory and make it the working directory, so the
# relative file names below resolve inside it.
experiment_dir = '/home/code/experiments/007_long_bbox3'
os.makedirs(experiment_dir, exist_ok=True)
os.chdir(experiment_dir)

# Bring the source next to where the binary will be written
shutil.copy('/home/code/research/kernels/jazivxt_why-not/bbox3.cpp', 'bbox3.cpp')

# Optimized build with OpenMP enabled for parallel processing
compile_cmd = ['g++', '-O3', '-fopenmp', '-march=native', '-o', 'bbox3', 'bbox3.cpp']
result = subprocess.run(compile_cmd, capture_output=True, text=True)

if result.returncode != 0:
    print("Compilation failed:")
    print(result.stderr)
else:
    print("Compilation successful!")
    print(f"bbox3 binary created: {os.path.exists('bbox3')}")

In [None]:
# Seed the new experiment with the current best solution as submission.csv
import shutil

import pandas as pd

best_csv = '/home/code/experiments/006_corner_extraction/ensemble_best.csv'
working_csv = '/home/code/experiments/007_long_bbox3/submission.csv'
shutil.copy(best_csv, working_csv)

# Read the copy back to confirm it landed and is parseable
df = pd.read_csv(working_csv)
print(f"Copied submission.csv with {len(df)} rows")
print("First few rows:")
print(df.head())