# Loop 42 Strategic Analysis

## Current Situation
- Best LB: 70.316492
- Target: 68.870074  
- Gap: 1.446 points (2.1%)
- 24+ consecutive experiments stuck at the same score

## Key Questions
1. What is the theoretical minimum score?
2. Which N values have the most room for improvement?
3. What makes top teams' solutions different?

In [None]:
import pandas as pd
import numpy as np
import math
import os

# Tree geometry: polygon vertices of one tree in its own local frame.
# TX[i], TY[i] are the coordinates of vertex i; the first vertex is the
# tip at (0, 0.8) and the lowest vertices sit at y = -0.2.
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

def get_tree_bounds(x, y, deg):
    """Return the axis-aligned extent of one placed tree.

    The outline (TX, TY) is rotated counter-clockwise by ``deg`` degrees
    about the local origin and then shifted by ``(x, y)``.

    Returns:
        Tuple ``(min_x, max_x, min_y, max_y)`` over the transformed vertices.
    """
    theta = math.radians(deg)
    c = math.cos(theta)
    s = math.sin(theta)
    # Standard 2-D rotation applied to every vertex at once, then translation.
    world_x = TX * c - TY * s + x
    world_y = TX * s + TY * c + y
    return world_x.min(), world_x.max(), world_y.min(), world_y.max()

def compute_bbox_score(trees):
    """Score a layout as enclosing-square area per tree.

    Args:
        trees: sequence of ``(x, y, deg)`` placements.

    Returns:
        ``side**2 / len(trees)``, where ``side`` is the longer edge of the
        bounding box enclosing every tree; ``inf`` for an empty layout.
    """
    if not trees:
        return float('inf')

    inf = float('inf')
    # One (min_x, max_x, min_y, max_y) tuple per tree, transposed into columns.
    lows_x, highs_x, lows_y, highs_y = zip(
        *(get_tree_bounds(x, y, deg) for x, y, deg in trees)
    )
    # Seeding with +/-inf reproduces a left fold starting from those values.
    left = min(inf, *lows_x)
    right = max(-inf, *highs_x)
    bottom = min(inf, *lows_y)
    top = max(-inf, *highs_y)

    side = max(right - left, top - bottom)
    return side ** 2 / len(trees)

# Area enclosed by one tree outline (feeds the perfect-packing lower bound)
def single_tree_area():
    """Return the area of the polygon with vertices ``(TX[i], TY[i])``."""
    from shapely.geometry import Polygon  # function-scope import, as in the original
    vertices = list(zip(TX, TY))
    outline = Polygon(vertices)
    return outline.area

# Reference figures for a lone tree: its outline area and its score
# (enclosing-square area for N=1) at two rotations.
area_single = single_tree_area()
score_at_45 = compute_bbox_score([(0, 0, 45)])
score_at_0 = compute_bbox_score([(0, 0, 0)])
print(f"Single tree area: {area_single:.6f}")
print(f"Single tree bounding box at 45°: {score_at_45:.6f}")
print(f"Single tree bounding box at 0°: {score_at_0:.6f}")

In [None]:
# Load current best submission and analyze per-N scores
def parse_s_value(s):
    """Convert a submission cell to ``float``.

    String values that start with ``'s'`` have that one-character prefix
    removed first; anything else is passed to ``float`` unchanged.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    raw = s[1:] if has_prefix else s
    return float(raw)

def load_config(df, n):
    """Collect the tree placements of configuration ``n`` from a submission frame.

    Rows are selected by an ``id`` that starts with the zero-padded
    three-digit ``n`` followed by ``'_'``, and ordered by the integer found
    after the first underscore of the id. The ``x``, ``y`` and ``deg``
    cells are decoded with ``parse_s_value``.

    Returns:
        List of ``(x, y, deg)`` float tuples; empty when no row matches.
    """
    prefix = f'{n:03d}_'
    selected = df[df['id'].str.startswith(prefix)].copy()
    # Sort on the numeric tree index rather than on the id string.
    selected['tree_idx'] = [int(tree_id.split('_')[1]) for tree_id in selected['id']]
    selected = selected.sort_values('tree_idx')
    return [
        (parse_s_value(raw_x), parse_s_value(raw_y), parse_s_value(raw_deg))
        for raw_x, raw_y, raw_deg in zip(selected['x'], selected['y'], selected['deg'])
    ]

# Read the current best submission from disk
df = pd.read_csv('/home/submission/submission.csv')

# Score each configuration size N = 1..200 on its own
per_n_scores = {
    n: compute_bbox_score(load_config(df, n))
    for n in range(1, 201)
}

# The overall score is the sum of the per-N scores
total = sum(per_n_scores.values())
print(f"Total score: {total:.6f}")
print(f"Target: 68.870074")
print(f"Gap: {total - 68.870074:.6f}")

# Rank N values from largest to smallest per-N score
print("\nTop 20 N values by score contribution:")
sorted_n = sorted(per_n_scores.items(), key=lambda item: item[1], reverse=True)
for n, score in sorted_n[:20]:
    print(f"N={n}: {score:.6f}")

In [None]:
# Perfect-packing lower bound
# If N trees could fill a square with no wasted space, that square would have
# area N * tree_area, i.e. side = sqrt(N * tree_area). The per-N score
# side^2 / N therefore cannot drop below tree_area (~0.2456 for the outline
# defined by TX/TY), whatever N is.

tree_area = single_tree_area()
print(f"Tree area: {tree_area:.6f}")

print("\nTheoretical minimum vs actual scores:")
print("N | Actual | Theoretical Min | Gap | % Above Min")
print("-" * 60)
# NOTE(review): only the header above is printed; no per-N rows follow it.

gaps = []
theoretical_total = 0
actual_total = 0

for n in range(1, 201):
    actual = per_n_scores[n]
    # Same bound for every N: the outline area itself.
    theoretical_min = tree_area

    # Record (n, actual, bound, absolute gap, percent above the bound).
    gaps.append((
        n,
        actual,
        theoretical_min,
        actual - theoretical_min,
        (actual / theoretical_min - 1) * 100,
    ))

    theoretical_total += theoretical_min
    actual_total += actual

print(f"\nTheoretical minimum total: {theoretical_total:.6f}")
print(f"Actual total: {actual_total:.6f}")
print(f"Gap: {actual_total - theoretical_total:.6f}")

# Rank by absolute gap (tuple position 3), largest first
print("\nN values with largest gaps (most room for improvement):")
gaps_sorted = sorted(gaps, key=lambda entry: entry[3], reverse=True)
for n, actual, theo, gap, pct in gaps_sorted[:20]:
    print(f"N={n}: actual={actual:.6f}, theoretical={theo:.6f}, gap={gap:.6f} ({pct:.1f}% above)")


In [None]:
# The perfect-packing bound is unreachable because trees cannot tile a square.
# A more useful reference is the best score actually seen for each N across
# the CSV files available under the snapshots directory.
# NOTE(review): only the bounding-box score is compared here; nothing in this
# cell checks a candidate layout for overlapping trees - confirm validity
# before treating "best possible total" as achievable.

import glob

# glob returns paths in arbitrary order; sort them so the 100-file sample
# below is the same on every run.
csv_files = sorted(glob.glob('/home/nonroot/snapshots/**/*.csv', recursive=True))
print(f"Found {len(csv_files)} CSV files")

# Start from the current submission's per-N scores and only ever lower them
best_per_n = {n: per_n_scores[n] for n in range(1, 201)}

skipped_files = []  # (path, exception) for files that could not be processed

for csv_path in csv_files[:100]:  # Sample first 100
    try:
        df_temp = pd.read_csv(csv_path)
        if 'id' not in df_temp.columns:
            continue
        for n in range(1, 201):
            trees = load_config(df_temp, n)
            if trees:
                score = compute_bbox_score(trees)
                if score < best_per_n[n]:
                    best_per_n[n] = score
    except Exception as exc:
        # Best-effort scan: an unreadable or malformed file must not abort the
        # run, but record it instead of hiding it. Catching Exception (not a
        # bare except) lets KeyboardInterrupt/SystemExit propagate.
        skipped_files.append((csv_path, exc))
        continue

if skipped_files:
    print(f"Skipped {len(skipped_files)} CSV files that could not be processed:")
    for csv_path, exc in skipped_files:
        print(f"  {csv_path}: {type(exc).__name__}: {exc}")

print("\nBest per-N scores found:")
best_total = sum(best_per_n.values())
print(f"Best possible total: {best_total:.6f}")
print(f"Current total: {total:.6f}")
print(f"Potential improvement: {total - best_total:.6f}")

In [None]:
# Key insight: The gap to target is 1.446 points
# If we need to improve by 1.446 points across 200 N values,
# that's an average of 0.00723 per N value

# Lower scores are better, so the improvement still needed is how far the
# current total sits ABOVE the target (same sign convention as the "Gap"
# printed alongside the total score).
required_improvement = total - 68.870074
print(f"Required improvement: {required_improvement:.6f}")
print(f"Average improvement needed per N: {required_improvement / 200:.6f}")

# But improvements are NOT uniform - some N values have more room
# Let's see which N values contribute most to the gap

print("\nN values where we're furthest from theoretical minimum:")
for n, actual, theo, gap, pct in gaps_sorted[:30]:
    improvement_potential = gap * 0.1  # Assume we can capture 10% of the gap
    print(f"N={n}: gap={gap:.4f}, 10% capture={improvement_potential:.4f}")

print("\n=== STRATEGIC INSIGHT ===")
print("The gap to target (1.446) is ~2.1% of current score.")
print("Top teams achieved this through:")
print("1. 900+ submissions accumulating tiny per-N improvements")
print("2. Custom optimizers running for DAYS (not minutes)")
print("3. Novel asymmetric solutions not in public data")
print("\nWe have 87 submissions remaining and limited time.")
print("Focus should be on:")
print("- Finding ANY improvement, even tiny ones")
print("- Submitting EVERY experiment to track per-N improvements")
print("- Running optimizers for EXTENDED time (hours, not minutes)")