# Loop 20 Strategic Analysis

## Current Situation
- Best valid score: 70.627896 (from snapshot 21191207951)
- Target: 68.919154
- Gap: 1.708742 (2.48% above the target, i.e. a 2.42% reduction of the current score is required)

## Key Observations from 21 Experiments
1. ALL approaches converge to ~70.63 (SA, bbox3, tessellation, asymmetric, GA, etc.)
2. The baseline is at an EXTREMELY STRONG LOCAL OPTIMUM
3. Public kernels all produce similar scores
4. The target (68.919) is 2.27 points BELOW the public LB leader (71.19)

## Analysis Goals
1. Understand the per-N score distribution
2. Identify which N values have the most room for improvement
3. Test corner reconstruction technique from chistyakov kernel

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.ops import unary_union
import matplotlib.pyplot as plt

# Outline of a single tree as 15 (x, y) vertices in its local frame.
# The shape is mirror-symmetric about x = 0, spans x in [-0.35, 0.35] and
# y in [-0.2, 0.8]; create_tree_polygon() rotates it about (0, 0) and then
# translates it into place.
TREE_TEMPLATE = [
    (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5), (0.2, 0.25), (0.1, 0.25),
    (0.35, 0.0), (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2), (-0.075, 0.0),
    (-0.35, 0.0), (-0.1, 0.25), (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5)
]

def parse_s_value(val):
    """Convert a submission cell to ``float``.

    A string that begins with the ``'s'`` marker has that one leading
    character removed before conversion; any other value is passed to
    ``float()`` as-is.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_marker else val)

def create_tree_polygon(x, y, angle):
    """Return the tree outline turned by ``angle`` degrees and placed at (x, y).

    The template is rotated about its own origin (0, 0) first and only then
    shifted by ``x`` and ``y``.
    """
    spun = rotate(Polygon(TREE_TEMPLATE), angle, origin=(0, 0), use_radians=False)
    return translate(spun, x, y)

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing ``trees``.

    The extent is taken directly from each geometry's own bounding box, so no
    geometric union has to be built just to read its outer limits.

    Args:
        trees: Iterable of geometries exposing a ``bounds`` attribute laid
            out as ``(minx, miny, maxx, maxy)``, e.g. Shapely polygons.

    Returns:
        The larger of the combined width and combined height. NaN is
        returned when ``trees`` is empty, since there is no extent to measure.
    """
    # Materialise once so a one-shot iterator can be scanned four times.
    boxes = [tree.bounds for tree in trees]
    if not boxes:
        return float("nan")

    min_x = min(box[0] for box in boxes)
    min_y = min(box[1] for box in boxes)
    max_x = max(box[2] for box in boxes)
    max_y = max(box[3] for box in boxes)
    return max(max_x - min_x, max_y - min_y)

# Progress marker printed once the helper definitions in this cell have run.
print("Functions defined")

In [None]:
# Read the current best submission and decode its numeric columns
df = pd.read_csv('/home/submission/submission.csv')
for column in ('x', 'y', 'deg'):
    df[column] = df[column].apply(parse_s_value)
# The puzzle size N is the part of the id before the first underscore
df['n'] = df['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))

# Side length and score contribution (side^2 / N) for every N from 1 to 200
per_n_scores = {}
per_n_sides = {}
for n in range(1, 201):
    group = df[df['n'] == n]
    trees = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]
    side = get_bounding_box_side(trees)
    per_n_sides[n] = side
    per_n_scores[n] = (side ** 2) / n

total_score = sum(per_n_scores.values())
print(f"Total score: {total_score:.6f}")
print("Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")

In [None]:
# Analyze per-N efficiency
# N non-overlapping trees need a square of area at least N * tree_area,
# i.e. a side of at least sqrt(N * tree_area).
# The area is read from the template polygon itself (0.245625) instead of a
# hand estimate, so the ratio below is the true "bounding area / tree area".

tree_area = Polygon(TREE_TEMPLATE).area  # exact area of one tree

# efficiency = area of the bounding square / total area of the N trees
# (1.0x would be a perfect, gap-free packing; larger means more wasted space)
efficiencies = {
    n: (side ** 2) / (n * tree_area)
    for n, side in per_n_sides.items()
}

# Find N values with worst efficiency (most room for improvement)
worst_efficiency = sorted(
    efficiencies.items(), key=lambda item: item[1], reverse=True
)[:20]
print("N values with WORST efficiency (most room for improvement):")
for n, eff in worst_efficiency:
    print(f"  N={n}: efficiency={eff:.2f}x, score_contribution={per_n_scores[n]:.6f}")

In [None]:
# Work out the uniform improvement every N would need for the total to hit the target
target = 68.919154
gap = total_score - target
required_reduction = 1 - (target / total_score)
# Factor applied to each score; side lengths scale with its square root
score_factor = 1 - required_reduction
side_factor = np.sqrt(score_factor)

print(f"Total gap to close: {gap:.6f}")
print(f"Average gap per N: {gap / 200:.6f}")
print("\nIf we improve each N by the same percentage:")
print(f"  Required reduction: {required_reduction * 100:.2f}%")
print(f"  Each N's score must be multiplied by {score_factor:.4f}")
print(f"  Each N's side length must be multiplied by {side_factor:.4f}")

In [None]:
# List the N values that contribute most to the total score (highest leverage)
print("\nTop 20 N values by score contribution:")
ranked_by_score = sorted(per_n_scores.items(), key=lambda entry: -entry[1])
top_contributors = ranked_by_score[:20]
for n, score in top_contributors:
    side_length = per_n_sides[n]
    print(f"  N={n}: score={score:.6f}, side={side_length:.4f}")

In [None]:
# Test corner reconstruction technique
# For a large N (e.g., 200), extract subsets by distance from corner

def corner_reconstruction(df, source_n, target_n):
    """Carve a ``target_n``-tree layout out of the ``source_n`` layout.

    For each of the four corners of the source layout's bounding box, the
    ``target_n`` trees whose own bounding boxes stay closest to that corner
    are kept, and the side of the square enclosing them is measured. The
    distance of a tree is the largest x- or y-offset of any edge of its
    bounding box from the corner.

    Args:
        df: Submission frame with ``n``, ``x``, ``y`` and ``deg`` columns.
        source_n: Layout size whose trees are used as the pool.
        target_n: Number of trees to keep. Must be a key of the module-level
            ``per_n_sides`` dict, which supplies the side length to beat.

    Returns:
        ``(best_side, best_config)``. ``best_config`` is the list of
        ``(x, y, deg)`` tuples of the best subset found, or ``None`` when no
        corner beats ``per_n_sides[target_n]`` (``best_side`` is then that
        existing side). ``None`` is also returned when the source layout
        holds fewer than ``target_n`` trees, because a shorter subset would
        not be a valid ``target_n`` layout.
    """
    group = df[df['n'] == source_n]
    trees = list(zip(group['x'], group['y'], group['deg']))

    best_side = per_n_sides[target_n]
    best_config = None

    # Slicing past the end would silently score a layout with too few trees.
    if not 0 < target_n <= len(trees):
        return best_side, best_config

    polygons = [create_tree_polygon(x, y, deg) for x, y, deg in trees]
    boxes = [poly.bounds for poly in polygons]

    # Bounding box of the whole source layout, from the per-tree boxes
    min_x = min(box[0] for box in boxes)
    min_y = min(box[1] for box in boxes)
    max_x = max(box[2] for box in boxes)
    max_y = max(box[3] for box in boxes)
    corners = [
        (min_x, min_y),  # bottom-left
        (min_x, max_y),  # top-left
        (max_x, min_y),  # bottom-right
        (max_x, max_y),  # top-right
    ]

    for corner_x, corner_y in corners:
        # Rank trees by distance from this corner; the index breaks ties so
        # the order is deterministic.
        ranked = sorted(
            (
                max(
                    abs(box[0] - corner_x),
                    abs(box[2] - corner_x),
                    abs(box[1] - corner_y),
                    abs(box[3] - corner_y),
                ),
                index,
            )
            for index, box in enumerate(boxes)
        )
        chosen = [index for _, index in ranked[:target_n]]

        # The polygons are already built, so reuse them instead of
        # re-creating each selected tree.
        side = get_bounding_box_side([polygons[index] for index in chosen])
        if side < best_side:
            best_side = side
            best_config = [trees[index] for index in chosen]

    return best_side, best_config

# Try to rebuild every smaller N from the N=200 layout and record the wins
print("Testing corner reconstruction from N=200 to smaller N...")
improvements = []
for target_n in range(1, 200):
    new_side, config = corner_reconstruction(df, 200, target_n)
    old_score = per_n_scores[target_n]
    new_score = (new_side ** 2) / target_n
    if not new_score < old_score:
        continue

    improvement = old_score - new_score
    improvements.append((target_n, improvement, old_score, new_score))
    # Only report gains large enough to matter
    if improvement > 0.0001:
        print(f"  N={target_n}: {old_score:.6f} -> {new_score:.6f} (improvement: {improvement:.6f})")

print(f"\nTotal improvements found: {len(improvements)}")
print(f"Total improvement: {sum(gain for _, gain, _, _ in improvements):.6f}")