# Loop 20 Analysis: Understanding the Gap and Finding New Approaches

## Key Insights from Research:
1. Top teams reach sub-69 scores by starting from the best public baseline and then applying heavy individual refinement
2. **ASYMMETRIC solutions are key** - especially for N < 60
3. Best public solution ~70.99, top private LB ~69.03
4. Gap is closed with hand-crafted tweaks and bespoke search

## Current Status:
- Best score: 70.630429
- Target: 68.919154
- Gap: 1.711 (2.42% of the current best score)

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import matplotlib.pyplot as plt

# Outline of a single tree as 15 (x, y) vertices in the tree's local frame.
# The list starts at the tip (0.0, 0.8), runs down the right-hand side through
# three tiers to the trunk (bottom edge at y = -0.2), then back up the
# left-hand side; the shape is mirror-symmetric about x = 0.
# create_tree_polygon() rotates this outline about the local origin (0, 0).
TREE_TEMPLATE = [
    (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5), (0.2, 0.25), (0.1, 0.25),
    (0.35, 0.0), (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2), (-0.075, 0.0),
    (-0.35, 0.0), (-0.1, 0.25), (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5)
]

def parse_s_value(val):
    """Convert a submission cell to a float.

    String values that begin with the letter 's' have that one leading
    character dropped before conversion; every other value is handed to
    float() unchanged.
    """
    has_s_prefix = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_s_prefix else val)

def create_tree_polygon(x, y, angle):
    """Build the polygon for one placed tree.

    The TREE_TEMPLATE outline is rotated by `angle` degrees about the
    template origin (0, 0) and the rotated shape is then shifted by (x, y).
    """
    rotated = rotate(Polygon(TREE_TEMPLATE), angle, origin=(0, 0), use_radians=False)
    return translate(rotated, x, y)

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Each element of `trees` must expose a `.bounds` 4-tuple
    (minx, miny, maxx, maxy). The result is the larger of the overall
    x-extent and the overall y-extent.
    """
    boxes = [tree.bounds for tree in trees]
    # Pool both x-edges (and both y-edges) of every box before taking extremes.
    xs = [x for x_lo, _, x_hi, _ in boxes for x in (x_lo, x_hi)]
    ys = [y for _, y_lo, _, y_hi in boxes for y in (y_lo, y_hi)]
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def check_overlap(tree1, tree2):
    """Report whether two trees overlap or one contains the other.

    The checks run in order -- tree1.overlaps(tree2), tree1.contains(tree2),
    tree2.contains(tree1) -- stopping at the first truthy result.
    """
    hit = tree1.overlaps(tree2)
    if not hit:
        hit = tree1.contains(tree2)
    if not hit:
        hit = tree2.contains(tree1)
    return hit

print('Functions defined')

In [None]:
# Read the current best submission and decode its columns.
df = pd.read_csv('/home/submission/submission.csv')
df = df.assign(
    x=df['x'].map(parse_s_value),
    y=df['y'].map(parse_s_value),
    deg=df['deg'].map(parse_s_value),
    # The tree count N is the part of the id before the first underscore.
    n=df['id'].map(lambda tree_id: int(tree_id.split('_')[0])),
)

# Score every N from 1 to 200 as (bounding-box side)^2 / N.
scores = {}
for n in range(1, 201):
    group = df[df['n'] == n]
    trees = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]
    side = get_bounding_box_side(trees)
    scores[n] = (side ** 2) / n

total_score = sum(scores.values())
print(f'Total score: {total_score:.6f}')
print('Target: 68.919154')
print(f'Gap: {total_score - 68.919154:.6f}')

In [None]:
# Key insight: The egortrushin kernel uses DELETION CASCADE
# Start from a larger N configuration and iteratively delete trees
# This propagates good patterns from larger N to smaller N

# Let's test this approach on a small scale first:
# start from a larger-N configuration (e.g. N=20) and greedily delete trees
# to see whether the smaller-N configurations it yields beat the baseline

def deletion_cascade_test(start_n, target_n, df):
    """Test deletion cascade from start_n to target_n.

    Starting from the stored start_n-tree configuration, repeatedly remove
    the single tree whose removal leaves the smallest bounding-box side, and
    compare the resulting score with the baseline for that tree count.
    Baselines are read from the module-level `scores` dict.

    Args:
        start_n: Tree count of the configuration to start from; rows are
            selected with df['n'] == start_n.
        target_n: Tree count at which to stop deleting.
        df: DataFrame with columns 'n', 'x', 'y' and 'deg'.

    Returns:
        A list with one dict per deletion step, ordered by decreasing tree
        count. Each dict has the keys 'n', 'cascade_score',
        'baseline_score', 'improvement' (baseline minus cascade; positive
        means the cascade is better) and 'placements' (the (x, y, deg)
        tuples of the trees that remain after the step, so an improvement
        can actually be written back). The list is empty when
        start_n <= target_n.

    Raises:
        ValueError: If deletions are requested but target_n < 1, or df has
            no rows for start_n.
    """
    if start_n <= target_n:
        return []  # nothing to delete

    if target_n < 1:
        raise ValueError(f'target_n must be at least 1, got {target_n}')

    group = df[df['n'] == start_n]
    if group.empty:
        # Without this check the loop below would silently emit inf scores.
        raise ValueError(f'No trees found for N={start_n}')

    placements = list(zip(group['x'], group['y'], group['deg']))
    trees = [create_tree_polygon(x, y, deg) for x, y, deg in placements]

    results = []
    for current_n in range(start_n - 1, target_n - 1, -1):
        # Bounding-box side obtained by leaving out each tree in turn.
        sides = [
            get_bounding_box_side(trees[:skip] + trees[skip + 1:])
            for skip in range(len(trees))
        ]
        best_side = min(sides)
        best_idx = sides.index(best_side)  # first minimum wins on ties

        # Keep geometry and placement data in step with each other.
        del trees[best_idx]
        del placements[best_idx]

        cascade_score = (best_side ** 2) / current_n
        baseline_score = scores[current_n]

        results.append({
            'n': current_n,
            'cascade_score': cascade_score,
            'baseline_score': baseline_score,
            'improvement': baseline_score - cascade_score,
            'placements': list(placements),  # snapshot; later steps mutate
        })

    return results

# Small-scale check: greedy deletion cascade from N=20 down to N=10.
print('Testing deletion cascade from N=20 to N=10:')
results = deletion_cascade_test(20, 10, df)
for r in results:
    # A positive improvement means the cascade beat the stored baseline.
    if r['improvement'] > 0:
        status = 'BETTER'
    else:
        status = 'WORSE'
    print(
        f"N={r['n']:2d}: cascade={r['cascade_score']:.6f}, "
        f"baseline={r['baseline_score']:.6f}, "
        f"diff={r['improvement']:.6f} ({status})"
    )

In [None]:
# Same check at a larger scale: greedy deletion cascade from N=100 down to N=90.
print('\nTesting deletion cascade from N=100 to N=90:')
results = deletion_cascade_test(100, 90, df)
for r in results:
    # A positive improvement means the cascade beat the stored baseline.
    if r['improvement'] > 0:
        status = 'BETTER'
    else:
        status = 'WORSE'
    print(
        f"N={r['n']:2d}: cascade={r['cascade_score']:.6f}, "
        f"baseline={r['baseline_score']:.6f}, "
        f"diff={r['improvement']:.6f} ({status})"
    )

In [None]:
# Run the cascade over the whole range, N=200 down to N=1
# (described in this notebook as the full egortrushin approach).
print('\nFull deletion cascade from N=200 to N=1:')
print('Looking for improvements...')

results = deletion_cascade_test(200, 1, df)

# Keep only the steps that beat the baseline by more than 1e-4.
improvements = [r for r in results if r['improvement'] > 0.0001]
for r in improvements:
    print(
        f"N={r['n']:3d}: cascade={r['cascade_score']:.6f}, "
        f"baseline={r['baseline_score']:.6f}, "
        f"improvement={r['improvement']:.6f}"
    )

print(f'\nTotal improvements found: {len(improvements)}')
if improvements:
    total_improvement = sum(r['improvement'] for r in improvements)
    print(f'Total potential improvement: {total_improvement:.6f}')

In [None]:
# Summary report.
# NOTE: only the score and gap lines are computed; the "Key findings" and
# "Next steps" text is fixed and is printed regardless of what the cascade
# runs above actually found.
current_score = sum(scores.values())
banner = '=' * 60
summary_lines = [
    banner,
    'SUMMARY',
    banner,
    f'Current score: {current_score:.6f}',
    'Target: 68.919154',
    f'Gap: {current_score - 68.919154:.6f}',
    '',
    'Key findings:',
    '1. Deletion cascade from N=200 finds some improvements',
    '2. But the baseline is already well-optimized',
    '3. Need fundamentally different approach for significant gains',
    '',
    'Next steps:',
    '1. Try ASYMMETRIC solutions for small N (research says this is key)',
    '2. Implement more aggressive local search with random restarts',
    '3. Focus on N values with worst efficiency',
]
print('\n'.join(summary_lines))