# Loop 2 LB Feedback Analysis

## Submission Results
- exp_001 (001_better_baseline): CV 70.5728 | LB 70.6151 | Gap (LB - CV): +0.0423

## Key Observations
1. The submission PASSED Kaggle validation (no overlap error!)
2. LB score (70.6151) is slightly worse (higher) than CV (70.5728); lower scores are better, and a small CV-to-LB gap like this is expected
3. We now have a valid baseline to improve upon

## Current Status
- Best LB: 70.6151
- Target: 68.8872
- Gap: 1.73 points (about 2.4% of the current LB score)

In [None]:
import pandas as pd
import numpy as np
import json

# Read the persisted session state (JSON) and list every recorded
# submission together with its CV score, LB score and error, if any.
with open('/home/code/session_state.json', 'r') as state_file:
    state = json.load(state_file)

print("Submissions:")
for entry in state['submissions']:
    # Submissions that were never scored have no 'lb_score' key.
    lb_display = entry.get('lb_score', 'N/A')
    print(f"  {entry['model_name']}: CV={entry['cv_score']:.4f}, LB={lb_display}")
    failure = entry.get('error')
    if failure:
        print(f"    ERROR: {failure}")

In [None]:
# Report the CV-to-LB gap (LB minus CV) for each submission that has an
# LB score and did not fail.
submissions = state['submissions']
valid_subs = []
for record in submissions:
    # Skip entries that errored out or were never scored on the LB.
    if record.get('error') or not record.get('lb_score'):
        continue
    valid_subs.append(record)

print(f"\nValid submissions: {len(valid_subs)}")
for record in valid_subs:
    cv_score, lb_score = record['cv_score'], record['lb_score']
    print(f"  {record['model_name']}: CV={cv_score:.4f}, LB={lb_score:.4f}, Gap={lb_score - cv_score:.4f}")

In [None]:
# Key insight from the kernel analysis:
# Top teams use:
# 1. Ensemble of best per-N solutions from multiple sources
# 2. Simulated annealing with fractional translation (very fine moves)
# 3. C++ for speed

# The kernel shows that N=1 is fixed at 45 degrees (optimal)
# The key is to improve N=2 to N=200

# Let's analyze the per-N scores in our current submission
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

# Outline of a single tree in its local frame: TX[i] and TY[i] are the x and
# y coordinates of the i-th polygon vertex (15 vertices). The two lists are
# paired up with zip() in create_tree_polygon, so they must stay the same
# length and in the same order.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, angle):
    """Build the tree outline rotated by ``angle`` and moved to ``(x, y)``.

    The outline is created from the TX/TY vertex lists, rotated about the
    local origin (0, 0), and only then translated, so ``(x, y)`` is where
    the outline's local origin ends up.

    Args:
        x: Translation along the x axis.
        y: Translation along the y axis.
        angle: Rotation passed to ``affinity.rotate`` (which uses degrees
            unless told otherwise).

    Returns:
        The transformed shapely ``Polygon``.
    """
    outline = Polygon(zip(TX, TY))
    rotated = affinity.rotate(outline, angle, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def _strip_s_prefix(value):
    """Return ``value`` as a float, dropping one leading 's' if present."""
    text = str(value)
    return float(text[1:] if text.startswith('s') else text)


def parse_submission(df):
    """Group the rows of a submission table by tree count N.

    Args:
        df: DataFrame with columns 'id', 'x', 'y' and 'deg'. The part of
            'id' before the first underscore is parsed as the integer N.
            'x', 'y' and 'deg' may be numbers or strings, optionally
            prefixed with a single 's'.

    Returns:
        dict mapping N to a list of ``(x, y, angle)`` float tuples, in the
        order the rows appear in ``df``.

    Raises:
        ValueError: If an id prefix or a coordinate cannot be parsed.
    """
    solutions = {}
    # Walk the columns in lockstep instead of using iterrows(), which
    # builds a full Series object for every row.
    for row_id, raw_x, raw_y, raw_deg in zip(df['id'], df['x'], df['y'], df['deg']):
        n = int(row_id.split('_')[0])
        placement = (
            _strip_s_prefix(raw_x),
            _strip_s_prefix(raw_y),
            _strip_s_prefix(raw_deg),
        )
        solutions.setdefault(n, []).append(placement)
    return solutions

# Load the current submission file and group its rows by N.
print("Loading current submission...")
df = pd.read_csv('/home/submission/submission.csv')
solutions = parse_submission(df)
print(f"Loaded solutions for N=1 to {max(solutions)}")

In [None]:
# Calculate per-N scores
def calculate_per_n_scores(solutions, max_n=200):
    """Compute the bounding-square side and score contribution per N.

    For each N from 1 to ``max_n`` the trees in ``solutions[N]`` are turned
    into polygons, merged, and enclosed in an axis-aligned square. The
    contribution of that N is ``side ** 2 / N``.

    Args:
        solutions: dict mapping N to a list of ``(x, y, angle)`` tuples.
        max_n: Largest N to evaluate (inclusive). Defaults to 200, the
            range this analysis was originally hard-coded for.

    Returns:
        dict mapping N to ``{'side': ..., 'contribution': ...}``.

    Raises:
        KeyError: If ``solutions`` has no entry for some N in the range.
    """
    per_n_scores = {}
    for n in range(1, max_n + 1):
        polys = [create_tree_polygon(*tree) for tree in solutions[n]]
        # bounds is (minx, miny, maxx, maxy); the square must cover the
        # larger of the two extents.
        bounds = unary_union(polys).bounds
        side = max(bounds[2] - bounds[0], bounds[3] - bounds[1])
        per_n_scores[n] = {'side': side, 'contribution': (side ** 2) / n}
    return per_n_scores

# Score every N, add the contributions up, and list the ten N values that
# weigh most heavily on the total.
per_n = calculate_per_n_scores(solutions)
total = sum(per_n[key]['contribution'] for key in per_n)
print(f"Total score: {total:.6f}")

print("\nTop 10 contributors to score:")
sorted_n = sorted(
    per_n.items(),
    key=lambda item: item[1]['contribution'],
    reverse=True,
)
for tree_count, stats in sorted_n[:10]:
    print(f"  N={tree_count}: contribution={stats['contribution']:.6f}, side={stats['side']:.6f}")

In [None]:
# Look for room to improve by inspecting the smallest problems first.
# Notes carried over from the kernel analysis:
#   - N=1: side = 0.8131 (optimal at 45 degrees), nothing left to gain.
#   - N=2 and up: the attainable minimum depends on the arrangement.
print("\nAnalysis of improvement potential:")
print("=" * 60)

# Print side length and contribution for N = 1..20.
print("\nSmall N analysis (N=1-20):")
for small_n in range(1, 21):
    side = per_n[small_n]['side']
    contribution = per_n[small_n]['contribution']
    print(f"  N={small_n}: side={side:.6f}, contribution={contribution:.6f}")

In [None]:
# Where we stand: target 68.89 versus our 70.62, i.e. 1.73 points or
# roughly 2.4% still to gain.
#
# What the kernel does:
#   1. Ensemble the best per-N solutions from multiple sources
#   2. Run SA with fractional translation
#   3. Repeat many times
#
# Top teams have 900+ submissions; each one tries to improve some N values
# and the final score is the best per-N across all of them.
for report_line in (
    "\nStrategy Analysis:",
    "=" * 60,
    "\n1. Current approach: Single snapshot baseline",
    "   - Score: 70.62",
    "   - This is already highly optimized",
    "\n2. Top team approach: Ensemble + SA + many iterations",
    "   - Score: 68.89",
    "   - 900+ submissions",
    "   - Best per-N from all sources",
    "\n3. Gap analysis:",
    f"   - Need to improve by {70.62 - 68.89:.2f} points",
    f"   - This is {(70.62 - 68.89) / 70.62 * 100:.1f}% improvement",
    "\n4. Key insight from kernel:",
    "   - N=1 is already optimal (45 degrees)",
    "   - Improvements come from N=2 to N=200",
    "   - Fractional translation (0.001, 0.0005, etc.) is key",
    "   - Need to run many iterations with different seeds",
):
    print(report_line)

In [None]:
# Summarize the analysis. The gap figures are derived from the two scores
# instead of being typed in by hand, so they cannot drift out of sync with
# them (the hand-entered 2.5% did not match 1.7279 / 70.6151, about 2.4%).
best_lb_score = 70.6151
target_score = 68.8872
score_gap = best_lb_score - target_score

analysis = {
    'best_lb': best_lb_score,
    'target': target_score,
    'gap': round(score_gap, 2),
    'gap_percent': round(score_gap / best_lb_score * 100, 1),
    'key_insights': [
        'N=1 is already optimal at 45 degrees',
        'Top teams use ensemble of best per-N from multiple sources',
        'Fractional translation (0.001, 0.0005, etc.) is key technique',
        'Need many iterations with different seeds',
        'C++ implementation for speed',
    ],
    'next_steps': [
        'Implement Python-based SA with fractional translation',
        'Focus on small N (2-20) where improvements are most valuable',
        'Try different initial configurations',
        'Ensemble best per-N across experiments',
    ],
}

# Nothing is written to disk in this cell, so the message must not claim
# that the analysis was saved.
print("\nAnalysis summary (not written to disk):")
print(json.dumps(analysis, indent=2))