# Loop 2 LB Feedback Analysis

**LB Score: 71.8128** (matches CV exactly - perfect calibration!)

## Key Insights:
1. CV-LB gap is 0.0000 - our scoring is perfectly calibrated
2. The valid submission (non-touching) scores 71.81
3. The touching submission would score 70.65 but is rejected
4. Gap to target: 71.81 - 68.92 = 2.89 points (4.2% above the 68.92 target)

## Strategy Analysis:
- The 1.17-point penalty of the valid (non-touching) submission relative to the touching one is significant
- We need to do one of the following:
  1. Find better valid configurations from scratch
  2. Apply micro-separation to touching trees to minimize score impact
  3. Run optimization with gap constraints from the start

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from itertools import combinations
import json
import os

# Use 30 significant digits for Decimal arithmetic in the current context.
# NOTE(review): ChristmasTree builds its Decimals with Decimal(str(...)), which
# is exact regardless of this setting; the precision only matters for Decimal
# arithmetic, none of which is visible in this section.
getcontext().prec = 30

# Christmas Tree class
class ChristmasTree:
    """A fixed 15-vertex outline placed at a given centre and rotation.

    The constructor arguments are converted with ``Decimal(str(...))`` and
    kept as ``center_x``, ``center_y`` and ``angle``.  ``polygon`` is the
    outline rotated by ``angle`` about the origin (shapely's
    ``affinity.rotate`` takes degrees unless told otherwise) and then shifted
    by ``(center_x, center_y)``.  The Decimals are converted to ``float`` for
    the shapely calls.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x, self.center_y, self.angle = (
            Decimal(str(raw)) for raw in (center_x, center_y, angle)
        )

        # Outline in local coordinates: the x >= 0 side from the top vertex
        # downwards, the two lowest vertices, then the x <= 0 side back up.
        outline = [
            (0.0, 0.8),
            (0.125, 0.5),
            (0.0625, 0.5),
            (0.2, 0.25),
            (0.1, 0.25),
            (0.35, 0.0),
            (0.075, 0.0),
            (0.075, -0.2),
            (-0.075, -0.2),
            (-0.075, 0.0),
            (-0.35, 0.0),
            (-0.1, 0.25),
            (-0.2, 0.25),
            (-0.0625, 0.5),
            (-0.125, 0.5),
        ]
        base_shape = Polygon(outline)

        # Rotate about the local origin first, then move into place.
        turned = affinity.rotate(base_shape, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def parse_value(val):
    """Return ``val`` as a string, without its leading ``'s'`` if it has one.

    Only ``str`` inputs are checked for the prefix, and only the first
    character is removed.  Any other input is passed through ``str()``.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_marker else str(val)

def load_trees_for_n(df, n):
    """Build the ``ChristmasTree`` objects stored in ``df`` for puzzle ``n``.

    Rows are selected when their ``'id'`` starts with ``n`` zero-padded to
    three digits followed by ``'_'``.  Each selected row's ``'x'``, ``'y'``
    and ``'deg'`` cells go through ``parse_value`` before being handed to
    ``ChristmasTree``.  The result is a list in row order, and is empty when
    no row matches.
    """
    id_prefix = f"{n:03d}_"
    belongs_to_n = df['id'].str.startswith(id_prefix)
    return [
        ChristmasTree(
            parse_value(record['x']),
            parse_value(record['y']),
            parse_value(record['deg']),
        )
        for _, record in df[belongs_to_n].iterrows()
    ]

# Progress marker printed once the definitions above have been executed.
print("Classes defined")

In [None]:
# Read the two submission CSVs into DataFrames, valid first, then touching.
# NOTE(review): the "touching" frame is read from the 002_valid_ensemble
# directory -- confirm that this is the intended file.
valid_df, touching_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/home/code/experiments/002_valid_submission/submission.csv',
        '/home/code/experiments/002_valid_ensemble/submission.csv',
    )
)

# Row counts of each frame.
print(f"Valid submission: {len(valid_df)} rows")
print(f"Touching submission: {len(touching_df)} rows")

In [None]:
def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box around all trees.

    Every vertex of each ``tree.polygon.exterior`` is stacked into one array.
    The result is the larger of the x extent and the y extent of that array.
    """
    vertices = np.vstack(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )
    # Unpacking into exactly two names keeps the 2-D coordinate requirement.
    width, height = vertices.max(axis=0) - vertices.min(axis=0)
    return max(width, height)

def get_min_distance(trees):
    """Return the smallest pairwise ``polygon.distance`` among ``trees``.

    With fewer than two trees there is no pair, so the result is
    ``float('inf')``.
    """
    if len(trees) < 2:
        return float('inf')

    # Seed with +inf so the reduction behaves like a running minimum that
    # starts at infinity.
    candidates = [float('inf')]
    candidates.extend(
        first.polygon.distance(second.polygon)
        for first, second in combinations(trees, 2)
    )
    return min(candidates)

# Per-N comparison of the two submissions for N = 1..200.
# The per-N score is (bounding-box side)**2 / n; "gap" is valid minus touching.
comparison = []
for n in range(1, 201):
    valid_trees, touching_trees = (
        load_trees_for_n(frame, n) for frame in (valid_df, touching_df)
    )

    valid_side, touching_side = map(
        get_bounding_box_side, (valid_trees, touching_trees)
    )

    valid_score, touching_score = (
        (side ** 2) / n for side in (valid_side, touching_side)
    )

    valid_min_dist, touching_min_dist = map(
        get_min_distance, (valid_trees, touching_trees)
    )

    comparison.append(dict(
        n=n,
        valid_score=valid_score,
        touching_score=touching_score,
        gap=valid_score - touching_score,
        valid_min_dist=valid_min_dist,
        touching_min_dist=touching_min_dist,
    ))

# One row per N; the column sums give the totals for each submission.
comp_df = pd.DataFrame(comparison)
print(f"Total valid score: {comp_df['valid_score'].sum():.6f}")
print(f"Total touching score: {comp_df['touching_score'].sum():.6f}")
print(f"Total gap: {comp_df['gap'].sum():.6f}")

In [None]:
# Rank N by gap (valid_score - touching_score), largest first, and print the
# top 20 rows with every comparison column.
comp_df_sorted = comp_df.sort_values('gap', ascending=False)
print("Top 20 N values with largest gap (valid - touching):")
print(comp_df_sorted.head(20).to_string(index=False))

In [None]:
# Analyze: which N values have the worst valid scores?
# Adds a 'valid_efficiency' column (valid_score / n) to comp_df and prints the
# 20 largest values.
# NOTE(review): valid_score is already side**2 / n, so this divides by n a
# second time (side**2 / n**2). That likely pushes small N to the top of the
# ranking -- confirm the double normalization is intended.
comp_df['valid_efficiency'] = comp_df['valid_score'] / comp_df['n']
print("\nN values with worst valid efficiency (score/n):")
print(comp_df.sort_values('valid_efficiency', ascending=False).head(20)[['n', 'valid_score', 'valid_efficiency']].to_string(index=False))

In [None]:
# Key insight: The gap comes from configurations where touching trees were separated
# Let's see if we can apply micro-separation to touching trees

# Planned procedure for a touching configuration (not implemented in this
# cell, which only counts near-touching configurations):
# 1. Find all pairs of touching trees
# 2. Calculate the minimum separation vector
# 3. Apply half the vector to each tree
# 4. Re-optimize the bounding box rotation

# Count, for each submission, the N values whose minimum pairwise distance is
# below 1e-9. N = 1 is never counted because get_min_distance returns inf
# when there is at most one tree.
print("\nAnalyzing touching configurations...")
print(f"N values where touching_min_dist < 1e-9: {(comp_df['touching_min_dist'] < 1e-9).sum()}")
print(f"N values where valid_min_dist < 1e-9: {(comp_df['valid_min_dist'] < 1e-9).sum()}")