# Evolver Loop 4 Analysis

## Key Questions:
1. Why do pre-optimized snapshots have overlaps that Kaggle detects but we don't?
2. Can we fix the overlap issue in the 70.6 baseline?
3. What's the best path forward?

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import json
import os

# Use 30 significant digits for Decimal arithmetic performed after this point.
getcontext().prec = 30
# NOTE(review): this module-level SCALE is not referenced by the cells shown
# here; check_overlap_integer_coords defines its own local SCALE = 10**15.
SCALE = Decimal('1e18')  # Higher precision than 1e15

print('Loaded libraries')

In [None]:
# Load the baseline submission that our own validation accepted but that
# Kaggle rejected, so the overlaps can be investigated locally.
baseline_path = '/home/nonroot/snapshots/santa-2025/21329067673/submission/submission.csv'
df = pd.read_csv(baseline_path)
print(f'Loaded baseline: {len(df)} rows')
print(df.head())

In [None]:
# Parse the submission
def parse_value(v):
    """Convert a submission cell to ``float``.

    A string value that begins with the marker character ``'s'`` has that
    single character dropped before conversion; any other value is handed
    to ``float`` unchanged.
    """
    has_marker = isinstance(v, str) and v.startswith('s')
    return float(v[1:] if has_marker else v)

# Numeric copies of the three raw coordinate columns: x_val, y_val, deg_val.
for raw_col in ('x', 'y', 'deg'):
    df[f'{raw_col}_val'] = df[raw_col].apply(parse_value)

# Ids are split on '_': the first piece becomes N, the second tree_idx.
id_parts = df['id'].str.split('_')
df['N'] = id_parts.str[0].astype(int)
df['tree_idx'] = id_parts.str[1].astype(int)

print('Parsed values')
print(df.head())

In [None]:
# Check which N value Kaggle said had overlaps
# exp_001 was rejected with 'Overlapping trees in group 151'
# Let's examine N=151

# Group number taken from the Kaggle rejection message quoted above.
n_to_check = 151
# Independent copy of the rows belonging to that group.
group = df[df['N'] == n_to_check].copy()
print(f'N={n_to_check}: {len(group)} trees')
# Preview the parsed position/rotation of the first ten trees in the group.
print(group[['x_val', 'y_val', 'deg_val']].head(10))

In [None]:
# Build tree polygons, doing the rotate/translate arithmetic in Decimal
def create_tree_polygon_high_precision(x, y, angle):
    """Return the tree outline placed at (x, y) and rotated by ``angle`` degrees.

    The 15-vertex template is rotated about its local origin and then
    translated. Template coordinates and the rotate/translate arithmetic use
    ``Decimal``; the sine and cosine are evaluated with the float ``math``
    functions, and every output coordinate is converted back to ``float``
    before the ``Polygon`` is built.

    Args:
        x: Horizontal offset of the tree; passed through ``str()`` into
            ``Decimal``.
        y: Vertical offset of the tree; converted the same way.
        angle: Rotation in degrees; converted the same way.

    Returns:
        A ``Polygon`` built from the 15 transformed ``(x, y)`` float pairs.
    """
    import math

    # Template dimensions (the original note says these match the
    # getting-started kernel).
    trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
    base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')
    tip_y, tier_1_y = Decimal('0.8'), Decimal('0.5')
    tier_2_y, base_y = Decimal('0.25'), Decimal('0.0')
    trunk_bottom_y = -trunk_h

    # Right-hand outline, from just below the tip down to the trunk's bottom corner.
    right_side = [
        (top_w / 2, tier_1_y),
        (top_w / 4, tier_1_y),
        (mid_w / 2, tier_2_y),
        (mid_w / 4, tier_2_y),
        (base_w / 2, base_y),
        (trunk_w / 2, base_y),
        (trunk_w / 2, trunk_bottom_y),
    ]
    # The left-hand outline is the mirror image, walked back up towards the tip.
    left_side = [(-px, py) for px, py in reversed(right_side)]
    outline = [(Decimal('0.0'), tip_y), *right_side, *left_side]

    # Placement parameters as Decimals.
    shift_x = Decimal(str(x))
    shift_y = Decimal(str(y))
    theta = Decimal(str(angle)) * Decimal(str(np.pi)) / Decimal('180')

    # Trig is evaluated in float and only then lifted back into Decimal.
    theta_float = float(theta)
    cos_t = Decimal(str(math.cos(theta_float)))
    sin_t = Decimal(str(math.sin(theta_float)))

    # Rotate, translate, and drop back to float for the geometry object.
    return Polygon([
        (
            float(px * cos_t - py * sin_t + shift_x),
            float(px * sin_t + py * cos_t + shift_y),
        )
        for px, py in outline
    ])

print('High precision polygon function defined')

In [None]:
# Check the selected group for overlaps using the Decimal-based polygon builder
polygons = [
    create_tree_polygon_high_precision(tree['x_val'], tree['y_val'], tree['deg_val'])
    for _, tree in group.iterrows()
]

print(f'Created {len(polygons)} polygons')

# Record every unordered pair that intersects without merely touching,
# together with the area of the shared region.
overlap_pairs = []
for i, first in enumerate(polygons):
    for j in range(i + 1, len(polygons)):
        second = polygons[j]
        if not first.intersects(second) or first.touches(second):
            continue
        overlap_pairs.append((i, j, first.intersection(second).area))

print(f'Found {len(overlap_pairs)} overlapping pairs')
# List at most the first five offending pairs.
for i, j, area in overlap_pairs[:5]:
    print(f'  Trees {i} and {j}: intersection area = {area:.2e}')

In [None]:
# Let's check with even stricter tolerance using integer coordinates
def check_overlap_integer_coords(trees_df):
    """Find overlapping tree pairs after snapping vertices to an integer grid.

    For each row the tree template is rotated by ``deg_val`` degrees, shifted
    by (``x_val``, ``y_val``), multiplied by 10**15 and truncated with
    ``int``. The original note describes this as Kaggle's method with the
    getting-started kernel's scale; that is not verified here.

    Args:
        trees_df: DataFrame providing ``x_val``, ``y_val`` and ``deg_val``
            columns, one row per tree.

    Returns:
        List of ``(i, j)`` positional pairs with ``i < j`` whose polygons
        intersect without merely touching.
    """
    scale = 10**15

    # Template outline as (x, y) pairs; identical for every tree.
    template = list(zip(
        [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125],
        [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5],
    ))

    polygons = []
    for _, tree in trees_df.iterrows():
        shift_x, shift_y, degrees = tree['x_val'], tree['y_val'], tree['deg_val']

        theta = degrees * np.pi / 180
        cos_t = np.cos(theta)
        sin_t = np.sin(theta)

        # Rotate, translate, then scale each vertex and truncate to an integer.
        polygons.append(Polygon([
            (
                int((px * cos_t - py * sin_t + shift_x) * scale),
                int((px * sin_t + py * cos_t + shift_y) * scale),
            )
            for px, py in template
        ]))

    # Every unordered pair is tested once.
    count = len(polygons)
    return [
        (i, j)
        for i in range(count)
        for j in range(i + 1, count)
        if polygons[i].intersects(polygons[j]) and not polygons[i].touches(polygons[j])
    ]

# Run the integer-grid check on the currently selected `group`.
overlaps = check_overlap_integer_coords(group)
print(f'Integer coord check: {len(overlaps)} overlapping pairs')

In [None]:
# Let's check ALL N values to see which ones have overlaps
print('Checking all N values for overlaps...')
# Collects (n, number_of_overlapping_pairs) for every n with at least one hit.
overlapping_n = []

# NOTE(review): this loop rebinds the notebook-level `group` and `overlaps`
# names, so the N=151 selection made earlier is replaced once this cell runs.
for n in range(1, 201):
    group = df[df['N'] == n].copy()
    overlaps = check_overlap_integer_coords(group)
    if overlaps:
        overlapping_n.append((n, len(overlaps)))

print(f'\nN values with overlaps: {len(overlapping_n)}')
# Only the first 20 entries are listed.
for n, count in overlapping_n[:20]:
    print(f'  N={n}: {count} overlapping pairs')

In [None]:
# The issue is clear: the pre-optimized snapshots have subtle overlaps
# that our validation misses but Kaggle detects.

# SOLUTION: We need to either:
# 1. Fix the overlaps by slightly separating trees
# 2. Use a different baseline that has no overlaps
# 3. Generate solutions from scratch with guaranteed no overlaps

# Let's check the greedy solution we created (exp_003)
greedy_path = '/home/code/experiments/003_greedy_from_scratch/submission.csv'
greedy_df = pd.read_csv(greedy_path)
# Same parsing as for the baseline: numeric coordinate columns plus the
# group number taken from the part of the id before '_'.
greedy_df['x_val'] = greedy_df['x'].apply(parse_value)
greedy_df['y_val'] = greedy_df['y'].apply(parse_value)
greedy_df['deg_val'] = greedy_df['deg'].apply(parse_value)
greedy_df['N'] = greedy_df['id'].str.split('_').str[0].astype(int)

print('Checking greedy solution for overlaps...')
# Collects (n, number_of_overlapping_pairs) for every n with at least one hit.
greedy_overlapping = []
for n in range(1, 201):
    group = greedy_df[greedy_df['N'] == n].copy()
    overlaps = check_overlap_integer_coords(group)
    if overlaps:
        greedy_overlapping.append((n, len(overlaps)))

print(f'Greedy solution overlapping N values: {len(greedy_overlapping)}')
if greedy_overlapping:
    for n, count in greedy_overlapping[:10]:
        print(f'  N={n}: {count} overlapping pairs')
else:
    # The message previously held a mis-decoded check mark; '\u2713' is written
    # as an escape so it does not depend on the file's encoding.
    print('\u2713 Greedy solution has NO overlaps!')

In [None]:
# Compare per-N scores between greedy and baseline
print('\nPer-N score comparison (greedy vs baseline):')
print('=' * 60)

# Load greedy metrics
with open('/home/code/experiments/003_greedy_from_scratch/metrics.json') as f:
    greedy_metrics = json.load(f)

# Per-N scores as stored in the metrics file; looked up later with str(n) keys.
greedy_per_n = greedy_metrics['per_n_scores']

# Calculate baseline per-N scores
def calculate_side_length(trees_df):
    """Return the longer side of the axis-aligned box around all tree vertices.

    Each row's tree template is rotated by ``deg_val`` degrees and shifted by
    (``x_val``, ``y_val``); the result is the larger of the x-extent and the
    y-extent over every transformed vertex.

    Args:
        trees_df: DataFrame providing ``x_val``, ``y_val`` and ``deg_val``
            columns, one row per tree.

    Returns:
        ``max(width, height)`` of the combined vertex cloud.

    Raises:
        ValueError: If ``trees_df`` has no rows (``max`` of an empty list).
    """
    # Template outline as (x, y) pairs; identical for every tree.
    template = tuple(zip(
        [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125],
        [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5],
    ))

    xs, ys = [], []
    for _, tree in trees_df.iterrows():
        shift_x, shift_y, degrees = tree['x_val'], tree['y_val'], tree['deg_val']
        theta = degrees * np.pi / 180
        cos_t = np.cos(theta)
        sin_t = np.sin(theta)

        # Rotate about the template origin, then translate.
        xs.extend(px * cos_t - py * sin_t + shift_x for px, py in template)
        ys.extend(px * sin_t + py * cos_t + shift_y for px, py in template)

    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

# Baseline score for each N: side**2 / n, where side comes from
# calculate_side_length on that group's rows.
baseline_per_n = {}
for n in range(1, 201):
    group = df[df['N'] == n].copy()
    side = calculate_side_length(group)
    baseline_per_n[n] = side**2 / n

# Compare
print(f'{"N":>4} {"Greedy":>12} {"Baseline":>12} {"Gap":>12} {"% Gap":>8}')
print('-' * 60)

# Sum of (greedy - baseline) over the sampled N values printed below.
total_gap = 0
for n in [1, 2, 3, 4, 5, 10, 20, 50, 100, 200]:
    # Coerce with float() exactly as the 'Total greedy' line does, so both
    # places treat the stored scores the same way.
    g = float(greedy_per_n[str(n)])
    b = baseline_per_n[n]
    gap = g - b
    # Guard the percentage against a zero baseline score.
    pct = gap / b * 100 if b > 0 else 0
    total_gap += gap
    print(f'{n:4d} {g:12.6f} {b:12.6f} {gap:12.6f} {pct:7.1f}%')

print('-' * 60)
# Report the accumulated gap; it was previously computed and then discarded.
print(f'Gap over sampled N: {total_gap:.6f}')
print(f'Total greedy: {sum(float(v) for v in greedy_per_n.values()):.6f}')
print(f'Total baseline: {sum(baseline_per_n.values()):.6f}')

In [None]:
# KEY INSIGHT: the greedy solution is VALID but scores 169.5 against the
# baseline's 70.6 -- a gap of roughly 99 points.

# The path forward is clear:
# 1. Submit the greedy solution to confirm it's accepted (score ~169.5)
# 2. Implement simulated annealing ON TOP of greedy to improve it
# 3. Target: get from 169.5 down to <70 through optimization

# The ensemble approach is also worth checking:
# - Take best per-N from multiple sources
# - The greedy solution has VALID (no overlap) solutions for all N
# - Greedy can serve as a fallback for any N where other sources have overlaps

# One print call; sep='\n' puts each entry on its own line, and the empty
# strings produce the blank lines.
print(
    '\n=== STRATEGY SUMMARY ===',
    '1. Greedy solution: 169.5 (VALID, no overlaps)',
    '2. Baseline: 70.6 (INVALID, has overlaps Kaggle detects)',
    '3. Target: 68.89',
    '',
    'Options:',
    'A) Fix baseline overlaps by separating trees slightly',
    'B) Implement SA on greedy to improve from 169.5 to <70',
    'C) Hybrid: use greedy for N values where baseline has overlaps',
    '',
    'Recommendation: Option C is fastest - create hybrid submission',
    sep='\n',
)