# Evolver Loop 1 Analysis

## Issue: Submission failed with 'Overlapping trees in group 126'

The baseline submission from snapshot 21116303805 was rejected by Kaggle due to overlapping trees.
We found that snapshot 21328309254 has a VALID submission that scored 70.647327 on LB.

Let's verify this valid submission and understand the precision requirements.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.ops import unary_union
from decimal import Decimal, getcontext
import json

getcontext().prec = 30

# Load the VALID submission
valid_path = '/home/code/experiments/000_baseline/submission_valid.csv'
df = pd.read_csv(valid_path)
print(f"Loaded {len(df)} rows")
df.head()

In [None]:
# Parse values
def parse_s_value(s_val):
    if isinstance(s_val, str) and s_val.startswith('s'):
        return s_val[1:]  # Keep as string for precision
    return str(s_val)

df['x_str'] = df['x'].apply(parse_s_value)
df['y_str'] = df['y'].apply(parse_s_value)
df['deg_str'] = df['deg'].apply(parse_s_value)

# Convert to float for calculations
df['x_val'] = df['x_str'].astype(float)
df['y_val'] = df['y_str'].astype(float)
df['deg_val'] = df['deg_str'].astype(float)

df['n'] = df['id'].apply(lambda x: int(x.split('_')[0]))
df['tree_idx'] = df['id'].apply(lambda x: int(x.split('_')[1]))

print(f"N range: {df['n'].min()} to {df['n'].max()}")
print(f"\nSample precision (x values):")
for n in [1, 2, 126]:
    sample = df[df['n'] == n].iloc[0]
    print(f"  n={n}: x={sample['x_str']} ({len(sample['x_str'])} chars)")

In [None]:
# Create correct tree polygon
def get_tree_polygon():
    trunk_w = Decimal('0.15')
    trunk_h = Decimal('0.2')
    base_w = Decimal('0.7')
    mid_w = Decimal('0.4')
    top_w = Decimal('0.25')
    tip_y = Decimal('0.8')
    tier_1_y = Decimal('0.5')
    tier_2_y = Decimal('0.25')
    base_y = Decimal('0.0')
    trunk_bottom_y = -trunk_h

    vertices = [
        (float(Decimal('0.0')), float(tip_y)),
        (float(top_w / Decimal('2')), float(tier_1_y)),
        (float(top_w / Decimal('4')), float(tier_1_y)),
        (float(mid_w / Decimal('2')), float(tier_2_y)),
        (float(mid_w / Decimal('4')), float(tier_2_y)),
        (float(base_w / Decimal('2')), float(base_y)),
        (float(trunk_w / Decimal('2')), float(base_y)),
        (float(trunk_w / Decimal('2')), float(trunk_bottom_y)),
        (float(-(trunk_w / Decimal('2'))), float(trunk_bottom_y)),
        (float(-(trunk_w / Decimal('2'))), float(base_y)),
        (float(-(base_w / Decimal('2'))), float(base_y)),
        (float(-(mid_w / Decimal('4'))), float(tier_2_y)),
        (float(-(mid_w / Decimal('2'))), float(tier_2_y)),
        (float(-(top_w / Decimal('4'))), float(tier_1_y)),
        (float(-(top_w / Decimal('2'))), float(tier_1_y)),
    ]
    return Polygon(vertices)

TREE_POLY = get_tree_polygon()
print(f"Tree polygon: {len(TREE_POLY.exterior.coords)} vertices")
print(f"Bounds: {TREE_POLY.bounds}")
print(f"Area: {TREE_POLY.area:.6f}")

In [None]:
def create_tree_at_position(x, y, deg):
    tree = rotate(TREE_POLY, deg, origin=(0, 0))
    tree = translate(tree, x, y)
    return tree

def get_bounding_box_side(polygons):
    if not polygons:
        return 0
    combined = unary_union(polygons)
    bounds = combined.bounds
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    return max(width, height)

# Compute score for all n
scores_by_n = {}
for n in range(1, 201):
    n_df = df[df['n'] == n]
    if len(n_df) != n:
        print(f"ERROR: n={n} has {len(n_df)} trees")
        continue
    
    polygons = [create_tree_at_position(row['x_val'], row['y_val'], row['deg_val']) 
                for _, row in n_df.iterrows()]
    
    side = get_bounding_box_side(polygons)
    score_n = (side ** 2) / n
    scores_by_n[n] = {'side': side, 'score': score_n}
    
    if n <= 5 or n % 50 == 0:
        print(f"n={n:3d}: side={side:.6f}, score={score_n:.6f}")

total_score = sum(s['score'] for s in scores_by_n.values())
print(f"\n{'='*50}")
print(f"TOTAL SCORE: {total_score:.6f}")
print(f"{'='*50}")
print(f"\nTarget: 68.888293")
print(f"Gap: {total_score - 68.888293:.6f}")

In [None]:
# Score breakdown by range
print("\nScore breakdown by n-range:")
print("-" * 40)
ranges = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]
for start, end in ranges:
    range_score = sum(scores_by_n[n]['score'] for n in range(start, end+1))
    print(f"n={start:3d}-{end:3d}: {range_score:.6f}")
print("-" * 40)
print(f"Total: {total_score:.6f}")

In [None]:
# Update metrics.json with correct score
metrics = {
    'cv_score': total_score,
    'total_score': total_score,
    'overlap_errors': 0,
    'target': 68.888293,
    'gap': total_score - 68.888293,
    'source': 'snapshot_21328309254_valid_submission'
}

with open('/home/code/experiments/000_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Updated metrics.json")
print(json.dumps(metrics, indent=2))