# Evolver Loop 1 Analysis

## Situation
- First submission failed with 'Overlapping trees in group 040'
- Need to find a valid submission and understand the overlap detection issue

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import os

# Use 30 significant digits for Decimal arithmetic (applies to the current
# thread's decimal context, so it affects every Decimal operation below).
getcontext().prec = 30

# Tree polygon definition (from validated scoring function)
class ChristmasTree:
    """A tree-shaped polygon positioned by a centre point and a rotation.

    The silhouette has 15 vertices (tip, three tiers and a trunk) and is
    symmetric about the local y-axis. It is built around the local origin,
    rotated by ``angle`` about (0, 0) and then translated to
    ``(center_x, center_y)``.

    Args:
        center_x: x offset of the tree; anything ``Decimal()`` accepts.
        center_y: y offset of the tree; anything ``Decimal()`` accepts.
        angle: rotation passed to ``affinity.rotate`` (which interprets it
            as degrees by default); anything ``Decimal()`` accepts.

    After construction the instance exposes ``center_x``, ``center_y`` and
    ``angle`` as ``Decimal`` values and ``polygon`` as the placed shape.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Widths of the trunk and of the three tiers.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights at which the outline changes direction.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, walking down from just below the
        # tip to the bottom-right corner of the trunk.
        right_half = [
            (top_w / 2, tier_1_y),
            (top_w / 4, tier_1_y),
            (mid_w / 2, tier_2_y),
            (mid_w / 4, tier_2_y),
            (base_w / 2, base_y),
            (trunk_w / 2, base_y),
            (trunk_w / 2, trunk_bottom_y),
        ]

        # The left half is the mirror image, walked back up towards the tip.
        # Coordinates are converted to float here, so the polygon itself
        # carries float (not Decimal) precision.
        outline = [(float(0), float(tip_y))]
        outline.extend((float(x), float(y)) for x, y in right_half)
        outline.extend((float(-x), float(y)) for x, y in reversed(right_half))
        silhouette = Polygon(outline)

        turned = affinity.rotate(silhouette, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def load_trees_for_n(df, n):
    """Build the ``ChristmasTree`` objects for group ``n`` of a submission.

    Rows are selected by their ``id`` starting with the zero-padded,
    three-digit group number followed by an underscore (e.g. ``"040_"``).
    The ``x``, ``y`` and ``deg`` cells are stringified and any leading
    ``'s'`` characters are stripped before being handed to
    ``ChristmasTree`` (presumably the submission format prefixes each value
    with ``s`` -- confirm against the file spec).

    Args:
        df: DataFrame with string ``id`` and ``x``, ``y``, ``deg`` columns.
        n: group number to load.

    Returns:
        List of ``ChristmasTree`` instances in row order; empty when no row
        matches.
    """
    group_rows = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTree(
            *(str(row[column]).lstrip('s') for column in ('x', 'y', 'deg'))
        )
        for _, row in group_rows.iterrows()
    ]

def has_overlap(trees, tolerance=1e-12):
    """Find the first pair of trees whose polygons genuinely overlap.

    A pair counts as overlapping when the polygons intersect, do not merely
    touch, and the area of their intersection is strictly greater than
    ``tolerance``.

    Args:
        trees: sequence of objects exposing a ``polygon`` attribute.
        tolerance: intersection areas at or below this value are ignored.

    Returns:
        ``(True, i, j)`` with the positions of the first offending pair, or
        ``(False, None, None)`` when there is none (including when fewer
        than two trees are given).
    """
    if len(trees) <= 1:
        return False, None, None

    shapes = [tree.polygon for tree in trees]
    # NOTE(review): the loop below treats the query results as integer
    # positions into ``shapes`` (Shapely 2.x behaviour) -- confirm the
    # installed Shapely version.
    spatial_index = STRtree(shapes)

    for i, shape in enumerate(shapes):
        for j in spatial_index.query(shape):
            if j == i:
                continue
            other = shapes[j]
            # Skip pairs that are disjoint or only share boundary points.
            if not shape.intersects(other) or shape.touches(other):
                continue
            if shape.intersection(other).area > tolerance:
                return True, i, j
    return False, None, None

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Args:
        trees: sequence of objects whose ``polygon.exterior.coords`` yields
            the outline vertices.

    Returns:
        ``max(x extent, y extent)`` over every exterior vertex, or ``0``
        when ``trees`` is empty.
    """
    if not trees:
        return 0

    # Stack every exterior vertex into a single (num_points, dims) array.
    vertices = np.vstack(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )
    xs = vertices[:, 0]
    ys = vertices[:, 1]
    width = xs.max() - xs.min()
    height = ys.max() - ys.min()
    return max(width, height)

# Marker output confirming that the definitions above were executed.
print("Functions defined.")

In [None]:
# Inspect group N=40 of the rejected submission (candidate_000.csv).
failed_file = '/home/code/submission_candidates/candidate_000.csv'
df_failed = pd.read_csv(failed_file)

print("Checking N=40 in failed submission...")
trees_40 = load_trees_for_n(df_failed, 40)
print(f"Number of trees: {len(trees_40)}")

# Sweep the area tolerance to see at which threshold an overlap is reported.
for tol in (1e-12, 1e-10, 1e-8, 1e-6, 0):
    has_ovlp, i, j = has_overlap(trees_40, tolerance=tol)
    if not has_ovlp:
        print(f"Tolerance {tol}: No overlap")
        continue
    print(f"Tolerance {tol}: OVERLAP between trees {i} and {j}")

In [None]:
# Inspect group N=40 of the submission believed to be valid
# (candidate_001.csv; the original note says it came from snapshot
# candidate_024.csv).
valid_file = '/home/code/submission_candidates/candidate_001.csv'
df_valid = pd.read_csv(valid_file)

print("Checking N=40 in valid submission...")
trees_40_valid = load_trees_for_n(df_valid, 40)
print(f"Number of trees: {len(trees_40_valid)}")

# Sweep the area tolerance to see at which threshold an overlap is reported.
for tol in (1e-12, 1e-10, 1e-8, 1e-6, 0):
    has_ovlp, i, j = has_overlap(trees_40_valid, tolerance=tol)
    if not has_ovlp:
        print(f"Tolerance {tol}: No overlap")
        continue
    print(f"Tolerance {tol}: OVERLAP between trees {i} and {j}")

In [None]:
# Score the valid submission
def score_submission(df, max_n=200):
    """Score a submission by summing ``side**2 / n`` over groups 1..max_n.

    For each group the trees are loaded, checked for overlaps (with the
    default tolerance of ``has_overlap``) and measured with
    ``get_bounding_box_side``. A group whose tree count differs from ``n``
    triggers a printed warning and is skipped: it contributes nothing to
    the score and gets no entry in the returned side-length mapping.
    Overlapping groups are reported but still scored.

    Args:
        df: submission DataFrame accepted by ``load_trees_for_n``.
        max_n: largest group number to evaluate (inclusive).

    Returns:
        Tuple ``(total_score, overlaps, side_lengths)``: the summed score,
        the list of group numbers with an overlap, and a dict mapping each
        scored group number to its bounding-box side.
    """
    running_total = 0
    overlapping_groups = []
    side_by_group = {}

    for n in range(1, max_n + 1):
        group = load_trees_for_n(df, n)
        if len(group) != n:
            print(f"Warning: n={n} has {len(group)} trees instead of {n}")
            continue

        if has_overlap(group)[0]:
            overlapping_groups.append(n)

        side_by_group[n] = get_bounding_box_side(group)
        running_total += (side_by_group[n] ** 2) / n

    return running_total, overlapping_groups, side_by_group

# Score the submission loaded into df_valid and compare it with the target.
print("Scoring valid submission...")
score, overlaps, side_lengths = score_submission(df_valid)
gap = score - 68.897509  # distance from the target score
print(f"Score: {score:.6f}")
print(f"Overlapping configurations: {overlaps}")
print(f"Target: 68.897509")
print(f"Gap: {gap:.6f}")

In [None]:
# Score breakdown by N range
# score_submission() leaves out any group whose tree count is wrong, so
# side_lengths may not contain every n in 1..200. Groups that are missing are
# skipped here instead of raising KeyError; when all groups are present the
# output is unchanged.
print("\nScore breakdown by N range:")
for start, end in [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]:
    range_score = sum(
        (side_lengths[n] ** 2) / n
        for n in range(start, end + 1)
        if n in side_lengths
    )
    print(f"  N={start}-{end}: {range_score:.6f}")

print("\nTop 10 N values by score contribution:")
# Each entry is (n, contribution to the score, bounding-box side).
contributions = [
    (n, (side_lengths[n] ** 2) / n, side_lengths[n])
    for n in range(1, 201)
    if n in side_lengths
]
contributions.sort(key=lambda x: x[1], reverse=True)
for n, contrib, side in contributions[:10]:
    print(f"  N={n:3d}: contrib={contrib:.6f}, side={side:.6f}")

In [None]:
# Summary of the scoring results computed in the cells above.
rule = "="*60
gap = score - 68.897509           # absolute distance from the target score
gap_pct = gap/68.897509*100       # the same distance as a percentage of the target

print(rule)
print("SUMMARY")
print(rule)
print(f"Valid submission: candidate_001.csv (from snapshot candidate_024.csv)")
print(f"Score: {score:.6f}")
print(f"Overlaps: {len(overlaps)}")
print(f"Target: 68.897509")
print(f"Gap: {gap:.6f} ({gap_pct:.2f}%)")
print(f"\nThis submission was verified on LB with score 70.626088313081")
print(rule)