# Evolver Loop 2 Analysis

## Key Issues to Address:
1. First submission (exp_000) was REJECTED with "Overlapping trees in group 040"
2. The current submission (exp_001), taken from snapshot 21145966992, has overlaps in 72 N values according to the evaluator
3. Need to understand Kaggle's validation method and find a valid baseline

## Key Insight from Kernels:
Kaggle uses **integer scaling** (every coordinate is multiplied by 1e18) for overlap detection to avoid floating-point precision issues. The checks below apply the same scaling.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from shapely.ops import unary_union
import os

# Set precision for Decimal (25 is good for contest standards)
# All Decimal arithmetic in this notebook is rounded to 25 significant digits.
getcontext().prec = 25
# Multiplier applied to every tree coordinate before the shapely polygons are
# built (see ChristmasTree); scores divide it back out.
scale_factor = Decimal("1e18")

# Echo the configuration so the cell output records which scaling was used.
print(f"Using scale factor: {scale_factor}")
print("This matches Kaggle's validation method!")

Using scale factor: 1E+18
This matches Kaggle's validation method!


In [2]:
# Tree dimensions - EXACT same as Kaggle
class ChristmasTree:
    """A fixed-size Christmas tree outline with a position and a rotation.

    The outline is built in coordinates multiplied by the module-level
    ``scale_factor`` and stored as a shapely polygon in ``self.polygon``.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        """Build the scaled polygon for a tree at the given pose.

        Args:
            center_x: X offset of the tree; any value ``Decimal()`` accepts.
            center_y: Y offset of the tree; any value ``Decimal()`` accepts.
            angle: Rotation forwarded to ``affinity.rotate`` about the origin
                (presumably degrees, since the submission column is ``deg``).
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal("2")
        four = Decimal("4")

        # Unscaled tree measurements.
        trunk_width = Decimal("0.15")
        trunk_height = Decimal("0.2")
        base_width = Decimal("0.7")
        middle_width = Decimal("0.4")
        top_width = Decimal("0.25")
        y_tip = Decimal("0.8")
        y_tier_1 = Decimal("0.5")
        y_tier_2 = Decimal("0.25")
        y_base = Decimal("0.0")
        y_trunk_bottom = -trunk_height

        # Right-hand half of the outline, walking from just below the tip
        # down to the trunk's bottom-right corner.
        right_side = [
            (top_width / two, y_tier_1),
            (top_width / four, y_tier_1),
            (middle_width / two, y_tier_2),
            (middle_width / four, y_tier_2),
            (base_width / two, y_base),
            (trunk_width / two, y_base),
            (trunk_width / two, y_trunk_bottom),
        ]
        # The left half is the mirror image, walked back up towards the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        # 15 vertices in total: the tip followed by both halves.
        outline = [(Decimal("0.0"), y_tip)] + right_side + left_side

        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        # Rotate about the origin first, then shift to the scaled centre.
        turned = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

print("ChristmasTree class defined with Kaggle-compatible scaling!")

ChristmasTree class defined with Kaggle-compatible scaling!


In [3]:
def has_overlap_kaggle(trees):
    """Report which pairs of trees overlap - KAGGLE METHOD.

    Two trees count as overlapping when their polygons intersect but do not
    merely touch.

    Args:
        trees: Sequence of objects exposing a shapely ``polygon`` attribute.

    Returns:
        A ``(found, pairs)`` tuple: ``found`` is True when at least one pair
        overlaps, and ``pairs`` lists ``(i, j)`` positions with ``i < j``.
    """
    # Fewer than two trees can never collide.
    if len(trees) < 2:
        return False, []

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    # The index only proposes candidates; each candidate is confirmed with
    # the exact predicates. Requiring j > i visits every pair once.
    found = [
        (i, j)
        for i, shape in enumerate(shapes)
        for j in spatial_index.query(shape)
        if j > i
        and shape.intersects(shapes[j])
        and not shape.touches(shapes[j])
    ]

    return bool(found), found

def load_trees_for_n(n, df):
    """Build the ChristmasTree objects for group ``n`` of a submission.

    Args:
        n: Group number; rows whose ``id`` starts with the zero-padded
            three-digit form of ``n`` followed by ``_`` are selected.
        df: Submission DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        A list of ChristmasTree instances, one per selected row whose three
        values are all non-empty once leading ``s`` characters are removed.
    """
    id_prefix = f"{n:03d}_"
    group_rows = df[df["id"].str.startswith(id_prefix)]

    trees = []
    for _, record in group_rows.iterrows():
        # Values carry a leading 's'; drop it before parsing.
        x, y, deg = (str(record[col]).lstrip('s') for col in ("x", "y", "deg"))
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees

print("Kaggle-compatible overlap detection defined!")

Kaggle-compatible overlap detection defined!


In [4]:
# Load the submission that is currently staged
df = pd.read_csv('/home/submission/submission.csv')
print(f"Loaded {len(df)} rows from current submission")

# Re-run the overlap check on group 40, the group named in the rejection
n = 40
trees_40 = load_trees_for_n(n, df)
has_overlap, pairs = has_overlap_kaggle(trees_40)
print(
    f"\nN=40: {len(trees_40)} trees",
    f"Has overlap (Kaggle method): {has_overlap}",
    sep="\n",
)
# Show at most five offending pairs to keep the output short
if len(pairs) > 0:
    print(f"Overlapping pairs: {pairs[:5]}...")

Loaded 20100 rows from current submission

N=40: 40 trees
Has overlap (Kaggle method): True
Overlapping pairs: [(0, 20), (1, 22), (1, 11), (2, 38), (2, 21)]...


In [5]:
# Scan every group of the current submission with the Kaggle-style check
print("Checking all N values for overlaps (Kaggle method)...")
overlapping_ns = []
for n in range(1, 201):
    trees = load_trees_for_n(n, df)
    has_overlap, pairs = has_overlap_kaggle(trees)
    if not has_overlap:
        continue
    overlapping_ns.append((n, len(pairs)))
    # Only the first ten offending groups are echoed while scanning
    if len(overlapping_ns) > 10:
        continue
    print(f"  N={n}: {len(pairs)} overlapping pairs")

print(f"\nTotal N values with overlaps: {len(overlapping_ns)}")
if not overlapping_ns:
    print("NO OVERLAPS DETECTED - This submission should pass Kaggle validation!")
else:
    print(f"First 10: {overlapping_ns[:10]}")

Checking all N values for overlaps (Kaggle method)...
  N=2: 1 overlapping pairs
  N=4: 4 overlapping pairs
  N=5: 10 overlapping pairs
  N=16: 21 overlapping pairs


  N=35: 50 overlapping pairs
  N=36: 57 overlapping pairs
  N=40: 37 overlapping pairs
  N=46: 39 overlapping pairs
  N=47: 40 overlapping pairs
  N=48: 48 overlapping pairs



Total N values with overlaps: 72
First 10: [(2, 1), (4, 4), (5, 10), (16, 21), (35, 50), (36, 57), (40, 37), (46, 39), (47, 40), (48, 48)]


In [None]:
# Run the same scan on the first baseline, the one Kaggle rejected
original_baseline_path = '/home/nonroot/snapshots/santa-2025/21331543270/submission/submission.csv'
df_orig = pd.read_csv(original_baseline_path)
print("Checking original baseline (21331543270) that was rejected...")

# Collect (N, number of overlapping pairs) for every group with an overlap
overlapping_ns_orig = []
for n in range(1, 201):
    trees = load_trees_for_n(n, df_orig)
    has_overlap, pairs = has_overlap_kaggle(trees)
    if not has_overlap:
        continue
    overlapping_ns_orig.append((n, len(pairs)))

print(f"Total N values with overlaps: {len(overlapping_ns_orig)}")
if not overlapping_ns_orig:
    print("NO OVERLAPS - Why did Kaggle reject this?")
else:
    print(f"Overlapping N values: {[group for group, _ in overlapping_ns_orig]}")

In [None]:
# Check the recommended valid snapshots: for each one report its score and
# which groups overlap according to the Kaggle-style check.
valid_snapshots = ['21329067673', '21328310479']

for snap_id in valid_snapshots:
    snap_path = f'/home/nonroot/snapshots/santa-2025/{snap_id}/submission/submission.csv'
    if not os.path.exists(snap_path):
        # Say so explicitly; a silent skip looks the same as "never checked".
        print(f"\nSnapshot {snap_id}: submission file not found at {snap_path}")
        continue

    df_snap = pd.read_csv(snap_path)

    overlapping_ns_snap = []  # (N, number of overlapping pairs)
    incomplete_ns_snap = []   # N values that do not contain exactly N trees
    total_score = 0
    # One pass per group: the trees are loaded once and used for both the
    # overlap check and the score.
    for n in range(1, 201):
        trees = load_trees_for_n(n, df_snap)
        if len(trees) != n:
            incomplete_ns_snap.append(n)

        has_overlap, pairs = has_overlap_kaggle(trees)
        if has_overlap:
            overlapping_ns_snap.append((n, len(pairs)))

        if trees:
            # Side of the axis-aligned square enclosing the whole group,
            # converted back from scaled units.
            polygons = [t.polygon for t in trees]
            union = unary_union(polygons)
            bounds = union.bounds
            side = max(bounds[2] - bounds[0], bounds[3] - bounds[1]) / float(scale_factor)
            total_score += (side ** 2) / n

    print(f"\nSnapshot {snap_id}:")
    print(f"  Score: {total_score:.6f}")
    print(f"  Overlapping N values: {len(overlapping_ns_snap)}")
    if overlapping_ns_snap:
        print(f"  Which N: {[x[0] for x in overlapping_ns_snap[:10]]}...")
    if incomplete_ns_snap:
        # Missing trees shrink the bounding square, so the score above is
        # not comparable with a complete submission.
        print(f"  WARNING: {len(incomplete_ns_snap)} N values do not have exactly N trees: {incomplete_ns_snap[:10]}...")

In [None]:
# Find the BEST valid snapshot (no overlaps, best score).
# A snapshot is valid only if every group N in 1..200 holds exactly N trees
# and none of those trees overlap; among valid snapshots the lowest score wins.
print("Searching for best valid snapshot...")

snapshot_base = '/home/nonroot/snapshots/santa-2025/'
snapshot_dirs = sorted(os.listdir(snapshot_base))

best_valid_snap = None
best_valid_score = float('inf')

for snap_dir in snapshot_dirs:
    sub_path = os.path.join(snapshot_base, snap_dir, 'submission', 'submission.csv')
    if not os.path.exists(sub_path):
        continue

    try:
        df_snap = pd.read_csv(sub_path)

        # Single pass per group: validate and score together, stopping at
        # the first problem so invalid snapshots are rejected quickly.
        total_score = 0
        is_valid = True
        for n in range(1, 201):
            trees = load_trees_for_n(n, df_snap)

            # A missing or partial group would otherwise pass the overlap
            # check and contribute a too-small (or zero) score term.
            if len(trees) != n:
                is_valid = False
                break

            has_overlap, _ = has_overlap_kaggle(trees)
            if has_overlap:
                is_valid = False
                break

            # Side of the axis-aligned square enclosing the whole group,
            # converted back from scaled units.
            polygons = [t.polygon for t in trees]
            union = unary_union(polygons)
            bounds = union.bounds
            side = max(bounds[2] - bounds[0], bounds[3] - bounds[1]) / float(scale_factor)
            total_score += (side ** 2) / n

        if not is_valid:
            continue

        print(f"Valid snapshot {snap_dir}: score={total_score:.6f}")

        if total_score < best_valid_score:
            best_valid_score = total_score
            best_valid_snap = snap_dir

    except Exception as e:
        # Keep scanning the remaining snapshots, but report what went wrong
        # instead of discarding the error.
        print(f"Skipping snapshot {snap_dir}: {type(e).__name__}: {e}")

print("\n=== BEST VALID SNAPSHOT ===")
if best_valid_snap is None:
    print("No valid snapshot found")
else:
    print(f"Snapshot: {best_valid_snap}")
    print(f"Score: {best_valid_score:.6f}")