# Evolver Loop 4 Analysis

## Problem: Submission Failed with Overlapping Trees in Group 042

The better baseline (70.647306) from snapshot 21165874980 was rejected by Kaggle due to overlapping trees in group 042 (N=42). Our local validation didn't catch this.

**Key Questions:**
1. Why did our local validation miss the overlap?
2. What is the actual overlap in group 042?
3. Can we fix it?
4. What is our best VALID submission?

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import os

# Tree geometry: outline of one un-rotated tree at the origin.
# TX[k], TY[k] are the x / y coordinates of the k-th outline vertex (15 vertices).
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]
# Template polygon that every placed tree is derived from.
BASE_TREE = Polygon(list(zip(TX, TY)))

def parse_value(s):
    """Convert a submission field to ``float``.

    String values that begin with the marker character ``'s'`` have that one
    character removed before conversion; anything else is passed to
    ``float()`` unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

def create_tree_polygon(x, y, deg):
    """Return the tree polygon placed at ``(x, y)`` with rotation ``deg``.

    ``BASE_TREE`` is first rotated by ``deg`` about the origin ``(0, 0)`` and
    the result is then shifted by ``x`` along the x-axis and ``y`` along the
    y-axis.
    """
    rotated = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

print("Functions defined")

Functions defined


In [2]:
# Load the failed submission (the candidate whose group 042 is investigated below).
# Later cells rely on the global name `df`.
failed_file = '/home/nonroot/snapshots/santa-2025/21165874980/code/submission_candidates/candidate_001.csv'
df = pd.read_csv(filepath_or_buffer=failed_file)
print(f"Loaded {len(df)} rows")

Loaded 20100 rows


In [3]:
# Check group 042 specifically with STRICTER overlap detection.
# `n_trees` and `polygons` stay global: later cells reuse them.
prefix = '042_'
in_group = df['id'].str.startswith(prefix)
n_trees = df[in_group]
print(f"N=42 has {len(n_trees)} trees")

# One (id, polygon) entry per tree, in file order.
polygons = []
for _, row in n_trees.iterrows():
    placed = create_tree_polygon(
        parse_value(row['x']),
        parse_value(row['y']),
        parse_value(row['deg']),
    )
    polygons.append((row['id'], placed))

# Compare every unordered pair; ANY positive intersection area is an overlap.
overlaps = []
for first, (id_a, poly_a) in enumerate(polygons):
    for id_b, poly_b in polygons[first + 1:]:
        if not poly_a.intersects(poly_b):
            continue
        shared = poly_a.intersection(poly_b)
        if shared.area > 0:
            overlaps.append((id_a, id_b, shared.area))

print(f"Found {len(overlaps)} overlaps in N=42:")
for record in overlaps:
    id_i, id_j, area = record
    print(f"  {id_i} and {id_j}: area = {area:.15f}")

N=42 has 42 trees
Found 0 overlaps in N=42:


In [4]:
# Even stricter look at group 042: list every pair of polygons that are closer
# than 1e-6 to each other (touching or almost touching). The check uses the
# polygon-to-polygon distance (no buffering is applied) and reports the
# intersection area alongside it.
print("\nChecking with buffer for near-overlaps:")
for first, (id_i, poly_i) in enumerate(polygons):
    for id_j, poly_j in polygons[first + 1:]:
        dist = poly_i.distance(poly_j)
        if not (dist < 1e-6):
            continue
        inter = poly_i.intersection(poly_j)
        print(f"  {id_i} and {id_j}: distance = {dist:.15f}, intersection area = {inter.area:.15f}")


Checking with buffer for near-overlaps:
  042_0 and 042_1: distance = 0.000000000000015, intersection area = 0.000000000000000
  042_0 and 042_7: distance = 0.000000000000019, intersection area = 0.000000000000000
  042_0 and 042_30: distance = 0.000000000066624, intersection area = 0.000000000000000
  042_0 and 042_35: distance = 0.000000000000037, intersection area = 0.000000000000000
  042_1 and 042_39: distance = 0.000000000000043, intersection area = 0.000000000000000
  042_2 and 042_6: distance = 0.000000000000079, intersection area = 0.000000000000000
  042_2 and 042_17: distance = 0.000000000000002, intersection area = 0.000000000000000
  042_2 and 042_36: distance = 0.000000000000009, intersection area = 0.000000000000000
  042_3 and 042_10: distance = 0.000000000000002, intersection area = 0.000000000000000
  042_3 and 042_12: distance = 0.000000000000002, intersection area = 0.000000000000000
  042_3 and 042_17: distance = 0.000000000000000, intersection area = 0.0000000000

In [5]:
# Let's check ALL groups for overlaps with zero tolerance
def check_all_overlaps_strict(df):
    """Scan every group N = 1..200 of a submission for overlapping tree pairs.

    For each N, the rows whose ``id`` starts with the zero-padded prefix
    ``"NNN_"`` are converted to polygons with ``create_tree_polygon`` and all
    unordered pairs are compared. A pair is reported when the two polygons
    intersect, do not merely touch, and their intersection has positive area.

    A group is only checked when it contains exactly N rows. Groups with any
    other row count (including zero) are skipped, and a single warning lists
    them, so that a "0 overlaps" result on an incomplete or malformed
    submission cannot be mistaken for a full validation.

    Parameters
    ----------
    df : pandas.DataFrame
        Submission table with columns ``id``, ``x``, ``y`` and ``deg``; the
        numeric columns are read through ``parse_value``.

    Returns
    -------
    list of tuple
        ``(n, id_i, id_j, area)`` for every overlapping pair, ordered by
        group and then by pair position within the group.

    Warns
    -----
    UserWarning
        If at least one group was skipped because its row count is not N.
    """
    import warnings

    overlaps_found = []
    skipped_groups = []  # (N, actual row count) for every group not checked
    for n in range(1, 201):
        prefix = f"{n:03d}_"
        n_trees = df[df['id'].str.startswith(prefix)]
        if len(n_trees) != n:
            skipped_groups.append((n, len(n_trees)))
            continue

        polygons = [
            (
                row['id'],
                create_tree_polygon(
                    parse_value(row['x']),
                    parse_value(row['y']),
                    parse_value(row['deg']),
                ),
            )
            for _, row in n_trees.iterrows()
        ]

        for i, (id_i, poly_i) in enumerate(polygons):
            for id_j, poly_j in polygons[i + 1:]:
                if poly_i.intersects(poly_j) and not poly_i.touches(poly_j):
                    inter = poly_i.intersection(poly_j)
                    # NOTE(review): a pair that passes the predicates above but
                    # whose computed intersection area is exactly 0.0 is not
                    # reported.
                    if inter.area > 0:  # ANY positive area
                        overlaps_found.append((n, id_i, id_j, inter.area))

    if skipped_groups:
        warnings.warn(
            f"check_all_overlaps_strict skipped {len(skipped_groups)} group(s) "
            f"whose row count != N; first (N, rows) pairs: {skipped_groups[:5]}",
            stacklevel=2,
        )
    return overlaps_found

# Run the strict all-groups check on the failed submission and list each hit.
print("Checking ALL groups for overlaps (strict)...")
all_overlaps = check_all_overlaps_strict(df)
print(f"Found {len(all_overlaps)} total overlaps:")
for record in all_overlaps:
    n, id_i, id_j, area = record
    print(f"  N={n}: {id_i} and {id_j}, area = {area:.15f}")

Checking ALL groups for overlaps (strict)...


Found 0 total overlaps:


In [6]:
# Run the same strict check on submissions we believe are valid, for comparison.
divider = "=" * 60
print("\n" + divider)
print("Checking known good submissions:")
print(divider)

# The 70.658891 submission that passed LB
good_file = '/home/nonroot/snapshots/santa-2025/21164519357/code/exploration/santa-2025.csv'
df_good = pd.read_csv(filepath_or_buffer=good_file)
print(f"\nChecking {good_file}...")
good_overlaps = check_all_overlaps_strict(df_good)
print(f"Found {len(good_overlaps)} overlaps")

# The original baseline
baseline_file = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df_baseline = pd.read_csv(filepath_or_buffer=baseline_file)
print(f"\nChecking {baseline_file}...")
baseline_overlaps = check_all_overlaps_strict(df_baseline)
print(f"Found {len(baseline_overlaps)} overlaps")


Checking known good submissions:

Checking /home/nonroot/snapshots/santa-2025/21164519357/code/exploration/santa-2025.csv...


Found 0 overlaps

Checking /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv...


Found 0 overlaps


In [7]:
# Calculate scores for the good submissions
def calculate_score(df):
    """Compute the total score of a submission over groups N = 1..200.

    For each group, all tree polygons are built with ``create_tree_polygon``,
    the axis-aligned extent of their exterior vertices is measured, and the
    group contributes ``side**2 / n`` where ``side`` is the larger of the x
    and y extents. The return value is the sum of these contributions.

    A group only contributes when it contains exactly N rows. Groups with any
    other row count (including zero) are left out of the sum, and a single
    warning lists them, because a total that silently omits groups is lower
    than the real score and would look misleadingly good.

    Parameters
    ----------
    df : pandas.DataFrame
        Submission table with columns ``id``, ``x``, ``y`` and ``deg``; the
        numeric columns are read through ``parse_value``.

    Returns
    -------
    float or int
        Sum of ``side**2 / n`` over all scored groups (``0`` if none).

    Warns
    -----
    UserWarning
        If at least one group was left out because its row count is not N.
    """
    import warnings

    total_score = 0
    skipped_groups = []  # (N, actual row count) for every group not scored
    for n in range(1, 201):
        prefix = f"{n:03d}_"
        n_trees = df[df['id'].str.startswith(prefix)]
        if len(n_trees) != n:
            skipped_groups.append((n, len(n_trees)))
            continue

        # Stack the exterior vertices of every tree in the group into one array.
        all_coords = []
        for _, row in n_trees.iterrows():
            x = parse_value(row['x'])
            y = parse_value(row['y'])
            deg = parse_value(row['deg'])
            poly = create_tree_polygon(x, y, deg)
            all_coords.append(np.array(poly.exterior.coords))
        all_coords = np.vstack(all_coords)

        # Side of the bounding square = larger of the two axis extents.
        x_range = all_coords[:, 0].max() - all_coords[:, 0].min()
        y_range = all_coords[:, 1].max() - all_coords[:, 1].min()
        side = max(x_range, y_range)
        total_score += side**2 / n

    if skipped_groups:
        warnings.warn(
            f"calculate_score skipped {len(skipped_groups)} group(s) whose "
            f"row count != N; first (N, rows) pairs: {skipped_groups[:5]}",
            stacklevel=2,
        )
    return total_score

# Recompute each submission's score locally; the value in a label is the score
# previously recorded for that file, for side-by-side comparison.
print("\nScores:")
for label, submission in (
    ("Good submission (70.658891)", df_good),
    ("Baseline (70.676102)", df_baseline),
    ("Failed submission", df),
):
    print(f"{label}: {calculate_score(submission):.6f}")


Scores:


Good submission (70.658891): 70.658891


Baseline (70.676102): 70.676102


Failed submission: 70.647306


In [8]:
# The Kaggle validator may use a different precision or algorithm than we do,
# so print the parsed values of the first ten trees of group 042 with 15
# decimals to see exactly what our checks operate on.
print("\nExact coordinates for N=42:")
first_ten = n_trees.head(10)
for _, row in first_ten.iterrows():
    x, y, deg = (parse_value(row[column]) for column in ('x', 'y', 'deg'))
    print(f"{row['id']}: x={x:.15f}, y={y:.15f}, deg={deg:.15f}")


Exact coordinates for N=42:
042_0: x=1.747533353797365, y=-0.611676768965902, deg=84.643132034986920
042_1: x=1.747801675800098, y=-0.007535041242537, deg=126.027371818347092
042_2: x=-1.142327623636033, y=0.748057350043896, deg=66.743364515377237
042_3: x=0.040329541205713, y=0.698664845175687, deg=96.190503401038725
042_4: x=-0.836786630187964, y=1.351307626548424, deg=16623.662758960035717
042_5: x=-0.924439691494856, y=-0.789981953403887, deg=249.288487955135196
042_6: x=-1.650886555946404, y=0.410841532391147, deg=23.629377894724080
042_7: x=0.804120931319439, y=-1.039937420311431, deg=287.859232904795533
042_8: x=-0.538962792887715, y=-1.896394610900443, deg=246.366486619751640
042_9: x=-0.340219700241197, y=-1.446684061242404, deg=65.641764500625158


In [9]:
# Let's try using a different overlap detection method
# Kaggle might use a simpler/different algorithm

from shapely.validation import make_valid

def check_overlaps_alternative(df, n):
    """Print overlap diagnostics for group ``n`` of submission ``df``.

    Every unordered pair of trees whose polygons intersect is classified:

    * ``OVERLAP`` - the intersection area exceeds 1e-15;
    * ``NEAR``    - the area is at most 1e-15 but the polygons do not merely
      touch.

    Pairs that only touch, or do not intersect at all, print nothing.
    Returns ``None``; all results go to stdout.
    """
    prefix = f"{n:03d}_"
    group_rows = df[df['id'].str.startswith(prefix)]

    polygons = [
        (
            row['id'],
            create_tree_polygon(
                parse_value(row['x']),
                parse_value(row['y']),
                parse_value(row['deg']),
            ),
        )
        for _, row in group_rows.iterrows()
    ]

    print(f"\nN={n}: Checking {len(polygons)} trees")

    for first, (id_i, poly_i) in enumerate(polygons):
        for id_j, poly_j in polygons[first + 1:]:
            if not poly_i.intersects(poly_j):
                continue
            inter = poly_i.intersection(poly_j)
            if inter.area > 1e-15:  # Very strict
                print(f"  OVERLAP: {id_i} & {id_j}, area={inter.area:.20f}")
                continue
            if not poly_i.touches(poly_j):
                print(f"  NEAR: {id_i} & {id_j}, touches={poly_i.touches(poly_j)}, area={inter.area:.20f}")

check_overlaps_alternative(df, 42)


N=42: Checking 42 trees


In [10]:
# Look for a precision or parsing problem: show the raw (unparsed) fields,
# including their 's' prefix, for the first five trees of group 042.
print("\nRaw data for N=42:")
first_five = n_trees.head(5)
for _, row in first_five.iterrows():
    print(f"{row['id']}: x='{row['x']}', y='{row['y']}', deg='{row['deg']}'")

# Flag coordinates beyond +/-100 and angles outside [0, 360] in group 042.
print("\nChecking for unusual values in N=42:")
for _, row in n_trees.iterrows():
    x, y, deg = (parse_value(row[column]) for column in ('x', 'y', 'deg'))
    coordinate_is_large = abs(x) > 100 or abs(y) > 100
    angle_is_unusual = deg < 0 or deg > 360
    if coordinate_is_large:
        print(f"  Large coordinate: {row['id']}: x={x}, y={y}")
    if angle_is_unusual:
        print(f"  Unusual angle: {row['id']}: deg={deg}")


Raw data for N=42:
042_0: x='s1.7475333537973645', y='s-0.6116767689659015', deg='s84.64313203498692'
042_1: x='s1.7478016758000978', y='s-0.007535041242536562', deg='s126.02737181834709'
042_2: x='s-1.1423276236360327', y='s0.7480573500438955', deg='s66.74336451537724'
042_3: x='s0.04032954120571284', y='s0.6986648451756872', deg='s96.19050340103873'
042_4: x='s-0.8367866301879636', y='s1.3513076265484243', deg='s16623.662758960036'

Checking for unusual values in N=42:
  Unusual angle: 042_4: deg=16623.662758960036
  Unusual angle: 042_32: deg=-833.0016095256168


In [None]:
# Let's use a completely different approach: cross-check with an
# implementation that does not rely on Shapely.
#
# Assumption (not verified here): the competition scoring code may use a
# simpler polygon intersection check than Shapely's predicates.
# Let's implement such a check from scratch.

def point_in_polygon(point, polygon_coords):
    """Ray-casting test: is ``point`` inside the polygon ``polygon_coords``?

    ``point`` is an ``(x, y)`` pair and ``polygon_coords`` an indexable
    sequence of ``(x, y)`` vertices. For every edge whose endpoints lie on
    opposite sides of the horizontal line through the point, the inside flag
    is flipped when the point is strictly left of where the edge meets that
    line. Returns ``False`` for an empty vertex list.
    """
    x, y = point
    inside = False
    for i in range(len(polygon_coords)):
        xi, yi = polygon_coords[i]
        # Index -1 wraps to the last vertex, closing the polygon.
        xj, yj = polygon_coords[i - 1]
        if (yi > y) == (yj > y):
            # Edge does not straddle the horizontal line through the point.
            # This also guarantees yj != yi in the division below.
            continue
        x_at_y = (xj - xi) * (y - yi) / (yj - yi) + xi
        if x < x_at_y:
            inside = not inside
    return inside

def get_tree_coords(x, y, deg):
    """Return the tree outline rotated by ``deg`` degrees and moved to ``(x, y)``.

    Each ``(TX[k], TY[k])`` vertex is rotated about the origin and then
    shifted by ``(x, y)``. The result is a list of ``(x, y)`` tuples in the
    same vertex order as ``TX`` / ``TY``.
    """
    import math
    theta = math.radians(deg)
    cos_a, sin_a = math.cos(theta), math.sin(theta)
    return [
        (vx * cos_a - vy * sin_a + x, vx * sin_a + vy * cos_a + y)
        for vx, vy in zip(TX, TY)
    ]

# Check if any vertex of one tree is inside another
def check_vertex_overlap(df, n):
    """Print every pair in group ``n`` where a vertex of one tree lies inside the other.

    Tree outlines come from ``get_tree_coords`` and containment is decided by
    ``point_in_polygon``. Each pair is tested in both directions, with at most
    one line printed per direction. Only vertices are tested, so two outlines
    whose edges cross without either containing a vertex of the other are not
    reported. Returns ``None``; all results go to stdout.
    """
    prefix = f"{n:03d}_"
    group_rows = df[df['id'].str.startswith(prefix)]

    trees = [
        (
            row['id'],
            get_tree_coords(
                parse_value(row['x']),
                parse_value(row['y']),
                parse_value(row['deg']),
            ),
        )
        for _, row in group_rows.iterrows()
    ]

    print(f"\nN={n}: Vertex-in-polygon check")
    for first, (id_i, coords_i) in enumerate(trees):
        for id_j, coords_j in trees[first + 1:]:
            # Any vertex of i inside j?
            if any(point_in_polygon(pt, coords_j) for pt in coords_i):
                print(f"  OVERLAP: vertex of {id_i} inside {id_j}")

            # Any vertex of j inside i?
            if any(point_in_polygon(pt, coords_i) for pt in coords_j):
                print(f"  OVERLAP: vertex of {id_j} inside {id_i}")

check_vertex_overlap(df, 42)

In [None]:
# The unusual angles might be the issue!
# Let's check if normalizing angles to 0-360 changes anything

def normalize_angle(deg):
    """Reduce ``deg`` modulo 360, so the result lies in ``[0, 360)``."""
    _, remainder = divmod(deg, 360)
    return remainder

# List every tree in the failed submission whose angle lies outside [0, 360],
# together with the value it would normalize to.
print("All unusual angles in failed submission:")
for _, row in df.iterrows():
    deg = parse_value(row['deg'])
    if not (deg < 0 or deg > 360):
        continue
    norm_deg = normalize_angle(deg)
    print(f"  {row['id']}: deg={deg:.6f} -> normalized={norm_deg:.6f}")