# Evolver Loop 3 Analysis

## Issue: Submission Failed with "Overlapping trees in group 020"

The ensemble approach created overlapping trees. Need to:
1. Understand why overlaps occurred
2. Validate all sources for overlaps
3. Create a proper ensemble with overlap checking

In [1]:
import numpy as np
import pandas as pd
import os
import glob
from numba import njit
import math
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

print('Libraries loaded')

Libraries loaded


In [2]:
# Tree polygon template
@njit
def make_polygon_template():
    tw=0.15; th=0.2; bw=0.7; mw=0.4; ow=0.25
    tip=0.8; t1=0.5; t2=0.25; base=0.0; tbot=-th
    x=np.array([0,ow/2,ow/4,mw/2,mw/4,bw/2,tw/2,tw/2,-tw/2,-tw/2,-bw/2,-mw/4,-mw/2,-ow/4,-ow/2],np.float64)
    y=np.array([tip,t1,t1,t2,t2,base,base,tbot,tbot,base,base,t2,t2,t1,t1],np.float64)
    return x,y

@njit
def score_group(xs, ys, degs, tx, ty):
    n = xs.size
    V = tx.size
    mnx = 1e300; mny = 1e300; mxx = -1e300; mxy = -1e300
    for i in range(n):
        r = degs[i] * math.pi / 180.0
        c = math.cos(r); s = math.sin(r)
        xi = xs[i]; yi = ys[i]
        for j in range(V):
            X = c * tx[j] - s * ty[j] + xi
            Y = s * tx[j] + c * ty[j] + yi
            if X < mnx: mnx = X
            if X > mxx: mxx = X
            if Y < mny: mny = Y
            if Y > mxy: mxy = Y
    side = max(mxx - mnx, mxy - mny)
    return side * side / n

def strip(a):
    return np.array([float(str(v).replace('s', '')) for v in a], np.float64)

tx, ty = make_polygon_template()
print('Scoring functions defined')

Scoring functions defined


In [3]:
# Function to create tree polygon
def create_tree_polygon(x, y, deg):
    """Create a shapely polygon for a tree at position (x, y) with rotation deg."""
    tw=0.15; th=0.2; bw=0.7; mw=0.4; ow=0.25
    tip=0.8; t1=0.5; t2=0.25; base=0.0; tbot=-th
    
    vertices = [
        (0, tip),
        (ow/2, t1), (ow/4, t1),
        (mw/2, t2), (mw/4, t2),
        (bw/2, base), (tw/2, base),
        (tw/2, tbot), (-tw/2, tbot),
        (-tw/2, base), (-bw/2, base),
        (-mw/4, t2), (-mw/2, t2),
        (-ow/4, t1), (-ow/2, t1)
    ]
    
    poly = Polygon(vertices)
    poly = affinity.rotate(poly, deg, origin=(0, 0))
    poly = affinity.translate(poly, xoff=x, yoff=y)
    return poly

def check_overlaps(xs, ys, degs):
    """Check if any trees overlap. Returns list of overlapping pairs."""
    n = len(xs)
    polys = [create_tree_polygon(xs[i], ys[i], degs[i]) for i in range(n)]
    overlaps = []
    
    for i in range(n):
        for j in range(i+1, n):
            # Check if polygons overlap (not just touch)
            if polys[i].overlaps(polys[j]) or polys[i].contains(polys[j]) or polys[j].contains(polys[i]):
                # Check if it's more than just touching
                intersection = polys[i].intersection(polys[j])
                if intersection.area > 1e-10:  # More than just touching
                    overlaps.append((i, j, intersection.area))
    
    return overlaps

print('Overlap checking functions defined')

Overlap checking functions defined


In [4]:
# List all available sources
sources = [
    '/home/code/preoptimized_submission.csv',
    '/home/code/datasets/santa-2025.csv',
    '/home/code/datasets/71.97.csv',
    '/home/code/datasets/72.49.csv',
    '/home/code/datasets/submission.csv',
    '/home/code/datasets/jazivxt_output/submission.csv',
    '/home/code/datasets/eazy_output/submission.csv',
    '/home/code/datasets/ashraful_output/submission.csv',
    '/home/code/datasets/bucket-of-chump/submission.csv',
    '/home/code/datasets/saspav_dataset/santa-2025.csv',
    '/home/code/datasets/chistyakov_output/submission.csv',
    '/home/code/datasets/chistyakov_packed/submission_best.csv',
    '/home/code/datasets/egortrushin_output/submission.csv',
]

existing_sources = [s for s in sources if os.path.exists(s)]
print(f'Found {len(existing_sources)} sources:')
for s in existing_sources:
    print(f'  {s}')

Found 13 sources:
  /home/code/preoptimized_submission.csv
  /home/code/datasets/santa-2025.csv
  /home/code/datasets/71.97.csv
  /home/code/datasets/72.49.csv
  /home/code/datasets/submission.csv
  /home/code/datasets/jazivxt_output/submission.csv
  /home/code/datasets/eazy_output/submission.csv
  /home/code/datasets/ashraful_output/submission.csv
  /home/code/datasets/bucket-of-chump/submission.csv
  /home/code/datasets/saspav_dataset/santa-2025.csv
  /home/code/datasets/chistyakov_output/submission.csv
  /home/code/datasets/chistyakov_packed/submission_best.csv
  /home/code/datasets/egortrushin_output/submission.csv


In [5]:
# Check each source for overlaps in N=20
print('\n=== Checking N=20 for overlaps in each source ===')

for fp in existing_sources:
    try:
        df = pd.read_csv(fp)
    except Exception as e:
        print(f'{fp}: Error loading - {e}')
        continue
    
    if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
        print(f'{fp}: Invalid columns')
        continue
    
    # Get N=20 configuration
    n = 20
    g = df[df['id'].astype(str).str.startswith(f'{n:03d}_')]
    
    if len(g) != n:
        print(f'{os.path.basename(fp)}: N=20 has {len(g)} trees (expected {n})')
        continue
    
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    
    overlaps = check_overlaps(xs, ys, ds)
    score = score_group(xs, ys, ds, tx, ty)
    
    if overlaps:
        print(f'{os.path.basename(fp)}: N=20 score={score:.6f} - HAS {len(overlaps)} OVERLAPS!')
    else:
        print(f'{os.path.basename(fp)}: N=20 score={score:.6f} - OK')


=== Checking N=20 for overlaps in each source ===
preoptimized_submission.csv: N=20 score=0.376057 - OK


santa-2025.csv: N=20 score=0.376057 - OK
71.97.csv: N=20 score=0.378566 - OK
72.49.csv: N=20 score=0.378612 - OK
submission.csv: N=20 score=0.376057 - OK
submission.csv: N=20 score=0.376057 - OK
submission.csv: N=20 score=0.376057 - OK


submission.csv: N=20 score=0.376057 - OK
submission.csv: N=20 score=0.376057 - OK
santa-2025.csv: N=20 score=0.376057 - OK
submission.csv: N=20 score=0.378686 - OK
submission_best.csv: N=20 score=0.376434 - OK
submission.csv: N=20 score=0.378603 - OK


In [6]:
# Load all sources and compute scores per N with overlap checking
print('\n=== Building ensemble with overlap checking ===')

best = {n: {'score': 1e300, 'data': None, 'src': None, 'has_overlap': False} for n in range(1, 201)}

for fp in existing_sources:
    try:
        df = pd.read_csv(fp)
    except Exception as e:
        continue
    
    if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
        continue
    
    df = df.copy()
    df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
    
    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue
        
        if len(g) != n:
            continue
        
        xs = strip(g['x'].to_numpy())
        ys = strip(g['y'].to_numpy())
        ds = strip(g['deg'].to_numpy())
        
        # Check for overlaps
        overlaps = check_overlaps(xs, ys, ds)
        has_overlap = len(overlaps) > 0
        
        sc = score_group(xs, ys, ds, tx, ty)
        
        # Only consider if no overlaps and better score
        if not has_overlap and sc < best[n]['score']:
            best[n]['score'] = float(sc)
            best[n]['data'] = g.drop(columns=['N']).copy()
            best[n]['src'] = os.path.basename(fp)
            best[n]['has_overlap'] = False

print('Ensemble built with overlap checking')


=== Building ensemble with overlap checking ===


Ensemble built with overlap checking


In [7]:
# Check for any N values without valid solutions
missing = [n for n in range(1, 201) if best[n]['data'] is None]
if missing:
    print(f'WARNING: Missing valid solutions for N={missing}')
else:
    print('All N values have valid solutions')

# Build ensemble submission
rows = []
used = {}
total = 0.0

for n in range(1, 201):
    entry = best[n]
    if entry['data'] is None:
        print(f'Warning: No data for N={n}')
        continue
    rows.append(entry['data'])
    used[entry['src']] = used.get(entry['src'], 0) + 1
    total += entry['score']

print('\n=== Source Usage ===')
for src, count in sorted(used.items(), key=lambda x: -x[1]):
    print(f'  {src}: {count} N values')

print(f'\n=== Ensemble Score ===')
print(f'Total score: {total:.6f}')
print(f'Target: 68.922808')
print(f'Gap: {total - 68.922808:.6f}')

All N values have valid solutions

=== Source Usage ===
  santa-2025.csv: 192 N values
  submission.csv: 6 N values
  preoptimized_submission.csv: 2 N values

=== Ensemble Score ===
Total score: 70.676092
Target: 68.922808
Gap: 1.753284


In [8]:
# Create final submission
out = pd.concat(rows, ignore_index=True)
out['sn'] = out['id'].str.split('_').str[0].astype(int)
out['si'] = out['id'].str.split('_').str[1].astype(int)
out = out.sort_values(['sn', 'si']).drop(columns=['sn', 'si'])
out = out[['id', 'x', 'y', 'deg']]

# Save submission
submission_path = '/home/submission/submission.csv'
out.to_csv(submission_path, index=False)
print(f'Saved ensemble submission to {submission_path}')
print(f'Total rows: {len(out)}')

Saved ensemble submission to /home/submission/submission.csv
Total rows: 20100


In [9]:
# Final verification - check ALL N values for overlaps
print('\n=== Final Verification ===')
df_verify = pd.read_csv(submission_path)

overlap_count = 0
for n in range(1, 201):
    g = df_verify[df_verify['id'].str.startswith(f'{n:03d}_')]
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    
    overlaps = check_overlaps(xs, ys, ds)
    if overlaps:
        print(f'N={n}: {len(overlaps)} overlaps detected!')
        overlap_count += 1

if overlap_count == 0:
    print('No overlaps detected in any N value!')
else:
    print(f'\nTotal N values with overlaps: {overlap_count}')


=== Final Verification ===


No overlaps detected in any N value!


In [None]:
# Verify total score
verify_total = 0.0
for n in range(1, 201):
    g = df_verify[df_verify['id'].str.startswith(f'{n:03d}_')]
    xs = strip(g['x'].to_numpy())
    ys = strip(g['y'].to_numpy())
    ds = strip(g['deg'].to_numpy())
    sc = score_group(xs, ys, ds, tx, ty)
    verify_total += sc

print(f'\nVerified ensemble score: {verify_total:.6f}')
print(f'Expected: {total:.6f}')
print(f'Match: {abs(verify_total - total) < 1e-6}')