# Evolver Loop 7 Analysis

This notebook analyzes the situation after 7 experiments that produced no improvement. We need to find a fundamentally different approach.

In [None]:
import os
import glob
import math
import numpy as np
import pandas as pd
from numba import njit

# Switch the process working directory to the fixed project path and echo
# the directory that is actually in effect afterwards.
os.chdir('/home/code')
print(f'Working directory: {os.getcwd()}')

In [None]:
# Tree outline dimensions, expressed in the tree's local frame. The local
# origin lies on the centre line, on the base line of the lowest tier.

# Trunk: width and height (the trunk hangs below the base line).
TRUNK_W, TRUNK_H = 0.15, 0.2

# Full widths of the three tiers, widest (bottom) to narrowest (top).
BASE_W, MID_W, TOP_W = 0.7, 0.4, 0.25

# Heights (local y) of the tip, the two tier shoulders and the base line.
TIP_Y, TIER_1_Y, TIER_2_Y, BASE_Y = 0.8, 0.5, 0.25, 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

# Centre-to-centre distance used as a cheap pre-filter by the pairwise
# overlap check: pairs at or beyond this distance are never polygon-tested.
# The outline vertex farthest from the local origin is the tip (0.8 away),
# so 1.8 leaves a margin over 2 * 0.8.
MAX_OVERLAP_DIST = 1.8
MAX_OVERLAP_DIST_SQ = MAX_OVERLAP_DIST * MAX_OVERLAP_DIST

@njit(cache=True)
def rotate_point(x, y, cos_a, sin_a):
    """Rotate the point ``(x, y)`` about the origin.

    The rotation angle is supplied as its cosine and sine so callers can
    compute them once and reuse them for many points.

    Returns:
        The rotated coordinates as an ``(x, y)`` tuple.
    """
    rotated_x = x * cos_a - y * sin_a
    rotated_y = x * sin_a + y * cos_a
    return rotated_x, rotated_y

@njit(cache=True)
def get_tree_vertices(cx, cy, angle_deg):
    """Return the 15 outline vertices of a tree placed at ``(cx, cy)``.

    The outline is described in the tree's local frame, starting at the tip,
    going down the right-hand side, across the trunk bottom and back up the
    left-hand side. Each vertex is rotated by ``angle_deg`` degrees about
    the local origin and then translated by ``(cx, cy)``.

    Returns:
        A ``(15, 2)`` float64 array with one ``[x, y]`` row per vertex.
    """
    theta = angle_deg * math.pi / 180.0
    c = math.cos(theta)
    s = math.sin(theta)

    # Half and quarter widths used by the outline below.
    half_top = TOP_W / 2.0
    quarter_top = TOP_W / 4.0
    half_mid = MID_W / 2.0
    quarter_mid = MID_W / 4.0
    half_base = BASE_W / 2.0
    half_trunk = TRUNK_W / 2.0

    local = np.array([
        [0.0, TIP_Y],
        [half_top, TIER_1_Y], [quarter_top, TIER_1_Y],
        [half_mid, TIER_2_Y], [quarter_mid, TIER_2_Y],
        [half_base, BASE_Y],
        [half_trunk, BASE_Y], [half_trunk, TRUNK_BOTTOM_Y],
        [-half_trunk, TRUNK_BOTTOM_Y], [-half_trunk, BASE_Y],
        [-half_base, BASE_Y],
        [-quarter_mid, TIER_2_Y], [-half_mid, TIER_2_Y],
        [-quarter_top, TIER_1_Y], [-half_top, TIER_1_Y],
    ], dtype=np.float64)

    world = np.empty((15, 2), dtype=np.float64)
    for k in range(15):
        lx = local[k, 0]
        ly = local[k, 1]
        # Rotate first, then translate to the tree's position.
        world[k, 0] = (lx * c - ly * s) + cx
        world[k, 1] = (lx * s + ly * c) + cy
    return world

@njit(cache=True)
def point_in_polygon(px, py, vertices):
    """Even-odd ray-casting test for the point ``(px, py)``.

    ``vertices`` is an ``(n, 2)`` array describing a closed polygon (the
    last vertex connects back to the first). A horizontal ray is cast from
    the point towards +x and the inside flag is toggled for every edge it
    crosses.

    Returns:
        True if the number of crossings is odd, False otherwise.
    """
    count = vertices.shape[0]
    inside = False
    prev = count - 1
    for cur in range(count):
        x_cur = vertices[cur, 0]
        y_cur = vertices[cur, 1]
        x_prev = vertices[prev, 0]
        y_prev = vertices[prev, 1]
        # Only edges whose endpoints lie on opposite sides of the ray's
        # height can be crossed; this also guarantees y_prev != y_cur, so
        # the division below is safe.
        if (y_cur > py) != (y_prev > py):
            x_at_py = (x_prev - x_cur) * (py - y_cur) / (y_prev - y_cur) + x_cur
            if px < x_at_py:
                inside = not inside
        prev = cur
    return inside

@njit(cache=True)
def segments_intersect(p1x, p1y, p2x, p2y, p3x, p3y, p4x, p4y):
    """Return True if closed segments ``p1-p2`` and ``p3-p4`` share a point.

    Segments are treated as closed, so touching at an endpoint (or an
    endpoint lying on the other segment) counts as an intersection.

    The test has two parts:

    1. Bounding-box rejection: segments whose axis-aligned bounding boxes
       are disjoint cannot intersect. This is what makes the collinear case
       correct: for collinear segments every cross product below is zero,
       so without this step two disjoint segments on the same line would be
       reported as intersecting.
    2. Straddle test: each segment's endpoints must not lie strictly on the
       same side of the line through the other segment.

    Args:
        p1x, p1y, p2x, p2y: Endpoints of the first segment.
        p3x, p3y, p4x, p4y: Endpoints of the second segment.

    Returns:
        True if the segments intersect or touch, False otherwise.
    """
    # 1. Bounding-box rejection (strict comparisons keep touching segments).
    if max(p1x, p2x) < min(p3x, p4x) or max(p3x, p4x) < min(p1x, p2x):
        return False
    if max(p1y, p2y) < min(p3y, p4y) or max(p3y, p4y) < min(p1y, p2y):
        return False

    # 2a. p1 and p2 must not be strictly on the same side of line p3-p4.
    dbx = p4x - p3x
    dby = p4y - p3y
    cross_b1 = dbx * (p1y - p3y) - dby * (p1x - p3x)
    cross_b2 = dbx * (p2y - p3y) - dby * (p2x - p3x)
    if cross_b1 * cross_b2 > 0:
        return False

    # 2b. p3 and p4 must not be strictly on the same side of line p1-p2.
    dax = p2x - p1x
    day = p2y - p1y
    cross_a1 = dax * (p3y - p1y) - day * (p3x - p1x)
    cross_a2 = dax * (p4y - p1y) - day * (p4x - p1x)
    if cross_a1 * cross_a2 > 0:
        return False

    return True

@njit(cache=True)
def polygons_overlap(v1, v2):
    """Return True if the two polygons overlap.

    ``v1`` and ``v2`` are ``(n, 2)`` vertex arrays of closed polygons. The
    polygons are reported as overlapping when any vertex of one lies inside
    the other (per ``point_in_polygon``) or when any pair of edges
    intersects (per ``segments_intersect``).
    """
    count1 = v1.shape[0]
    count2 = v2.shape[0]

    # Vertex containment, in both directions.
    for k in range(count1):
        if point_in_polygon(v1[k, 0], v1[k, 1], v2):
            return True
    for k in range(count2):
        if point_in_polygon(v2[k, 0], v2[k, 1], v1):
            return True

    # Edge-against-edge intersection; each edge runs from a vertex to its
    # successor, wrapping from the last vertex back to the first.
    for a in range(count1):
        a_next = a + 1
        if a_next == count1:
            a_next = 0
        ax0 = v1[a, 0]
        ay0 = v1[a, 1]
        ax1 = v1[a_next, 0]
        ay1 = v1[a_next, 1]
        for b in range(count2):
            b_next = b + 1
            if b_next == count2:
                b_next = 0
            if segments_intersect(ax0, ay0, ax1, ay1,
                                  v2[b, 0], v2[b, 1], v2[b_next, 0], v2[b_next, 1]):
                return True
    return False

@njit(cache=True)
def check_any_overlap(xs, ys, degs):
    """Return True if any two trees in the configuration overlap.

    Args:
        xs, ys: Per-tree centre coordinates.
        degs: Per-tree rotation angles in degrees.

    Returns:
        True as soon as one overlapping pair is found, False if none is.
    """
    n = len(xs)
    for i in range(n - 1):
        # The outline of tree i does not depend on j, so build it once per
        # outer iteration instead of once per candidate pair.
        vi = get_tree_vertices(xs[i], ys[i], degs[i])
        for j in range(i + 1, n):
            dx = xs[i] - xs[j]
            dy = ys[i] - ys[j]
            # Cheap pre-filter: skip pairs whose centres are too far apart
            # before doing the polygon test.
            if dx * dx + dy * dy < MAX_OVERLAP_DIST_SQ:
                vj = get_tree_vertices(xs[j], ys[j], degs[j])
                if polygons_overlap(vi, vj):
                    return True
    return False

@njit(cache=True)
def calculate_bounding_box(xs, ys, degs):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    The axis-aligned bounding box of every tree vertex is accumulated and
    the larger of its width and height is returned.

    Args:
        xs, ys: Per-tree centre coordinates.
        degs: Per-tree rotation angles in degrees.
    """
    lo_x = 1e9
    lo_y = 1e9
    hi_x = -1e9
    hi_y = -1e9
    for t in range(len(xs)):
        outline = get_tree_vertices(xs[t], ys[t], degs[t])
        for k in range(15):
            vx = outline[k, 0]
            vy = outline[k, 1]
            if vx < lo_x:
                lo_x = vx
            if vx > hi_x:
                hi_x = vx
            if vy < lo_y:
                lo_y = vy
            if vy > hi_y:
                hi_y = vy
    width = hi_x - lo_x
    height = hi_y - lo_y
    return max(width, height)

@njit(cache=True)
def calculate_score(xs, ys, degs):
    """Return the score of one configuration: square side squared per tree.

    The side is the value returned by ``calculate_bounding_box`` and the
    tree count is ``len(xs)``.
    """
    edge = calculate_bounding_box(xs, ys, degs)
    tree_count = len(xs)
    return edge * edge / tree_count

# Progress marker: printed once the definitions earlier in this cell have
# executed without raising.
print('Functions defined')

In [None]:
def strip(a):
    """Convert values such as ``'s1.25'`` into a float64 NumPy array.

    Each element of ``a`` is converted to ``str``, every ``'s'`` character
    is removed, and the remainder is parsed with ``float``.
    """
    parsed = []
    for raw in a:
        text = str(raw)
        parsed.append(float(text.replace('s', '')))
    return np.array(parsed, dtype=np.float64)

def load_submission(filepath):
    """Load a submission CSV into per-N tree configurations.

    The file must contain the columns ``id``, ``x``, ``y`` and ``deg``.
    The part of ``id`` before the first underscore is parsed as the integer
    group key N, and the ``x``/``y``/``deg`` values of each group are
    converted with ``strip`` (which drops the ``'s'`` characters).

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A dict mapping N (``int``) to ``{'x': ..., 'y': ..., 'deg': ...}``
        float64 arrays, or ``None`` when the file cannot be read, lacks the
        required columns, or contains values that cannot be parsed.
    """
    try:
        df = pd.read_csv(filepath)
        if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            return None
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
        configs = {}
        for n, g in df.groupby('N'):
            # Store plain ints as keys rather than NumPy integer scalars.
            configs[int(n)] = {
                'x': strip(g['x'].to_numpy()),
                'y': strip(g['y'].to_numpy()),
                'deg': strip(g['deg'].to_numpy()),
            }
        return configs
    except Exception:
        # Best-effort loader: any read/parse failure means "not a usable
        # submission". Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate during long scans.
        return None

def score_submission(configs):
    """Sum ``calculate_score`` over the configurations for N = 1..200.

    N values that are absent from ``configs`` contribute nothing; keys
    outside 1..200 are ignored.
    """
    running_total = 0.0
    for tree_count in range(1, 201):
        if tree_count not in configs:
            continue
        layout = configs[tree_count]
        running_total += calculate_score(layout['x'], layout['y'], layout['deg'])
    return running_total

def check_overlaps(configs):
    """Return the N values in 1..200 whose configuration has an overlap.

    Each present configuration is tested with ``check_any_overlap``; the
    result lists the offending N values in ascending order.
    """
    return [
        tree_count
        for tree_count in range(1, 201)
        if tree_count in configs
        and check_any_overlap(
            configs[tree_count]['x'],
            configs[tree_count]['y'],
            configs[tree_count]['deg'],
        )
    ]

# Progress marker: printed once the helper definitions earlier in this cell
# have executed without raising.
print('Helper functions defined')

In [None]:
# Scan all CSV files in santa25-public
csv_dir = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa25-public/'
# glob returns matches in arbitrary order; sorting makes the scan order (and
# any later tie-breaking on equal scores) reproducible between runs.
csv_files = sorted(glob.glob(os.path.join(csv_dir, '*.csv')))

results = []
for fp in csv_files:
    configs = load_submission(fp)
    if configs is None:
        # Unreadable file, or not in the id/x/y/deg submission format.
        continue
    score = score_submission(configs)
    overlaps = check_overlaps(configs)
    results.append({
        'file': os.path.basename(fp),
        'score': score,
        'n_overlaps': len(overlaps),
        # Only the first five offending N values are kept for display.
        'overlaps': overlaps[:5],
    })

# Passing the columns explicitly keeps sort_values('score') from raising
# KeyError when no file could be loaded and `results` is empty.
results_df = pd.DataFrame(
    results, columns=['file', 'score', 'n_overlaps', 'overlaps']
).sort_values('score')
print('\nSanta25-public CSV files:')
print(results_df.to_string())

In [None]:
# Report the lowest-scoring submission among those with no overlapping N.
valid_results = [entry for entry in results if entry['n_overlaps'] == 0]
if not valid_results:
    print('\nNo valid submissions found (all have overlaps)')
else:
    best_valid = min(valid_results, key=lambda entry: entry['score'])
    print(f"\nBest VALID submission: {best_valid['file']} with score {best_valid['score']:.6f}")

In [None]:
# Also check telegram-public
csv_dir2 = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/telegram-public/'
# glob returns matches in arbitrary order; sorting makes the scan order (and
# any later tie-breaking on equal scores) reproducible between runs.
csv_files2 = sorted(glob.glob(os.path.join(csv_dir2, '*.csv')))

results2 = []
for fp in csv_files2:
    configs = load_submission(fp)
    if configs is None:
        # Unreadable file, or not in the id/x/y/deg submission format.
        continue
    score = score_submission(configs)
    overlaps = check_overlaps(configs)
    results2.append({
        'file': os.path.basename(fp),
        'score': score,
        'n_overlaps': len(overlaps),
        # Only the first five offending N values are kept for display.
        'overlaps': overlaps[:5],
    })

# Passing the columns explicitly keeps sort_values('score') from raising
# KeyError when no file could be loaded and `results2` is empty.
results2_df = pd.DataFrame(
    results2, columns=['file', 'score', 'n_overlaps', 'overlaps']
).sort_values('score')
print('\nTelegram-public CSV files:')
print(results2_df.to_string())

In [None]:
# Load baseline and check per-N scores
baseline_path = '/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv'
baseline = load_submission(baseline_path)
if baseline is None:
    # load_submission signals every failure with None; stop here with a
    # clear message instead of failing later with an unrelated TypeError.
    raise RuntimeError(
        f'Could not load baseline submission (missing, unreadable or malformed): {baseline_path}'
    )
baseline_score = score_submission(baseline)
print(f'Baseline score: {baseline_score:.6f}')

# Per-N scores for baseline. Every N in 1..200 must be present; a missing
# N raises KeyError here.
baseline_per_n = {}
for n in range(1, 201):
    c = baseline[n]
    baseline_per_n[n] = calculate_score(c['x'], c['y'], c['deg'])

print('\nBaseline per-N scores (first 20):')
for n in range(1, 21):
    print(f'N={n}: {baseline_per_n[n]:.6f}')

In [None]:
# Break the baseline total down into contiguous bands of N to see which
# bands dominate the score.
print('\nScore contribution by N range:')
ranges = [(1, 10), (11, 30), (31, 50), (51, 100), (101, 150), (151, 200)]
for band in ranges:
    start, end = band
    members = range(start, end + 1)  # both bounds are inclusive
    contrib = sum(baseline_per_n[n] for n in members)
    print(f'N={start}-{end}: {contrib:.4f}')

In [None]:
# Check if any external CSV has better per-N scores than baseline
print('\nSearching for better per-N configurations...')

# Collect every loadable submission from both external sources
all_sources = []
for fp in csv_files + csv_files2:
    configs = load_submission(fp)
    if configs is None:
        continue
    all_sources.append((os.path.basename(fp), configs))

print(f'Loaded {len(all_sources)} sources')

# For each N, find the best valid config
better_configs = {}
for n in range(1, 201):
    best_score = baseline_per_n[n]
    best_source = 'baseline'

    for source_name, configs in all_sources:
        if n not in configs:
            continue
        c = configs[n]
        # A group keyed N is expected to hold exactly N trees; a truncated
        # or padded group would get a misleading score, so skip it.
        if len(c['x']) != n:
            continue
        # Compare the cheap score first and only run the pairwise overlap
        # check on candidates that would actually replace the current best.
        # `not (a < b)` rather than `a >= b` so a NaN score is rejected.
        score = calculate_score(c['x'], c['y'], c['deg'])
        if not (score < best_score - 1e-9):
            continue
        if check_any_overlap(c['x'], c['y'], c['deg']):
            continue
        best_score = score
        best_source = source_name

    if best_source != 'baseline':
        improvement = baseline_per_n[n] - best_score
        better_configs[n] = {'source': best_source, 'score': best_score, 'improvement': improvement}

print(f'\nFound {len(better_configs)} N values with better configs than baseline:')
for n, info in sorted(better_configs.items()):
    print(f'N={n}: {baseline_per_n[n]:.6f} -> {info["score"]:.6f} (improvement: {info["improvement"]:.6f}) from {info["source"]}')

In [None]:
# Key insight: The baseline is already the best valid configuration for ALL N values
# The only way to improve is to find configurations that:
# 1. Are valid (no overlaps)
# 2. Have smaller bounding boxes than baseline

# Let's check the theoretical minimum for small N values
print('\nTheoretical analysis for small N:')
# An unrotated tree is 0.7 wide and 1.0 tall (tip at 0.8, trunk bottom at
# -0.2), so its square side is 1.0. That is not the minimum: rotated by 45
# degrees, the extreme vertices (tip and the two base corners) span
# (0.8 + 0.35) / sqrt(2) ~= 0.813 on both axes, giving a score of ~0.661.
print('N=1: Single tree, unrotated bbox = 1.0 (tree height), score = 1.0; '
      'rotated 45 deg the side is ~0.813 (score ~0.661)')
print('N=2: Two trees, need to pack efficiently')
print('N=3: Three trees, need to pack efficiently')

# Check actual baseline scores for small N
for n in range(1, 11):
    c = baseline[n]
    side = calculate_bounding_box(c['x'], c['y'], c['deg'])
    print(f'N={n}: side={side:.6f}, score={baseline_per_n[n]:.6f}')

In [None]:
# CRITICAL INSIGHT: according to the evaluator, our jiweiliu implementation
# was a severely simplified version. Features that were left out:
# 1. Multiprocessing - jiweiliu runs SA in parallel across all grid configurations
# 2. Deletion cascade - propagates improvements from larger N to smaller N
# 3. Initial seeds - pre-computed good starting points
# 4. sa_optimize_improved - more sophisticated move types
#
# The baseline (70.734) is ALREADY BETTER than what jiweiliu can produce (71.5),
# so SA-with-translations is NOT the right approach for improving our baseline.
#
# Options recommended by the evaluator:
# Option A: Run C++ optimizer with proper parameters (150,000+ iterations, 32+ restarts, multiple generations)
# Option B: Ensemble approach - collect best N-config from multiple sources
# Option C: Fix the SA-with-translations implementation (but this may not help since baseline is already better)

# Summary report, printed one line at a time.
for report_line in (
    '\n=== STRATEGIC ANALYSIS ===',
    '1. Baseline score: 70.734327',
    '2. Target score: 68.931058',
    '3. Gap: 1.803269 (2.5%)',
    '4. All 7 experiments achieved EXACTLY the same score',
    '5. The baseline is at a very strong local optimum',
    '',
    'Key insight from evaluator:',
    '- The jiweiliu kernel improves from 71.657 -> 71.5',
    '- Our baseline is 70.734, which is ALREADY BETTER',
    '- SA-with-translations cannot improve an already-optimized baseline',
    '',
    'Recommended approach:',
    '- Run C++ optimizer with MUCH higher parameters',
    '- 150,000+ iterations (vs 20,000 used before)',
    '- 32+ restarts (vs 10 used before)',
    '- Multiple generations (keep running until no improvement)',
):
    print(report_line)

In [None]:
# What the seshurajup kernel does differently, per the kernel analysis:
# 1. Uses 50,000 iterations, 8 restarts
# 2. Runs MULTIPLE GENERATIONS until no improvement for 10 generations
# 3. Uses OpenMP parallelization
# 4. Ensembles from multiple sources
#
# The key difference is MULTIPLE GENERATIONS: each generation runs SA on all
# N values and then saves the best, which allows improvements to compound
# over time.

# Summary report, printed one line at a time.
for report_line in (
    '\n=== SESHURAJUP KERNEL ANALYSIS ===',
    'Key features:',
    '1. Ensemble from multiple sources (santa-2025-ensemble-sa-greedy-backtracking, santa-claude)',
    '2. C++ optimizer with 50,000 iterations, 8 restarts',
    '3. MULTIPLE GENERATIONS - keeps running until no improvement for 10 generations',
    '4. OpenMP parallelization for speed',
    '',
    'This is the key insight: MULTIPLE GENERATIONS',
    'Our experiments ran single-pass optimization',
    'The seshurajup kernel runs ENDLESS optimization until convergence',
):
    print(report_line)