# Experiment 014: Memetic Algorithm with Overlap Repair

Key changes from exp_012 (GA):
1. LARGE mutations (intended 0.5-1.0 tree widths, not 0.1; note that the code below currently runs with strength=0.3)
2. REPAIR overlaps instead of rejecting them
3. Apply LOCAL optimization after global perturbation

Focus on N=1-50 (worst efficiency)

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.ops import unary_union
import shutil
import os
import time
from datetime import datetime
import random

# Seed both RNGs so repeated runs draw the same sequence. The mutation,
# repair and local-search code below draws from `random`.
np.random.seed(42)
random.seed(42)

# Paths
# BASELINE_PATH is the CSV the baseline configurations are read from.
BASELINE_PATH = '/home/code/external_data/saspav/santa-2025.csv'
# NOTE(review): WORK_DIR is not referenced anywhere in the visible code.
WORK_DIR = '/home/code/experiments/014_memetic_algorithm'

# Tree geometry
# TX/TY are the x/y coordinates of the 15 outline vertices of one tree in its
# own frame; create_tree_polygon() and get_tree_vertices() rotate them by
# `deg` and translate them by (x, y).
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])
TREE_WIDTH = 0.7  # Maximum tree width, i.e. max(TX) - min(TX)
TREE_HEIGHT = 1.0  # Maximum tree height, i.e. max(TY) - min(TY)

print(f"Setup complete at {datetime.now()}")

Setup complete at 2026-01-19 23:40:35.458529


In [2]:
def parse_value(s):
    """Convert a CSV cell to float, dropping a leading 's' marker if present.

    Strings such as 's1.25' yield 1.25; any other value is passed to
    float() unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    raw = s[1:] if has_marker else s
    return float(raw)

def create_tree_polygon(x, y, deg):
    """Build the Shapely polygon of a tree rotated by `deg` degrees and moved to (x, y)."""
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)
    outline = []
    for px, py in zip(TX, TY):
        # Rotate the template vertex about the origin, then translate it.
        outline.append((px * c - py * s + x, px * s + py * c + y))
    return Polygon(outline)

def get_tree_vertices(x, y, deg):
    """Return the tree's outline vertices as a (len(TX), 2) array of [x, y] rows.

    The template outline (TX, TY) is rotated by `deg` degrees and then
    translated by (x, y).
    """
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)
    xs = TX * c - TY * s + x
    ys = TX * s + TY * c + y
    return np.column_stack([xs, ys])

def compute_bounding_side(trees):
    """Return the longer side of the axis-aligned bounding box around all trees.

    `trees` is an iterable of (x, y, deg) triples; every outline vertex of
    every tree contributes to the box.
    """
    points = np.vstack([get_tree_vertices(x, y, deg) for x, y, deg in trees])
    lower = points.min(axis=0)
    upper = points.max(axis=0)
    width = upper[0] - lower[0]
    height = upper[1] - lower[1]
    return max(width, height)

def compute_score(trees, n):
    """Score a configuration as bounding-square area divided by n.

    Returns infinity when `trees` does not contain exactly `n` entries.
    """
    if len(trees) == n:
        return compute_bounding_side(trees) ** 2 / n
    return float('inf')

def _polygons_overlap(poly1, poly2):
    """Return True when two polygons share more than 1e-12 of area."""
    # intersects() is also True for polygons that merely touch, so the
    # intersection area decides; it is only computed for candidate pairs.
    if not poly1.intersects(poly2):
        return False
    return poly1.intersection(poly2).area > 1e-12

def check_overlap_pair(tree1, tree2):
    """Check if two trees overlap.

    Each tree is an (x, y, deg) triple. Trees that only touch (intersection
    area of at most 1e-12) are not reported as overlapping.
    """
    return _polygons_overlap(create_tree_polygon(*tree1), create_tree_polygon(*tree2))

def find_overlapping_pairs(trees):
    """Find all pairs of overlapping trees.

    Returns a list of index pairs (i, j) with i < j, in ascending order,
    using the same overlap test as check_overlap_pair(). An empty list
    means the configuration is overlap-free.
    """
    # Build every polygon once instead of once per pair it takes part in.
    polygons = [create_tree_polygon(*tree) for tree in trees]
    overlaps = []
    for i, poly_i in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            if _polygons_overlap(poly_i, polygons[j]):
                overlaps.append((i, j))
    return overlaps

print("Helper functions defined")

Helper functions defined


In [3]:
def repair_overlaps(trees, max_iterations=100, push_dist=0.05):
    """Repair overlapping trees by pushing them apart.

    Args:
        trees: iterable of (x, y, deg) triples; the input is not modified.
        max_iterations: maximum number of detect-and-push rounds.
        push_dist: total separation added to each overlapping pair per
            round; each tree of the pair moves by half of it.

    Returns:
        A list of (x, y, deg) tuples with no overlapping pair, or None if
        overlaps remain after `max_iterations` rounds. Rotations are never
        changed by the repair.
    """
    trees = [list(t) for t in trees]  # Make mutable

    for _ in range(max_iterations):
        overlaps = find_overlapping_pairs(trees)
        if not overlaps:
            return [(t[0], t[1], t[2]) for t in trees]  # Convert back to tuples

        # For each overlapping pair, push trees apart. Positions are read
        # at the time the pair is processed, so earlier pushes in the same
        # round are taken into account.
        for i, j in overlaps:
            # Direction from tree i to tree j
            dx = trees[j][0] - trees[i][0]
            dy = trees[j][1] - trees[i][1]
            dist = np.sqrt(dx**2 + dy**2)

            if dist < 1e-6:
                # Trees at (almost) the same position: there is no usable
                # direction, so move along a random unit vector instead.
                angle = random.random() * 2 * np.pi
                dx, dy = np.cos(angle), np.sin(angle)
                dist = 1.0

            # Normalize direction
            dx /= dist
            dy /= dist

            # Move both trees away from each other by half the push each
            trees[i][0] -= dx * push_dist / 2
            trees[i][1] -= dy * push_dist / 2
            trees[j][0] += dx * push_dist / 2
            trees[j][1] += dy * push_dist / 2

    # The pushes of the last round have not been checked yet
    if find_overlapping_pairs(trees):
        return None
    return [(t[0], t[1], t[2]) for t in trees]

def apply_large_mutation(trees, strength=0.5):
    """Return a copy of `trees` with Gaussian noise added to every tree.

    Each (x, y, deg) triple is perturbed independently with zero-mean
    noise whose standard deviation is `strength * TREE_WIDTH` for x,
    `strength * TREE_HEIGHT` for y and `strength * 45` degrees for the
    rotation. The noise is unbounded, so individual steps can exceed
    those scales. The input is left untouched.
    """
    return [
        (
            x + random.gauss(0, strength * TREE_WIDTH),
            y + random.gauss(0, strength * TREE_HEIGHT),
            deg + random.gauss(0, strength * 45),
        )
        for x, y, deg in trees
    ]

print("Repair and mutation functions defined")

Repair and mutation functions defined


In [4]:
# Load baseline configurations
# load_config_for_n() below reads the 'id', 'x', 'y' and 'deg' columns of
# this frame.
df_baseline = pd.read_csv(BASELINE_PATH)

def load_config_for_n(df, n):
    """Return the trees of the n-tree configuration as (x, y, deg) float tuples.

    Rows are selected by an 'id' that starts with the zero-padded prefix
    f"{n:03d}_"; their 'x', 'y' and 'deg' cells are converted with
    parse_value(). The list is empty when no row matches.
    """
    selected = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        (parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(selected['x'], selected['y'], selected['deg'])
    ]

# Baseline configuration and score for every N in 1..200, keyed by N.
baseline_configs = {}
baseline_scores = {}
for n in range(1, 201):
    trees = load_config_for_n(df_baseline, n)
    baseline_configs[n] = trees
    # compute_score() returns inf when the CSV does not hold exactly n
    # trees for this N, which makes the total below inf as well.
    baseline_scores[n] = compute_score(trees, n)

# Sum of the per-N scores
baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")

Baseline total score: 70.659959


In [5]:
# Memetic Algorithm with Overlap Repair
class MemeticAlgorithm:
    """Population search that mutates, repairs and locally refines a configuration.

    Every generation each individual is refined with local_optimize(), the
    best-scoring individuals survive, and the population is refilled with
    mutated and overlap-repaired copies of the survivors. Lower scores are
    better (see compute_score()).

    Args:
        n: number of trees in the configuration.
        baseline_trees: starting configuration as (x, y, deg) triples.
        pop_size: number of individuals per generation.
        mutation_strength: `strength` passed to apply_large_mutation() for
            every mutation.
    """

    def __init__(self, n, baseline_trees, pop_size=10, mutation_strength=0.3):
        self.n = n
        self.baseline_trees = list(baseline_trees)
        self.pop_size = pop_size
        self.mutation_strength = mutation_strength
        # Best result seen so far; starts at the baseline so run() never
        # returns anything worse than it.
        self.best_score = compute_score(baseline_trees, n)
        self.best_config = list(baseline_trees)

    def _mutate_and_repair(self, parent):
        """Return a repaired mutant of `parent`, or `parent` itself if repair fails."""
        mutated = apply_large_mutation(parent, strength=self.mutation_strength)
        repaired = repair_overlaps(mutated)
        return parent if repaired is None else repaired

    def create_initial_population(self):
        """Create population with baseline + perturbed copies of it.

        The first individual is the baseline itself; a perturbed copy whose
        overlaps cannot be repaired is replaced by the baseline.
        """
        population = [self.baseline_trees]  # Start with baseline
        for _ in range(self.pop_size - 1):
            population.append(self._mutate_and_repair(self.baseline_trees))
        return population

    @staticmethod
    def _moved_tree_is_clear(trees, idx):
        """Return True when tree `idx` overlaps no other tree in `trees`."""
        moved = trees[idx]
        for k, other in enumerate(trees):
            if k == idx:
                continue
            # Keep the (lower index, higher index) argument order that
            # find_overlapping_pairs() uses.
            pair = (other, moved) if k < idx else (moved, other)
            if check_overlap_pair(*pair):
                return False
        return True

    def local_optimize(self, trees, iterations=50):
        """Simple local optimization: try small moves for each tree.

        Each iteration jitters one random tree (sigma 0.01 in x and y, 1
        degree in rotation) and keeps the move only if the configuration is
        overlap-free and scores strictly better.

        Returns:
            (best_trees, best_score); the input configuration and its score
            if no move was accepted.
        """
        best_trees = list(trees)
        best_score = compute_score(trees, self.n)

        # While the current best is known to be overlap-free, a move of one
        # tree can only create overlaps that involve that tree, so checking
        # it against the others is equivalent to a full pairwise scan.
        base_is_valid = not find_overlapping_pairs(best_trees)

        for _ in range(iterations):
            # Pick random tree
            idx = random.randint(0, len(trees) - 1)

            # Try small perturbation
            new_trees = list(best_trees)
            x, y, deg = new_trees[idx]
            new_trees[idx] = (
                x + random.gauss(0, 0.01),
                y + random.gauss(0, 0.01),
                deg + random.gauss(0, 1)
            )

            # Check if valid and better
            if base_is_valid:
                is_valid = self._moved_tree_is_clear(new_trees, idx)
            else:
                # The starting point itself overlaps: fall back to the full
                # scan until a valid configuration has been accepted.
                is_valid = not find_overlapping_pairs(new_trees)

            if is_valid:
                new_score = compute_score(new_trees, self.n)
                if new_score < best_score:
                    best_trees = new_trees
                    best_score = new_score
                    base_is_valid = True

        return best_trees, best_score

    def run(self, generations=30):
        """Run memetic algorithm.

        Returns:
            (best_config, best_score): the best configuration found over
            all generations, or the baseline if nothing improved on it.
        """
        population = self.create_initial_population()

        for _ in range(generations):
            # Evaluate and local optimize each individual
            scored_pop = []
            for ind in population:
                optimized, score = self.local_optimize(ind, iterations=20)
                scored_pop.append((score, optimized))

            scored_pop.sort(key=lambda x: x[0])

            # Update best
            if scored_pop[0][0] < self.best_score:
                self.best_score = scored_pop[0][0]
                self.best_config = scored_pop[0][1]

            # Selection: keep top half (at least two, if available)
            survivors = [ind for score, ind in scored_pop[:max(2, self.pop_size // 2)]]

            # Refill the population with mutated, repaired survivors
            new_population = survivors.copy()
            while len(new_population) < self.pop_size:
                parent = random.choice(survivors)
                new_population.append(self._mutate_and_repair(parent))

            population = new_population

        return self.best_config, self.best_score

print("Memetic Algorithm class defined")

Memetic Algorithm class defined


In [None]:
# Run Memetic Algorithm on N=1-50
print("\n" + "="*60)
print("Running Memetic Algorithm with Overlap Repair")
print("="*60)

# Final configuration and score per N; every N in 1..50 gets an entry,
# either the MA result or the baseline.
improved_configs = {}
improved_scores = {}

for n in range(1, 51):
    baseline_trees = baseline_configs[n]
    baseline_score = baseline_scores[n]
    
    # Run Memetic Algorithm
    ma = MemeticAlgorithm(n, baseline_trees, pop_size=15)
    best_config, best_score = ma.run(generations=50)
    
    # Accept the MA result only when it beats the baseline by more than
    # 1e-9; otherwise keep the baseline configuration and score.
    improvement = baseline_score - best_score
    if improvement > 1e-9:
        print(f"N={n}: {baseline_score:.9f} -> {best_score:.9f} (improvement: {improvement:.9f})")
        improved_configs[n] = best_config
        improved_scores[n] = best_score
    else:
        improved_configs[n] = baseline_trees
        improved_scores[n] = baseline_score

# Count the N values whose stored score beats the baseline by more than 1e-9
print(f"\nN values improved: {sum(1 for n in range(1, 51) if improved_scores[n] < baseline_scores[n] - 1e-9)}/50")

In [None]:
# Add remaining N values from baseline
# (N=51..200 were not optimized, so they are carried over unchanged)
for n in range(51, 201):
    improved_configs[n] = baseline_configs[n]
    improved_scores[n] = baseline_scores[n]

# Compute total score
# Sum over all 200 N values; a positive difference below means the MA total
# is lower than the baseline total.
MA_total = sum(improved_scores.values())
print(f"\nBaseline total: {baseline_total:.6f}")
print(f"Memetic Algorithm total: {MA_total:.6f}")
print(f"Improvement: {baseline_total - MA_total:.9f}")

In [None]:
# Build and validate submission
def build_submission(configs):
    """Flatten the per-N configurations into a submission DataFrame.

    `configs` maps every N in 1..200 to a list of (x, y, deg) triples.
    Each tree becomes one row whose id is f"{n:03d}_{i}" (i being the
    tree's position in its list) and whose coordinates are written as
    's'-prefixed strings with 18 decimals.
    """
    records = [
        {
            'id': f"{n:03d}_{i}",
            'x': f"s{x:.18f}",
            'y': f"s{y:.18f}",
            'deg': f"s{deg:.18f}",
        }
        for n in range(1, 201)
        for i, (x, y, deg) in enumerate(configs[n])
    ]
    return pd.DataFrame(records)

df_submission = build_submission(improved_configs)
print(f"Submission has {len(df_submission)} rows")

# Validate
# overlap_count is the number of N values whose configuration contains at
# least one overlapping pair (not the number of overlapping pairs).
overlap_count = 0
for n in range(1, 201):
    if find_overlapping_pairs(improved_configs[n]):
        overlap_count += 1
        # Only the first five offending N values are printed
        if overlap_count <= 5:
            print(f"N={n}: OVERLAP")

print(f"\nTotal overlaps: {overlap_count}/200")

In [None]:
# Save submission
# Nothing is written when any configuration overlaps. Otherwise the new
# submission is saved only if it beats the baseline total by more than 1e-9;
# if not, the baseline file is copied so a submission file always exists.
SUBMISSION_PATH = '/home/submission/submission.csv'

if overlap_count != 0:
    print("\nOverlaps detected - not saving")
else:
    # Make sure the target directory exists so the finished run is not lost
    # to a missing folder.
    os.makedirs(os.path.dirname(SUBMISSION_PATH), exist_ok=True)
    if MA_total < baseline_total - 1e-9:
        df_submission.to_csv(SUBMISSION_PATH, index=False)
        print(f"\nSaved improved submission: {MA_total:.6f}")
    else:
        shutil.copy(BASELINE_PATH, SUBMISSION_PATH)
        print("\nNo improvement - saved baseline")

In [None]:
# Summary
# Recap of the totals and the validation result computed in the cells above.
print("="*60)
print("EXPERIMENT 014 SUMMARY: Memetic Algorithm with Overlap Repair")
print("="*60)
print(f"Baseline score: {baseline_total:.6f}")
print(f"MA score: {MA_total:.6f}")
print(f"Improvement: {baseline_total - MA_total:.9f}")
# Number of N values (out of 200) whose configuration has overlapping trees
print(f"Overlaps: {overlap_count}/200")
print("="*60)