# Experiment 013: Genetic Algorithm for Tree Packing

Implement a genetic algorithm (GA) to escape the local optimum:
1. Population: Multiple configurations per N (baseline + perturbations + lattice)
2. Crossover: Combine tree positions from two parents
3. Mutation: Small random perturbations
4. Selection: Keep best configurations
5. Local optimization: Apply bbox3 to refine

Focus on N=1-50 first: these sizes have the worst efficiency and account for 26.9% of the total score.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
from decimal import Decimal, getcontext
import subprocess
import shutil
import os
import time
from datetime import datetime
import random
from copy import deepcopy

# Precision (in significant digits) of the decimal module's current context.
getcontext().prec = 30
# NOTE(review): scale_factor is not referenced in the visible code — confirm
# whether it is still needed.
scale_factor = Decimal("1e18")
# Seed both RNGs so draws from numpy and from the random module are repeatable.
np.random.seed(42)
random.seed(42)

# Paths
# BASELINE_PATH is the CSV read as the baseline solution.
# WORK_DIR is presumably this experiment's working directory; it is not used
# in the visible code — TODO confirm.
BASELINE_PATH = '/home/code/external_data/saspav/santa-2025.csv'
WORK_DIR = '/home/code/experiments/013_genetic_algorithm'

# Tree geometry
# Outline of one tree before rotation/translation: (TX[i], TY[i]) is vertex i
# of a 15-vertex polygon.
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

# Timestamped marker showing the setup cell finished.
print(f"Setup complete at {datetime.now()}")

In [None]:
def parse_value(s):
    """Convert a cell value to ``float``.

    String values may carry a leading ``'s'`` marker, which is dropped before
    conversion; every other value is handed to ``float`` unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    raw = s[1:] if has_marker else s
    return float(raw)

def create_tree_polygon(x, y, deg):
    """Return the tree outline as a shapely ``Polygon``.

    The outline (TX, TY) is rotated by ``deg`` degrees about its local origin
    and then translated by ``(x, y)``.
    """
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)
    outline = []
    for tx, ty in zip(TX, TY):
        # Standard 2-D rotation followed by the translation.
        px = tx * c - ty * s + x
        py = tx * s + ty * c + y
        outline.append((px, py))
    return Polygon(outline)

def get_tree_vertices(x, y, deg):
    """Return the transformed outline vertices as an array with one (x, y) row per vertex.

    The outline (TX, TY) is rotated by ``deg`` degrees and translated by ``(x, y)``.
    """
    theta = np.radians(deg)
    c, s = np.cos(theta), np.sin(theta)
    xs = TX * c - TY * s + x
    ys = TX * s + TY * c + y
    return np.column_stack((xs, ys))

def compute_bounding_side(trees):
    """Compute the side of the axis-aligned bounding square of a layout.

    Args:
        trees: Sequence of ``(x, y, deg)`` tuples.

    Returns:
        The larger of the bounding box's width and height over all tree
        vertices, or ``0.0`` when ``trees`` is empty.
    """
    # np.vstack raises ValueError on an empty list, so handle "no trees" up front.
    if len(trees) == 0:
        return 0.0
    points = np.vstack([get_tree_vertices(x, y, deg) for x, y, deg in trees])
    extent = points.max(axis=0) - points.min(axis=0)
    return max(extent[0], extent[1])

def compute_score(trees, n):
    """Return ``side**2 / n`` for a layout, or ``inf`` if it does not hold exactly ``n`` trees."""
    if len(trees) == n:
        side = compute_bounding_side(trees)
        return side**2 / n
    return float('inf')

def check_overlaps(trees):
    """Return True if any two trees share an intersection area above 1e-12.

    Layouts with zero or one tree never overlap.
    """
    if len(trees) <= 1:
        return False
    shapes = [create_tree_polygon(x, y, deg) for x, y, deg in trees]
    for idx, first in enumerate(shapes):
        for second in shapes[idx + 1:]:
            if not first.intersects(second):
                continue
            # Touching edges/points intersect with (near-)zero area; only a
            # real area overlap counts.
            if first.intersection(second).area > 1e-12:
                return True
    return False

# Marker printed once the helper definitions above have been executed.
print("Helper functions defined")

In [None]:
# Load baseline configurations
# The code below reads the 'id', 'x', 'y' and 'deg' columns of this frame.
df_baseline = pd.read_csv(BASELINE_PATH)

def load_config_for_n(df, n):
    """Return the rows whose id starts with ``f"{n:03d}_"`` as ``(x, y, deg)`` float tuples.

    Rows are returned in the order they appear in ``df``; an N with no
    matching rows yields an empty list.
    """
    prefix = f"{n:03d}_"
    selected = df[df['id'].str.startswith(prefix)]
    return [
        (parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(selected['x'], selected['y'], selected['deg'])
    ]

# Load baseline scores
# baseline_configs[n] holds the parsed layout and baseline_scores[n] its
# compute_score value, for every N in 1..200.
baseline_configs = {}
baseline_scores = {}
for n in range(1, 201):
    trees = load_config_for_n(df_baseline, n)
    baseline_configs[n] = trees
    baseline_scores[n] = compute_score(trees, n)

baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")

# Show worst efficiency N values
# compute_score already returns side**2 / n, i.e. a per-tree value, so it is
# compared directly against the per-tree reference. Dividing by n a second
# time would rank the smallest N as "worst" regardless of packing quality.
# NOTE(review): the origin of the 0.355 reference value is not shown here.
efficiencies = {n: baseline_scores[n] / 0.355 for n in range(1, 201)}  # 0.355 is theoretical min
worst_n = sorted(efficiencies.keys(), key=lambda n: -efficiencies[n])[:20]
print(f"\nWorst efficiency N values: {worst_n}")
print(f"N=1 score: {baseline_scores[1]:.6f}, efficiency: {efficiencies[1]:.3f}x")

In [None]:
# Genetic Algorithm for a single N value
class TreePackingGA:
    """Genetic algorithm that searches for a lower-scoring layout for one N.

    An individual is a list of ``(x, y, deg)`` tuples. Fitness is
    ``compute_score`` (lower is better); layouts flagged by
    ``check_overlaps`` are scored ``inf``. The best layout seen so far is
    kept in ``best_config`` / ``best_score``, seeded with the baseline.
    """

    def __init__(self, n, baseline_trees, pop_size=20, mutation_rate=0.3):
        """Store the GA settings and seed the best-so-far with the baseline.

        Args:
            n: Number of trees every individual must contain.
            baseline_trees: Starting layout as ``(x, y, deg)`` tuples.
            pop_size: Number of individuals per generation.
            mutation_rate: Per-tree probability of being perturbed in ``mutate``.
        """
        self.n = n
        self.baseline_trees = baseline_trees
        self.pop_size = pop_size
        self.mutation_rate = mutation_rate
        # The baseline is scored without an overlap check; this is the bar to beat.
        self.best_score = compute_score(baseline_trees, n)
        self.best_config = list(baseline_trees)

    def create_initial_population(self):
        """Return the baseline plus ``pop_size - 1`` perturbed copies of it."""
        population = [list(self.baseline_trees)]  # Start with baseline

        # Add random perturbations of baseline
        for _ in range(self.pop_size - 1):
            population.append(self.mutate(list(self.baseline_trees), strength=0.1))

        return population

    def mutate(self, trees, strength=0.05):
        """Return a new layout with some trees randomly perturbed.

        Each tree is perturbed with probability ``mutation_rate``: x and y get
        Gaussian noise with standard deviation ``strength`` and the angle gets
        noise with standard deviation ``strength * 10``. The input list is not
        modified.
        """
        mutated = []
        for x, y, deg in trees:
            if random.random() < self.mutation_rate:
                # Perturb position
                x += random.gauss(0, strength)
                y += random.gauss(0, strength)
                # Perturb angle
                deg += random.gauss(0, strength * 10)
            mutated.append((x, y, deg))
        return mutated

    def crossover(self, parent1, parent2):
        """Combine two parents with single-point crossover.

        Returns ``parent1`` unchanged when the parents differ in length, and a
        copy of ``parent1`` when there are fewer than two trees (no interior
        cut point exists). Otherwise the child takes ``parent1``'s trees before
        a random cut and ``parent2``'s trees from the cut onward.
        """
        if len(parent1) != len(parent2):
            return parent1

        # random.randint(1, len - 1) has an empty range for 0 or 1 trees and
        # would raise ValueError, which broke the N=1 run.
        if len(parent1) < 2:
            return list(parent1)

        # Single-point crossover
        crossover_point = random.randint(1, len(parent1) - 1)
        return parent1[:crossover_point] + parent2[crossover_point:]

    def evaluate(self, trees):
        """Return the layout's score (lower is better), or ``inf`` if trees overlap."""
        if check_overlaps(trees):
            return float('inf')  # Invalid configuration
        return compute_score(trees, self.n)

    def run(self, generations=50):
        """Run the GA and return ``(best_config, best_score)``.

        Each generation scores the population, records a new best if one is
        found, keeps the top half as survivors and refills the population with
        mutated crossover children of randomly chosen survivors.
        """
        population = self.create_initial_population()
        # Keep at least one survivor so parent selection never draws from an
        # empty list when pop_size < 2.
        num_survivors = max(1, self.pop_size // 2)

        for _ in range(generations):
            # Evaluate all individuals
            scores = [(self.evaluate(ind), ind) for ind in population]
            scores.sort(key=lambda pair: pair[0])

            # Update best
            top_score, top_individual = scores[0]
            if top_score < self.best_score:
                self.best_score = top_score
                self.best_config = top_individual

            # Selection: keep top half. Overlapping (inf-score) individuals can
            # still survive when fewer than half the population is valid.
            survivors = [ind for _, ind in scores[:num_survivors]]

            # Create new population
            new_population = survivors.copy()

            # Crossover and mutation
            while len(new_population) < self.pop_size:
                parent1 = random.choice(survivors)
                parent2 = random.choice(survivors)
                child = self.mutate(self.crossover(parent1, parent2))
                new_population.append(child)

            population = new_population

        return self.best_config, self.best_score

# Marker printed once the class definition above has been executed.
print("GA class defined")

In [None]:
# Run the GA for every N in 1..50 and keep whichever of {GA result, baseline}
# scores lower by more than 1e-9.
rule = "=" * 60
print("\n" + rule)
print("Running Genetic Algorithm on worst efficiency N values")
print(rule)

improved_configs, improved_scores = {}, {}

for n in range(1, 51):
    baseline_trees, baseline_score = baseline_configs[n], baseline_scores[n]

    ga = TreePackingGA(n, baseline_trees, pop_size=30, mutation_rate=0.4)
    best_config, best_score = ga.run(generations=100)

    improvement = baseline_score - best_score
    if not (improvement > 1e-9):
        # No meaningful gain: carry the baseline entry over unchanged.
        improved_configs[n] = baseline_trees
        improved_scores[n] = baseline_score
        continue

    print(f"N={n}: {baseline_score:.9f} -> {best_score:.9f} (improvement: {improvement:.9f})")
    improved_configs[n] = best_config
    improved_scores[n] = best_score

num_improved = len([k for k in range(1, 51) if improved_scores[k] < baseline_scores[k] - 1e-9])
print(f"\nN values improved: {num_improved}/50")

In [None]:
# N=51..200 were not optimised: copy their baseline layout and score as-is.
for n in range(51, 201):
    improved_configs[n] = baseline_configs[n]
    improved_scores[n] = baseline_scores[n]

# Total over all N, compared against the baseline total.
GA_total = sum(improved_scores.values())
total_lines = (
    f"\nBaseline total: {baseline_total:.6f}",
    f"GA total: {GA_total:.6f}",
    f"Improvement: {baseline_total - GA_total:.9f}",
)
for line in total_lines:
    print(line)

In [None]:
# Build submission from improved configs
def build_submission(configs):
    """Turn ``configs[n]`` (n = 1..200) into a DataFrame with id/x/y/deg columns.

    Ids are ``f"{n:03d}_{i}"`` with ``i`` the tree's index inside its layout;
    each coordinate is written as ``'s'`` followed by the value with 18
    decimal places.
    """
    def fmt(value):
        return f"s{value:.18f}"

    records = [
        {'id': f"{n:03d}_{i}", 'x': fmt(x), 'y': fmt(y), 'deg': fmt(deg)}
        for n in range(1, 201)
        for i, (x, y, deg) in enumerate(configs[n])
    ]
    return pd.DataFrame(records)

df_submission = build_submission(improved_configs)
print(f"Submission has {len(df_submission)} rows")

# Validate: count the N whose layout has overlapping trees, reporting only the
# first five individually.
overlap_count = 0
for n in range(1, 201):
    if not check_overlaps(improved_configs[n]):
        continue
    overlap_count += 1
    if overlap_count <= 5:
        print(f"N={n}: OVERLAP")

print(f"\nTotal overlaps: {overlap_count}/200")

In [None]:
# Write the GA result only when it is overlap-free and beats the baseline by
# more than 1e-9; an overlap-free but non-improving run re-saves the baseline,
# and any overlap means nothing is written.
submission_path = '/home/submission/submission.csv'
if overlap_count != 0:
    print("\nOverlaps detected - not saving")
elif GA_total < baseline_total - 1e-9:
    df_submission.to_csv(submission_path, index=False)
    print(f"\nSaved improved submission: {GA_total:.6f}")
else:
    shutil.copy(BASELINE_PATH, submission_path)
    print("\nNo improvement - saved baseline")

In [None]:
# Final report: totals, their difference and the overlap count, framed by rules.
rule = "=" * 60
summary_lines = [
    rule,
    "EXPERIMENT 013 SUMMARY: Genetic Algorithm",
    rule,
    f"Baseline score: {baseline_total:.6f}",
    f"GA score: {GA_total:.6f}",
    f"Improvement: {baseline_total - GA_total:.9f}",
    f"Overlaps: {overlap_count}/200",
    rule,
]
for line in summary_lines:
    print(line)