# Experiment 006: Genetic Algorithm Approach

Try a fundamentally different optimization approach - Genetic Algorithm with crossover.

Key differences from SA:
1. Population-based search (multiple solutions evolving)
2. Crossover operator (combine good solutions)
3. Mutation operator (local perturbation)
4. Selection pressure (keep best solutions)

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import json
import random
import math
import copy
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed
import multiprocessing

print(f'Available CPUs: {multiprocessing.cpu_count()}')

class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))
        self._update_polygon()
    
    def _update_polygon(self):
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (float(Decimal('0.0')), float(tip_y)),
            (float(top_w / Decimal('2')), float(tier_1_y)),
            (float(top_w / Decimal('4')), float(tier_1_y)),
            (float(mid_w / Decimal('2')), float(tier_2_y)),
            (float(mid_w / Decimal('4')), float(tier_2_y)),
            (float(base_w / Decimal('2')), float(base_y)),
            (float(trunk_w / Decimal('2')), float(base_y)),
            (float(trunk_w / Decimal('2')), float(trunk_bottom_y)),
            (float(-(trunk_w / Decimal('2'))), float(trunk_bottom_y)),
            (float(-(trunk_w / Decimal('2'))), float(base_y)),
            (float(-(base_w / Decimal('2'))), float(base_y)),
            (float(-(mid_w / Decimal('4'))), float(tier_2_y)),
            (float(-(mid_w / Decimal('2'))), float(tier_2_y)),
            (float(-(top_w / Decimal('4'))), float(tier_1_y)),
            (float(-(top_w / Decimal('2'))), float(tier_1_y)),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated, xoff=float(self.center_x), yoff=float(self.center_y))
    
    def set_params(self, x, y, angle):
        self.center_x = Decimal(str(x))
        self.center_y = Decimal(str(y))
        self.angle = Decimal(str(angle))
        self._update_polygon()
    
    def get_params(self):
        return float(self.center_x), float(self.center_y), float(self.angle)

def has_collision(trees):
    for i in range(len(trees)):
        for j in range(i+1, len(trees)):
            if trees[i].polygon.intersects(trees[j].polygon) and not trees[i].polygon.touches(trees[j].polygon):
                return True
    return False

def get_bounding_box_side(trees):
    all_polygons = [t.polygon for t in trees]
    bounds = unary_union(all_polygons).bounds
    return max(bounds[2] - bounds[0], bounds[3] - bounds[1])

def calculate_score(trees):
    n = len(trees)
    side = get_bounding_box_side(trees)
    return side ** 2 / n

print('Functions defined.')

Available CPUs: 26
Functions defined.


In [2]:
def load_solution(csv_path):
    df = pd.read_csv(csv_path)
    df['x'] = df['x'].astype(str).str.strip().str.lstrip('s')
    df['y'] = df['y'].astype(str).str.strip().str.lstrip('s')
    df['deg'] = df['deg'].astype(str).str.strip().str.lstrip('s')
    df[['group_id', 'item_id']] = df['id'].str.split('_', n=2, expand=True)
    
    solution = {}
    for group_id, group_data in df.groupby('group_id'):
        n = int(group_id)
        trees = [(row['x'], row['y'], row['deg']) for _, row in group_data.iterrows()]
        solution[n] = trees
    return solution

def score_config(trees_data):
    tree_list = [ChristmasTree(x, y, deg) for x, y, deg in trees_data]
    return calculate_score(tree_list)

# Load baseline
baseline = load_solution('/home/code/experiments/004_cpp_sa_optimizer/input.csv')
baseline_scores = {n: score_config(baseline[n]) for n in range(1, 201)}
baseline_total = sum(baseline_scores.values())
print(f'Baseline total score: {baseline_total:.6f}')

Baseline total score: 70.647306


In [5]:
class GeneticAlgorithm:
    def __init__(self, n, initial_config, pop_size=20, generations=100, mutation_rate=0.3):
        self.n = n
        self.pop_size = pop_size
        self.generations = generations
        self.mutation_rate = mutation_rate
        
        # Initialize population with variations of initial config
        self.population = []
        for _ in range(pop_size):
            config = self.mutate_config(initial_config, strength=0.5)
            if config is not None:
                self.population.append(config)
        
        # Add original config
        self.population.append(list(initial_config))
        
        self.best_config = list(initial_config)
        self.best_score = score_config(initial_config)
    
    def mutate_config(self, config, strength=0.1):
        """Mutate a configuration by perturbing positions and angles"""
        new_config = []
        for x, y, deg in config:
            new_x = float(x) + random.gauss(0, strength)
            new_y = float(y) + random.gauss(0, strength)
            new_deg = (float(deg) + random.gauss(0, strength * 30)) % 360
            new_config.append((str(new_x), str(new_y), str(new_deg)))
        
        # Check validity
        trees = [ChristmasTree(x, y, deg) for x, y, deg in new_config]
        if has_collision(trees):
            return None
        return new_config
    
    def crossover(self, parent1, parent2):
        """Crossover two configurations - take trees from both parents"""
        child = []
        for i in range(len(parent1)):
            if random.random() < 0.5:
                child.append(parent1[i])
            else:
                child.append(parent2[i])
        
        # Check validity
        trees = [ChristmasTree(x, y, deg) for x, y, deg in child]
        if has_collision(trees):
            return None
        return child
    
    def evolve(self):
        """Run one generation of evolution"""
        # Score all individuals
        scored = []
        for config in self.population:
            try:
                score = score_config(config)
                scored.append((score, config))
            except:
                pass
        
        if len(scored) < 2:
            return
        
        scored.sort(key=lambda x: x[0])
        
        # Update best
        if scored[0][0] < self.best_score:
            self.best_score = scored[0][0]
            self.best_config = scored[0][1]
        
        # Selection - keep top 50%
        survivors = [config for score, config in scored[:max(2, len(scored)//2)]]
        
        # Create new population
        new_pop = list(survivors)
        
        # Crossover
        attempts = 0
        while len(new_pop) < self.pop_size and attempts < 100:
            attempts += 1
            if len(survivors) >= 2:
                p1, p2 = random.sample(survivors, 2)
                child = self.crossover(p1, p2)
                if child is not None:
                    new_pop.append(child)
                    continue
            # If crossover fails, try mutation
            child = self.mutate_config(random.choice(survivors), strength=0.1)
            if child is not None:
                new_pop.append(child)
        
        # Mutation
        for i in range(len(new_pop)):
            if random.random() < self.mutation_rate:
                mutated = self.mutate_config(new_pop[i], strength=0.05)
                if mutated is not None:
                    new_pop[i] = mutated
        
        self.population = new_pop[:self.pop_size]
    
    def run(self):
        """Run the genetic algorithm"""
        for gen in range(self.generations):
            self.evolve()
        return self.best_config, self.best_score

print('GA class defined.')

GA class defined.


In [6]:
# Test GA on small N values (2-20) where we have the most room for improvement
improvements = []
best_solution = {n: list(baseline[n]) for n in baseline}

print('Testing GA on N=2-30...')
for n in tqdm(range(2, 31)):
    current_score = baseline_scores[n]
    
    # Run GA
    ga = GeneticAlgorithm(n, baseline[n], pop_size=30, generations=50, mutation_rate=0.3)
    best_config, best_score = ga.run()
    
    if best_score < current_score - 1e-9:
        improvements.append((n, current_score, best_score, current_score - best_score))
        best_solution[n] = best_config
        print(f'  N={n:3d}: {current_score:.6f} -> {best_score:.6f} (improvement: {current_score - best_score:.6f})')

print(f'\nFound {len(improvements)} improvements')
total_improvement = sum(delta for _, _, _, delta in improvements)
print(f'Total improvement: {total_improvement:.6f}')

Testing GA on N=2-30...


  0%|          | 0/29 [00:00<?, ?it/s]

  3%|▎         | 1/29 [00:00<00:14,  1.95it/s]

  7%|▋         | 2/29 [00:01<00:18,  1.46it/s]

 10%|█         | 3/29 [00:02<00:22,  1.14it/s]

 14%|█▍        | 4/29 [00:03<00:27,  1.09s/it]

 17%|█▋        | 5/29 [00:05<00:32,  1.35s/it]

 24%|██▍       | 7/29 [00:05<00:15,  1.44it/s]

 31%|███       | 9/29 [00:05<00:08,  2.28it/s]

 34%|███▍      | 10/29 [00:06<00:06,  2.77it/s]

 38%|███▊      | 11/29 [00:06<00:05,  3.35it/s]

 41%|████▏     | 12/29 [00:06<00:04,  3.97it/s]

 45%|████▍     | 13/29 [00:06<00:03,  4.56it/s]

 48%|████▊     | 14/29 [00:06<00:02,  5.07it/s]

 52%|█████▏    | 15/29 [00:06<00:02,  5.47it/s]

 55%|█████▌    | 16/29 [00:06<00:02,  5.68it/s]

 59%|█████▊    | 17/29 [00:07<00:02,  5.74it/s]

 62%|██████▏   | 18/29 [00:07<00:01,  5.70it/s]

 66%|██████▌   | 19/29 [00:07<00:01,  5.57it/s]

 69%|██████▉   | 20/29 [00:07<00:01,  5.40it/s]

 72%|███████▏  | 21/29 [00:07<00:01,  5.20it/s]

 76%|███████▌  | 22/29 [00:08<00:01,  4.98it/s]

 79%|███████▉  | 23/29 [00:08<00:01,  4.78it/s]

 83%|████████▎ | 24/29 [00:08<00:01,  4.56it/s]

 86%|████████▌ | 25/29 [00:08<00:00,  4.37it/s]

 90%|████████▉ | 26/29 [00:08<00:00,  4.19it/s]

 93%|█████████▎| 27/29 [00:09<00:00,  4.04it/s]

 97%|█████████▋| 28/29 [00:09<00:00,  3.89it/s]

100%|██████████| 29/29 [00:09<00:00,  3.74it/s]

100%|██████████| 29/29 [00:09<00:00,  2.95it/s]


Found 0 improvements
Total improvement: 0.000000





In [None]:
# Calculate new total score
new_total = sum(score_config(best_solution[n]) for n in range(1, 201))

print(f'Baseline score: {baseline_total:.6f}')
print(f'After GA: {new_total:.6f}')
print(f'Improvement: {baseline_total - new_total:.6f}')
print(f'Target: 68.919154')
print(f'Gap to target: {new_total - 68.919154:.6f}')

In [None]:
# Save submission
rows = []
for n in range(1, 201):
    for i, (x, y, deg) in enumerate(best_solution[n]):
        rows.append({
            'id': f'{n:03d}_{i}',
            'x': f's{x}',
            'y': f's{y}',
            'deg': f's{deg}'
        })

submission_df = pd.DataFrame(rows)
submission_df.to_csv('/home/submission/submission.csv', index=False)
submission_df.to_csv('submission.csv', index=False)
print(f'Saved submission with {len(submission_df)} rows')

In [None]:
# Save metrics
metrics = {
    'cv_score': new_total,
    'baseline_score': baseline_total,
    'improvement': baseline_total - new_total,
    'num_improvements': len(improvements),
    'improvements_detail': [(n, old, new, delta) for n, old, new, delta in improvements]
}

with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f'Saved metrics')