# Genetic algorithm experiments

## Tests

### Setup

#### Ray initialization


In [1]:
import os
import sys

import pandas as pd
from tqdm.notebook import tqdm

# Make the project packages under ../src importable from this notebook.
sys.path.append(os.path.abspath("../src"))

# Exported so reset_ray() can forward the same path to Ray workers via runtime_env.
os.environ["PYTHONPATH"] = os.path.abspath("../src")

import ray

In [2]:
def reset_ray():
    """Restart the local Ray runtime used by the experiments.

    An already-initialized Ray instance is shut down first. The fresh instance
    forwards the notebook's ``PYTHONPATH`` to workers through ``runtime_env``
    and enables the dashboard on port 8265.
    """
    already_running = ray.is_initialized()
    if already_running:
        ray.shutdown()

    worker_env = {"env_vars": {"PYTHONPATH": os.environ["PYTHONPATH"]}}
    ray.init(runtime_env=worker_env, include_dashboard=True, dashboard_port=8265)

#### Problem loading


In [3]:
import glob
import os

from model import Problem
from utils import load_from_json

data_dir = "problems"
json_files = glob.glob(os.path.join(data_dir, "*.json"))

# (testcase name, Problem) pairs ordered by testcase name, so testcases are
# processed in a stable order regardless of how glob lists the files.
data: list[tuple[str, Problem]] = sorted(
    (
        (os.path.basename(path).replace(".json", ""), load_from_json(path))
        for path in json_files
    ),
    key=lambda pair: pair[0],
)

#### Setup results directory


In [4]:
# Root directory for all experiment outputs; created on first run.
results_dir = "results"

os.makedirs(results_dir, exist_ok=True)

In [5]:
def save_results(results, directory, filename_suffix):
    """Write result rows to ``<directory>/<testcase>_<filename_suffix>.csv``.

    Args:
        results: Sequence of row dicts. Each row must carry a ``"testcase"``
            key; the first row's value names the output file.
        directory: Directory the CSV file is written into.
        filename_suffix: Text placed after the testcase name in the filename.

    Returns:
        None. Nothing is written when ``results`` is empty.
    """
    df = pd.DataFrame(results)
    if df.empty:
        # With no rows there is no "testcase" column to name the file after;
        # skip instead of failing with a KeyError.
        print(f"No results to save for suffix '{filename_suffix}'; skipping.")
        return

    testcase_name = df["testcase"].iloc[0]
    filename = os.path.join(directory, f"{testcase_name}_{filename_suffix}.csv")
    df.to_csv(filename, index=False)

#### Setup all mutations

In [6]:
from ga.mutations import (
    CouriersMutation,
    Mutation,
    NewCourierMutation,
    PackagesMutation,
    RouteMutation,
    UnusedVehiclesMutation,
    UsedVehiclesMutation,
)

# Every mutation operator known to the experiments. The entries are the
# mutation classes themselves (not instances): later cells read `__name__`
# from them and toggle them through their `proba` attribute.
MUTATIONS: list[type[Mutation]] = [
    UsedVehiclesMutation,
    UnusedVehiclesMutation,
    CouriersMutation,
    PackagesMutation,
    RouteMutation,
    NewCourierMutation,
]

# Short labels, keyed by mutation class name, used when displaying suites.
MUTATION_ALIAS: dict[str, str] = {
    "CouriersMutation": "CM",
    "UsedVehiclesMutation": "UsedVM",
    "UnusedVehiclesMutation": "UnusedVM",
    "NewCourierMutation": "NewCM",
    "PackagesMutation": "PM",
    "RouteMutation": "RM",
}

#### Genetic algorithm loops

##### Initial configuration

In [7]:
# Early-stopping budget: consecutive non-improving iterations tolerated per run.
GA_RUN_PATIENCE = 100
# Size of the initial population generated for the mutation-subset experiment.
GA_INITIAL_POPULATION_SIZE = 50
# Upper bound on iterations, passed to GA.run(max_iter=...).
GA_MAX_RUN_ITERATIONS = 300
# Number of independent GA runs launched per suite.
GA_RUN_REPEAT = 10

# Bundle passed to the Ray tasks, which look the settings up by key.
CONFIG = {
    "GA_RUN_PATIENCE": GA_RUN_PATIENCE,
    "GA_INITIAL_POPULATION_SIZE": GA_INITIAL_POPULATION_SIZE,
    "GA_MAX_RUN_ITERATIONS": GA_MAX_RUN_ITERATIONS,
    "GA_RUN_REPEAT": GA_RUN_REPEAT,
}

##### Single GA loop


In [8]:
import time

import numpy as np

from ga import GA
from generator import Generator


@ray.remote
def run_single_ga_repeat(problem, mutations, population, config):
    """Run one GA optimisation with patience-based early stopping.

    Args:
        problem: Problem instance handed to ``GA``.
        mutations: Mutation classes that stay enabled for this run.
        population: Initial population handed to ``GA``.
        config: Mapping providing ``"GA_RUN_PATIENCE"`` and
            ``"GA_MAX_RUN_ITERATIONS"``.

    Returns:
        dict with ``"cost"`` (lowest cost seen), ``"iterations"`` (iterations
        that finished without triggering the patience stop), ``"time"``
        (seconds spent in the GA loop) and ``"cost_func_evals"``
        (the value of ``ga._cost_function_runs``).
    """
    patience_limit = config["GA_RUN_PATIENCE"]
    iteration_limit = config["GA_MAX_RUN_ITERATIONS"]

    # Mutations are toggled through the ``proba`` attribute of each entry in
    # MUTATIONS: selected ones get 0.5, all others are switched off.
    for mutation_cls in MUTATIONS:
        mutation_cls.proba = 0.5 if mutation_cls in mutations else 0

    ga = GA(problem=problem, initial_population=population, C=1.2, alpha=0.9)

    lowest_cost = np.inf
    remaining_patience = patience_limit
    completed_iterations = 0

    started_at = time.perf_counter()

    for state in ga.run(max_iter=iteration_limit):
        cost = ga.get_cost(state.solution)

        if cost < lowest_cost:
            # Improvement: remember it and refill the patience budget.
            lowest_cost, remaining_patience = cost, patience_limit
        else:
            remaining_patience -= 1

        if remaining_patience <= 0:
            break
        completed_iterations += 1

    elapsed = time.perf_counter() - started_at

    return {
        "cost": lowest_cost,
        "iterations": completed_iterations,
        "time": elapsed,
        "cost_func_evals": ga._cost_function_runs,
    }


##### Multiple GA loop job

In [9]:
@ray.remote
def run_suite_for_testcase_remote(
    testcase_name, problem, mutations, population, config
):
    """Run the GA ``config["GA_RUN_REPEAT"]`` times and summarise the runs.

    Each repeat is submitted as a ``run_single_ga_repeat`` Ray task; this task
    waits for all of them and aggregates their metrics.

    Args:
        testcase_name: Label stored under ``"testcase"`` in the summary.
        problem: Problem instance forwarded to every repeat.
        mutations: Mutation classes forwarded to every repeat; their
            ``__name__`` values are stored under ``"mutation_suite"``.
        population: Initial population forwarded to every repeat.
        config: Mapping providing ``"GA_RUN_REPEAT"`` plus the keys read by
            ``run_single_ga_repeat``.

    Returns:
        dict with mean/std/min/max/median of cost, iterations, time and
        cost-function evaluations; float values are rounded to 2 decimals.
    """
    repeat_count = config["GA_RUN_REPEAT"]

    pending = []
    for _ in range(repeat_count):
        pending.append(
            run_single_ga_repeat.remote(problem, mutations, population, config)
        )
    runs = ray.get(pending)

    aggregators = {
        "mean": np.mean,
        "std": np.std,
        "min": min,
        "max": max,
        "median": np.median,
    }
    # Per-metric statistic order; it fixes the key (and CSV column) order.
    layout = (
        ("cost", ("mean", "std", "max", "min", "median")),
        ("iterations", ("mean", "std", "min", "max", "median")),
        ("time", ("mean", "std", "min", "max", "median")),
        ("cost_func_evals", ("min", "max", "mean", "std", "median")),
    )

    summary = {
        "testcase": testcase_name,
        "mutation_suite": [m.__name__ for m in mutations],
    }
    for metric, stat_names in layout:
        samples = [run[metric] for run in runs]
        for stat_name in stat_names:
            summary[f"{metric}_{stat_name}"] = aggregators[stat_name](samples)

    rounded = {}
    for key, value in summary.items():
        rounded[key] = round(value, 2) if isinstance(value, float) else value

    return rounded

### Mutation subset testing

#### Subset generation

In [10]:
from itertools import chain, combinations


def get_all_subsets(lst):
    """Return every subset of ``lst`` as a list of tuples.

    Subsets are ordered by size (the empty tuple first, the full set last) and,
    within one size, in the order produced by ``itertools.combinations``.
    """
    subsets = []
    for size in range(len(lst) + 1):
        subsets.extend(combinations(lst, size))
    return subsets


In [11]:
# Every subset of MUTATIONS, including the empty one.
mutation_suite = get_all_subsets(MUTATIONS)

# Human-readable overview: one row per subset, listing the short aliases.
mutations_suite_df = pd.DataFrame(
    {
        "mutation_suite": [
            [MUTATION_ALIAS[m.__name__] for m in mutations]
            for mutations in mutation_suite
        ]
    }
)

display(mutations_suite_df)

Unnamed: 0,mutation_suite
0,[]
1,[UsedVM]
2,[UnusedVM]
3,[CM]
4,[PM]
...,...
59,"[UsedVM, UnusedVM, CM, RM, NewCM]"
60,"[UsedVM, UnusedVM, PM, RM, NewCM]"
61,"[UsedVM, CM, PM, RM, NewCM]"
62,"[UnusedVM, CM, PM, RM, NewCM]"


#### Setup results directory

In [12]:
# Output directory for the mutation-subset experiment CSV files.
mutations_results_dir = os.path.join(results_dir, "mutations")
os.makedirs(mutations_results_dir, exist_ok=True)


#### Main loop

In [None]:
# Start from a fresh Ray instance before the mutation-subset experiment.
reset_ray()

2025-06-14 13:10:56,777	INFO worker.py:1879 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[33m(raylet)[0m [2025-06-14 13:19:56,749 E 378637 378637] (raylet) node_manager.cc:3287: 4 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 4b9ab173331f9c37ade5d87ce062bc14d16f0a06f427b703721e690b, IP: 172.24.109.133) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 172.24.109.133`
[33m(raylet)[0m 
[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.
[33m(raylet)[0m 
[33m(raylet)[0m [2025-06-14 13:20:56,750 E 378637 378637] (rayl

In [14]:
for testcase_name, problem in tqdm(data, desc="Testcases"):
    # One initial population per testcase, shared by every mutation subset.
    generator = Generator(problem=problem)
    population = generator.generate_many_feasible(
        num_to_find=GA_INITIAL_POPULATION_SIZE, max_attempts=1000, verbose=False
    )

    # Submit all subsets first, then collect; metadata[i] labels futures[i]
    # for error reporting.
    futures = []
    metadata = []
    for mutations in mutation_suite:
        future = run_suite_for_testcase_remote.remote(
            testcase_name, problem, mutations, population, config=CONFIG
        )
        futures.append(future)
        metadata.append([m.__name__ for m in mutations])

    testcase_results = []
    with tqdm(total=len(futures), desc=f"Mutations for {testcase_name}") as pbar:
        for future, suite_names in zip(futures, metadata):
            try:
                res = ray.get(future)
                testcase_results.append(res)

            except Exception as e:
                # Best effort: report the failed suite and keep the others.
                print(
                    f"ERROR in testcase {testcase_name}, mutations {suite_names}: {e}"
                )
            pbar.update(1)

    if testcase_results:
        save_results(testcase_results, mutations_results_dir, "mutations_stats")
    else:
        # Every suite failed: nothing to write, continue with the next testcase.
        print(f"No successful runs for testcase {testcase_name}; nothing saved.")


Testcases:   0%|          | 0/5 [00:00<?, ?it/s]

Mutations for 01-one-courier:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 02-four-couriers:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 03-big:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 04-medium:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 05-small:   0%|          | 0/64 [00:00<?, ?it/s]

### Population size testing

#### Configuration

In [19]:
# Fixed mutation set used for every run of the population-size experiment.
SELECTED_MUTATIONS = [
    PackagesMutation,
    RouteMutation,
    UsedVehiclesMutation,
    UnusedVehiclesMutation,
]

# Initial population sizes to compare.
POPULATION_SUITE = [10, 25, 50, 75, 100, 150, 200, 250, 300, 350, 400, 450, 500]

#### Setup results directory

In [20]:
# Output directory for the population-size experiment CSV files.
population_results_dir = os.path.join(results_dir, "population")
os.makedirs(population_results_dir, exist_ok=True)

#### Main loop

In [21]:
# Start from a fresh Ray instance before the population-size experiment.
reset_ray()

2025-06-14 13:57:26,481	INFO worker.py:1879 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[33m(raylet)[0m [2025-06-14 14:07:26,430 E 405882 405882] (raylet) node_manager.cc:3287: 1 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 67e50211ceda5add154495be4dedb79222251dbdd8f5c83fd01770fc, IP: 172.24.109.133) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 172.24.109.133`
[33m(raylet)[0m 
[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.
[33m(raylet)[0m 
[33m(raylet)[0m [2025-06-14 14:08:26,431 E 405882 405882] (rayl

In [22]:
for testcase_name, problem in tqdm(data, desc="Testcases"):
    testcase_stats = []

    for population_size in tqdm(
        POPULATION_SUITE, desc=f"Population sizes for {testcase_name}"
    ):
        # A new generator and population for every size under test.
        generator = Generator(problem=problem)
        population = generator.generate_many_feasible(
            num_to_find=population_size, max_attempts=100000, verbose=False
        )

        future = run_suite_for_testcase_remote.remote(
            testcase_name, problem, SELECTED_MUTATIONS, population, config=CONFIG
        )

        # Sizes are evaluated one at a time: wait for this suite before
        # generating the next population.
        try:
            res = ray.get(future)
            res["population_size"] = population_size
            testcase_stats.append(res)
        except Exception as e:
            # Best effort: report the failed size and keep the others.
            print(
                f"ERROR in testcase {testcase_name}, population {population_size}: {e}"
            )

    if testcase_stats:
        save_results(testcase_stats, population_results_dir, "population_stats")
    else:
        # Every size failed: nothing to write, continue with the next testcase.
        print(f"No successful runs for testcase {testcase_name}; nothing saved.")


Testcases:   0%|          | 0/5 [00:00<?, ?it/s]

Population sizes for 01-one-courier:   0%|          | 0/13 [00:00<?, ?it/s]

Population sizes for 02-four-couriers:   0%|          | 0/13 [00:00<?, ?it/s]

Population sizes for 03-big:   0%|          | 0/13 [00:00<?, ?it/s]

Population sizes for 04-medium:   0%|          | 0/13 [00:00<?, ?it/s]

Population sizes for 05-small:   0%|          | 0/13 [00:00<?, ?it/s]

## Results

### Testcase description

In [None]:
import json

# Directory holding the problem definitions (JSON), relative to the notebook.
test_dir = "problems/"

# Paths of all problem JSON files; `glob` is imported in the problem-loading cell.
test_files = glob.glob(f"{test_dir}/*.json")


def flatten_dict(d, parent_key="", sep="."):
    """Flatten a nested dict into a single level with joined keys.

    Args:
        d: Mapping to flatten.
        parent_key: Prefix prepended (with ``sep``) to every key of ``d``;
            an empty prefix leaves top-level keys unchanged.
        sep: Separator placed between nested key segments.

    Returns:
        dict where nested dicts are expanded into ``parent<sep>child`` keys,
        lists are replaced by their length, and other values are kept as-is.
    """
    flat = {}
    for key, value in d.items():
        path = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten_dict(value, path, sep=sep))
            continue
        # Lists are summarised by their size; scalars pass through.
        flat[path] = len(value) if isinstance(value, list) else value
    return flat

In [None]:
# One summary row per problem file: the testcase name plus the flattened JSON
# content (lists are reduced to their length by flatten_dict).
tests_summary = []

for test_file in test_files:
    # Derive the name from the file itself rather than a hard-coded prefix.
    testcase = os.path.splitext(os.path.basename(test_file))[0]

    # Use dedicated names here: the loop variable must not be shadowed by the
    # file handle, and the global `data` list of loaded problems (used by the
    # experiment cells) must not be overwritten.
    with open(test_file, "r", encoding="utf-8") as fh:
        testcase_json = json.load(fh)

    row = {"testcase": testcase}
    row.update(flatten_dict(testcase_json))
    tests_summary.append(row)

tests_desc_df = (
    pd.DataFrame(tests_summary)
    # "permissions" is dropped when present; errors="ignore" covers its absence.
    .drop(columns=["permissions"], errors="ignore")
    .sort_values(by="testcase")
)

In [None]:
# Preview the first rows of the testcase summary table.
tests_desc_df.head()

### Load results

In [None]:
def load_results(directory, filename_suffix=""):
    """Load every CSV in ``directory`` into a dict keyed by testcase name.

    This is the inverse of the ``<testcase>_<suffix>.csv`` naming used when
    results are saved: the extension is removed and, when the file name ends
    with ``filename_suffix``, that suffix and the joining underscore are
    stripped so the key is the bare testcase name.

    Args:
        directory: Directory searched (non-recursively) for ``*.csv`` files.
        filename_suffix: Suffix to strip from the file names. With the default
            empty string the key is the file name without its extension.

    Returns:
        dict mapping each key to the ``DataFrame`` read from its file, in
        sorted file-name order. Empty when no CSV file is found.
    """
    results = {}
    # Sorted so the dict order does not depend on the filesystem listing.
    for file in sorted(glob.glob(os.path.join(directory, "*.csv"))):
        testcase_name = os.path.splitext(os.path.basename(file))[0]

        if filename_suffix:
            joined_suffix = f"_{filename_suffix}"
            if testcase_name.endswith(joined_suffix):
                testcase_name = testcase_name[: -len(joined_suffix)]
            elif testcase_name.endswith(filename_suffix):
                # Caller already included the separator in the suffix.
                testcase_name = testcase_name[: -len(filename_suffix)]

        results[testcase_name] = pd.read_csv(file)
    return results

In [None]:
results_dir = "results/"

mutations_results_dir = os.path.join(results_dir, "mutations")
population_results_dir = os.path.join(results_dir, "population")

# The suffixes must match the ones the experiment loops pass to save_results
# ("mutations_stats" / "population_stats"); otherwise load_results cannot
# strip them from the file names and the dict keys keep the full file stem.
mutation_results = load_results(mutations_results_dir, "mutations_stats")
population_results = load_results(population_results_dir, "population_stats")

print(mutation_results)

In [None]:
def plot_stats_subplot(ax, stat_cols):
    """Placeholder: currently does nothing and returns ``None``.

    NOTE(review): presumably meant to draw the columns named in ``stat_cols``
    on the axes ``ax`` — confirm the intended behaviour when implementing.
    """
    pass