# Genetic algorithm tests for the Courier Problem

In [1]:
import os
import sys

# Resolve the project source directory once so the kernel (sys.path) and the
# Ray workers (PYTHONPATH in the runtime env) are given the same location.
SRC_DIR = os.path.abspath("../src")

# Re-running this cell in a live kernel must not pile up duplicate entries.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

os.environ["PYTHONPATH"] = SRC_DIR

import ray

# Shut down any instance left over from a previous run of this cell so that
# ray.init below always starts from a clean state.
if ray.is_initialized():
    ray.shutdown()

ray.init(
    runtime_env={"env_vars": {"PYTHONPATH": SRC_DIR}},
    include_dashboard=True,
    dashboard_port=8265,
)


2025-06-14 04:25:15,379	INFO worker.py:1879 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


0,1
Python version:,3.12.3
Ray version:,2.46.0
Dashboard:,http://127.0.0.1:8265


[33m(raylet)[0m [2025-06-14 04:34:15,302 E 239590 239590] (raylet) node_manager.cc:3287: 6 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 0789e908b3b856052da63cd60fe5f89cea96aef01a22822e6360d278, IP: 172.24.109.133) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 172.24.109.133`
[33m(raylet)[0m 
[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.
[33m(raylet)[0m 
[33m(raylet)[0m [2025-06-14 04:35:15,303 E 239590 239590] (rayl

In [2]:
from itertools import chain, combinations


def get_all_subsets(lst):
    """Return every subset of ``lst`` as a list of tuples.

    Subsets are ordered by size (the empty tuple first, the full set last);
    within one size they follow the order produced by
    ``itertools.combinations``.
    """
    subsets = []
    for size in range(len(lst) + 1):
        subsets.extend(combinations(lst, size))
    return subsets


In [3]:
from ga.mutations import (
    CouriersMutation,
    Mutation,
    NewCourierMutation,
    PackagesMutation,
    RouteMutation,
    UnusedVehiclesMutation,
    UsedVehiclesMutation,
)

# Mutation classes (not instances) covered by the benchmark. The GA task
# switches each one on or off by assigning its class-level `proba` attribute.
all_mutations: list[type[Mutation]] = [
    UsedVehiclesMutation,
    UnusedVehiclesMutation,
    CouriersMutation,
    PackagesMutation,
    RouteMutation,
    NewCourierMutation,
]

# Every subset of the mutation classes, including the empty one.
mutation_suite = get_all_subsets(all_mutations)
print(mutation_suite)

[(), (<class 'ga.mutations.vehicles.UsedVehiclesMutation'>,), (<class 'ga.mutations.vehicles.UnusedVehiclesMutation'>,), (<class 'ga.mutations.couriers.CouriersMutation'>,), (<class 'ga.mutations.packages.PackagesMutation'>,), (<class 'ga.mutations.route.RouteMutation'>,), (<class 'ga.mutations.couriers.NewCourierMutation'>,), (<class 'ga.mutations.vehicles.UsedVehiclesMutation'>, <class 'ga.mutations.vehicles.UnusedVehiclesMutation'>), (<class 'ga.mutations.vehicles.UsedVehiclesMutation'>, <class 'ga.mutations.couriers.CouriersMutation'>), (<class 'ga.mutations.vehicles.UsedVehiclesMutation'>, <class 'ga.mutations.packages.PackagesMutation'>), (<class 'ga.mutations.vehicles.UsedVehiclesMutation'>, <class 'ga.mutations.route.RouteMutation'>), (<class 'ga.mutations.vehicles.UsedVehiclesMutation'>, <class 'ga.mutations.couriers.NewCourierMutation'>), (<class 'ga.mutations.vehicles.UnusedVehiclesMutation'>, <class 'ga.mutations.couriers.CouriersMutation'>), (<class 'ga.mutations.vehicles.

In [4]:
import glob
import os

from model import Problem
from utils import load_from_json

data_dir = "problems"
json_files = glob.glob(os.path.join(data_dir, "*.json"))

# Fail early: with no inputs every later cell would silently do nothing.
if not json_files:
    raise FileNotFoundError(
        f"No problem definitions (*.json) found in {os.path.abspath(data_dir)}"
    )

# (testcase name, loaded problem) pairs, one per JSON file.
data: list[tuple[str, Problem]] = []
for json_file in json_files:
    # splitext drops only the trailing extension; str.replace would also strip
    # any ".json" that happens to occur inside the file name.
    testcase_name = os.path.splitext(os.path.basename(json_file))[0]
    problem = load_from_json(json_file)
    data.append((testcase_name, problem))

# glob gives no ordering guarantee, so order the testcases by name.
data.sort(key=lambda x: x[0])

In [5]:
# Consecutive non-improving iterations tolerated before a GA run is stopped.
GA_RUN_PATIENCE = 100
# Number of feasible solutions requested for the initial population.
GA_INITIAL_POPULATION_SIZE = 50
# Upper bound on iterations passed to a single GA run.
GA_MAX_RUN_ITERATIONS = 300
# Number of independent GA runs per (testcase, mutation subset) pair.
GA_RUN_REPEAT = 10

# The same settings bundled into one mapping so they can be handed to the
# remote tasks as a single argument.
CONFIG = dict(
    GA_RUN_PATIENCE=GA_RUN_PATIENCE,
    GA_INITIAL_POPULATION_SIZE=GA_INITIAL_POPULATION_SIZE,
    GA_MAX_RUN_ITERATIONS=GA_MAX_RUN_ITERATIONS,
    GA_RUN_REPEAT=GA_RUN_REPEAT,
)

In [6]:
import time

import numpy as np

from ga import GA


@ray.remote
def run_single_ga_repeat(problem, mutations, population, config):
    """Run one GA optimisation with patience-based early stopping.

    Args:
        problem: Problem instance passed to ``GA``.
        mutations: Collection of mutation classes to enable. Every class in
            the notebook-level ``all_mutations`` that is not listed here is
            disabled (``proba = 0``).
        population: Initial population passed to ``GA``.
        config: Mapping with the required keys ``"GA_RUN_PATIENCE"`` and
            ``"GA_MAX_RUN_ITERATIONS"``. The optional key
            ``"GA_MUTATION_PROBA"`` overrides the probability assigned to
            enabled mutations (default ``0.5``).

    Returns:
        dict with the keys:
            ``"cost"``: lowest cost seen during the run (``np.inf`` if the GA
                yielded no state);
            ``"iterations"``: number of GA states processed, including the
                one on which the patience budget ran out;
            ``"time"``: seconds spent in the optimisation loop, measured with
                ``time.perf_counter`` (GA construction is not included);
            ``"cost_func_evals"``: value of ``ga._cost_function_runs`` after
                the run.
    """
    patience_budget = config["GA_RUN_PATIENCE"]
    max_iterations = config["GA_MAX_RUN_ITERATIONS"]
    enabled_proba = config.get("GA_MUTATION_PROBA", 0.5)

    # The switch is a class attribute, so it is set on the classes themselves
    # and therefore applies to the whole worker process running this task.
    for mutation in all_mutations:
        mutation.proba = enabled_proba if mutation in mutations else 0

    ga = GA(problem=problem, initial_population=population, C=1.2, alpha=0.9)

    best_solution_cost = np.inf
    patience = patience_budget
    num_iterations = 0

    start_time = time.perf_counter()

    for state in ga.run(max_iter=max_iterations):
        # Count the state before the stop check. Counting after it left
        # early-stopped runs one iteration short, while runs that reached
        # max_iter had every state counted.
        num_iterations += 1

        current_cost = ga.get_cost(state.solution)

        if current_cost < best_solution_cost:
            best_solution_cost = current_cost
            patience = patience_budget  # improvement: reset the budget
        else:
            patience -= 1

        if patience <= 0:
            break

    elapsed_time = time.perf_counter() - start_time

    return {
        "cost": best_solution_cost,
        "iterations": num_iterations,
        "time": elapsed_time,
        "cost_func_evals": ga._cost_function_runs,
    }


@ray.remote
def run_suite_for_testcase_remote(
    testcase_name, problem, mutations, population, config
):
    """Repeat the GA for one mutation subset and summarise the runs.

    Launches ``config["GA_RUN_REPEAT"]`` ``run_single_ga_repeat`` tasks with
    identical inputs, waits for all of them and aggregates their metrics.

    Args:
        testcase_name: Label copied into the ``"testcase"`` field.
        problem: Problem instance forwarded to every run.
        mutations: Mutation classes enabled for every run; their
            ``__name__`` values are reported as ``"mutation_suite"``.
        population: Initial population forwarded to every run.
        config: Mapping forwarded to every run; must contain
            ``"GA_RUN_REPEAT"`` in addition to the keys the runs need.

    Returns:
        dict with ``"testcase"``, ``"mutation_suite"`` and, for each of
        ``cost``, ``iterations``, ``time`` and ``cost_func_evals``, the
        mean / std / min / max / median over the repeats. Float values are
        rounded to two decimals.

    Raises:
        ValueError: if ``GA_RUN_REPEAT`` is smaller than 1 (there would be
            nothing to aggregate).
    """
    repeats = config["GA_RUN_REPEAT"]
    if repeats < 1:
        raise ValueError(
            f"GA_RUN_REPEAT must be at least 1 to compute statistics, got {repeats}"
        )

    # Store the shared inputs in the object store once. Passing them by value
    # would serialise a fresh copy for every repeat; Ray dereferences
    # top-level ObjectRef arguments before the child task starts.
    problem_ref = ray.put(problem)
    population_ref = ray.put(population)

    futures = [
        run_single_ga_repeat.remote(problem_ref, mutations, population_ref, config)
        for _ in range(repeats)
    ]

    # This task blocks here until every repeat has finished.
    results = ray.get(futures)

    best_costs = [r["cost"] for r in results]
    iterations_list = [r["iterations"] for r in results]
    times = [r["time"] for r in results]
    cost_func_evals = [r["cost_func_evals"] for r in results]

    # Key order is kept as is: it determines the column order of the CSV
    # built from these records.
    stats = {
        "testcase": testcase_name,
        "mutation_suite": [m.__name__ for m in mutations],
        "cost_mean": np.mean(best_costs),
        "cost_std": np.std(best_costs),
        "cost_max": max(best_costs),
        "cost_min": min(best_costs),
        "cost_median": np.median(best_costs),
        "iterations_mean": np.mean(iterations_list),
        "iterations_std": np.std(iterations_list),
        "iterations_min": min(iterations_list),
        "iterations_max": max(iterations_list),
        "iterations_median": np.median(iterations_list),
        "time_mean": np.mean(times),
        "time_std": np.std(times),
        "time_min": min(times),
        "time_max": max(times),
        "time_median": np.median(times),
        "cost_func_evals_min": min(cost_func_evals),
        "cost_func_evals_max": max(cost_func_evals),
        "cost_func_evals_mean": np.mean(cost_func_evals),
        "cost_func_evals_std": np.std(cost_func_evals),
        "cost_func_evals_median": np.median(cost_func_evals),
    }

    # Round floats only (np.float64 is a float subclass); ints, strings and
    # the list of names pass through untouched.
    stats = {k: round(v, 2) if isinstance(v, float) else v for k, v in stats.items()}

    return stats

In [7]:
import os

import pandas as pd
from tqdm.notebook import tqdm

from generator import Generator

results_dir = "results"
os.makedirs(results_dir, exist_ok=True)

for testcase_name, problem in tqdm(data, desc="Testcases"):
    # One initial population per testcase, shared by every mutation subset.
    generator = Generator(problem=problem)
    population = generator.generate_many_feasible(
        num_to_find=GA_INITIAL_POPULATION_SIZE, max_attempts=1000, verbose=False
    )

    # Store the shared inputs in the object store once per testcase instead of
    # serialising them again for each submission below. Ray dereferences
    # top-level ObjectRef arguments before the task runs, so the remote
    # function still receives the plain objects.
    problem_ref = ray.put(problem)
    population_ref = ray.put(population)

    # Submit every mutation subset up front; `metadata[i]` keeps the mutation
    # names of `futures[i]` for error reporting.
    futures = []
    metadata = []
    for mutations in mutation_suite:
        future = run_suite_for_testcase_remote.remote(
            testcase_name, problem_ref, mutations, population_ref, config=CONFIG
        )
        futures.append(future)
        metadata.append([m.__name__ for m in mutations])

    # Collect in submission order. A failed subset is reported and skipped so
    # that the remaining results are still saved.
    testcase_results = []
    with tqdm(total=len(futures), desc=f"Mutations for {testcase_name}") as pbar:
        for i, future in enumerate(futures):
            try:
                res = ray.get(future)
                testcase_results.append(res)
            except Exception as e:
                print(
                    f"ERROR in testcase {testcase_name}, mutations {metadata[i]}: {e}"
                )
            pbar.update(1)

    # One CSV per testcase, one row per mutation subset that succeeded.
    if testcase_results:
        df = pd.DataFrame(testcase_results)
        filename = os.path.join(results_dir, f"{testcase_name}_results.csv")
        df.to_csv(filename, index=False)


Testcases:   0%|          | 0/5 [00:00<?, ?it/s]

Mutations for 01-one-courier:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 02-four-couriers:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 03-big:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 04-medium:   0%|          | 0/64 [00:00<?, ?it/s]

Mutations for 05-small:   0%|          | 0/64 [00:00<?, ?it/s]

In [8]:
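# NOTE: disabled population-size sweep, kept for reference only.
# Two things need fixing before it is re-enabled:
#   * the submission/collection code sits outside the `population_size` loop,
#     so only the last generated population would actually be benchmarked;
#   * the CSV name does not contain the population size and is identical to
#     the file written by the mutation benchmark cell, which it would overwrite.
# POPULATION_SUITE = [10, 25, 50, 100, 200, 300, 500]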


# for testcase_name, problem in tqdm(data, desc="Testcases"):
#     generator = Generator(problem=problem)

#     for population_size in POPULATION_SUITE:
#         population = generator.generate_many_feasible(
#             num_to_find=population_size, max_attempts=100000, verbose=False
#         )

#     futures = []
#     metadata = []
#     for mutations in mutation_suite:
#         future = run_suite_for_testcase_remote.remote(
#             testcase_name, problem, mutations, population, config=CONFIG
#         )
#         futures.append(future)
#         metadata.append([m.__name__ for m in mutations])

#     testcase_results = []
#     with tqdm(total=len(futures), desc=f"Mutations for {testcase_name}") as pbar:
#         for i, future in enumerate(futures):
#             try:
#                 res = ray.get(future)
#                 testcase_results.append(res)
#             except Exception as e:
#                 print(
#                     f"ERROR in testcase {testcase_name}, mutations {metadata[i]}: {e}"
#                 )
#             pbar.update(1)

#     if testcase_results:
#         df = pd.DataFrame(testcase_results)
#         filename = os.path.join(results_dir, f"{testcase_name}_results.csv")
#         df.to_csv(filename, index=False)