In [16]:
import pandas as pd
import polars as pl
import polars.selectors as cs
import numpy as np
import plotnine as pn
import plotly.graph_objs as go
import plotly.express as px
from tqdm.notebook import tqdm
from IPython.display import clear_output, display
import os
from itertools import product
from deap import base, creator, tools, algorithms

# Import tuning libraries
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
from ray.tune.schedulers import ASHAScheduler
from ray.tune import JupyterNotebookReporter

%load_ext blackcellmagic

The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


In [17]:
def rastrigin(x, y):
    return 20 + (x**2 - 10 * np.cos(2 * np.pi * x)) + (y**2 - 10 * np.cos(2 * np.pi * y))

# Set the evaluation function
def evalOneMin(individual):
    return rastrigin(individual[0], individual[1]),

def feasible(individual):
    """Feasibility function for the individual. Returns True if feasible False
    otherwise."""
    
    if np.sum(np.abs(individual)) <= 10:
        return True
    return False

In [18]:
def trainable(config, total_generations):
   # Set fixed variables
   INDIVIDUAL_SIZE = 2

   # Objective Direction
   creator.create("FitnessMin", base.Fitness, weights=(-1.0,))

   # Invidiaul Structure
   creator.create("Individual", list, fitness=creator.FitnessMin)

   # Initialise the toolbox
   toolbox = base.Toolbox()

   # Attribute generator for individual genes
   toolbox.register("attr_float", np.random.uniform, 4, 5)

   # Structure initializers
   toolbox.register(
      "individual",
      tools.initRepeat,
      creator.Individual,
      toolbox.attr_float,
      INDIVIDUAL_SIZE,
   )
   toolbox.register("population", tools.initRepeat, list, toolbox.individual)

   # Genetic Operators
   toolbox.register("evaluate", evalOneMin)
   toolbox.decorate("evaluate", tools.DeltaPenalty(feasible, 1e6))
   toolbox.register("mate", tools.cxOnePoint)
   toolbox.register(
      "mutate",
      tools.mutGaussian,
      mu=0,
      sigma=config["MUT_SD"],
      indpb=config["MUT_IND_PB"],
   )
   toolbox.register("select", tools.selTournament, tournsize=config["TOURNAMENT_SIZE"])

   # Creating the initial
   population = toolbox.population(n=config["POP_SIZE"])

   # Begin the evolution
   for generation in range(total_generations):

      # Select the next generation individuals
      offspring = toolbox.select(population, len(population))

      # Clone the selected individuals
      offspring = list(map(toolbox.clone, offspring))

      # Apply crossover and mutation on the offspring
      for child1, child2 in zip(offspring[::2], offspring[1::2]):
         if np.random.random() < config["CXPB"]:
               toolbox.mate(child1, child2)
               del child1.fitness.values
               del child2.fitness.values

      for mutant in offspring:
         if np.random.random() < config["MUTPB"]:
               toolbox.mutate(mutant)
               del mutant.fitness.values

      # Evaluate the individuals with an invalid fitness
      # Those are the ones that have been deleted
      invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
      fitnesses = map(toolbox.evaluate, invalid_ind)
      for ind, fit in zip(invalid_ind, fitnesses):
         ind.fitness.values = fit

      # Overwrite the current population with the offspring
      population[:] = offspring

      # Increment counter
      generation += 1

      # Get fitnesses
      fitnesses = [ind.fitness.values[0] for ind in population]
      fit_avg = np.mean(fitnesses)
      train.report({"avg_fitness": np.mean(fitnesses)})

In [19]:
TOTAL_GENERATIONS = 50

analysis = tune.run(
    tune.with_parameters(trainable, total_generations=TOTAL_GENERATIONS),
    config={
        "CXPB": tune.uniform(0.2, 0.8),
        "MUTPB": tune.uniform(0.05, 0.95),
        "MUT_SD": tune.uniform(0.01, 5),
        "MUT_IND_PB": tune.uniform(0.05, 0.95),
        "TOURNAMENT_SIZE": tune.randint(1, 10),
        "POP_SIZE": tune.choice([50, 250, 500, 1000, 5000]),
    },
    metric="avg_fitness",
    mode="min",
    local_dir="tune_results",
    name="GA",
    search_alg=OptunaSearch(),
    scheduler=ASHAScheduler(
        time_attr="training_iteration",
        grace_period=TOTAL_GENERATIONS/2,
        reduction_factor=1.5
    ),
    num_samples=100,
    trial_dirname_creator=lambda trial: f"{trial.trainable_name}_{trial.trial_id}",
)

2024-03-27 20:54:10,701	INFO tune.py:613 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-03-27 20:54:10,718] A new study created in memory with name: optuna


0,1
Current time:,2024-03-27 21:00:42
Running for:,00:06:32.23
Memory:,11.9/15.8 GiB

Trial name,status,loc,CXPB,MUTPB,MUT_IND_PB,MUT_SD,POP_SIZE,TOURNAMENT_SIZE,iter,total time (s),avg_fitness
trainable_0e315f71,TERMINATED,127.0.0.1:21224,0.343637,0.686279,0.350756,3.19809,500,7,50,0.921999,10009.4
trainable_d30f8a86,TERMINATED,127.0.0.1:29876,0.414928,0.544993,0.302689,0.635387,5000,3,50,9.48093,3.64717
trainable_56add3c0,TERMINATED,127.0.0.1:25280,0.636154,0.549506,0.33464,1.05684,500,9,50,0.965284,4.3769
trainable_2d68da03,TERMINATED,127.0.0.1:976,0.259058,0.684495,0.476355,3.53546,500,6,50,0.749706,6013.02
trainable_1ccc4ff9,TERMINATED,127.0.0.1:10840,0.26328,0.265419,0.425336,3.36459,5000,6,50,8.64708,3604.51
trainable_9bf6cfb6,TERMINATED,127.0.0.1:14984,0.485894,0.33085,0.529732,0.913885,1000,3,50,2.13514,4.11155
trainable_50d52cf0,TERMINATED,127.0.0.1:20468,0.511109,0.0915839,0.782936,4.31668,50,6,50,0.0753291,40002.9
trainable_365ff9a3,TERMINATED,127.0.0.1:17024,0.486667,0.811022,0.0756362,3.87323,50,6,50,0.0875981,20001.8
trainable_0a524efc,TERMINATED,127.0.0.1:1524,0.300597,0.160152,0.636807,3.78603,1000,5,50,1.66751,7003.56
trainable_787c4db2,TERMINATED,127.0.0.1:9244,0.556792,0.325929,0.919807,0.623608,50,7,50,0.0906086,4.54315


Trial name,avg_fitness
trainable_09cf400a,0.319084
trainable_0a524efc,7003.56
trainable_0a864768,532025.0
trainable_0c26ce29,0.575561
trainable_0e315f71,10009.4
trainable_0f790c06,0.737051
trainable_0fd931bc,400.474
trainable_126f99eb,0.398126
trainable_14fa2bf5,0.368261
trainable_15d970d2,3.35406


[36m(trainable pid=21224)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit
[36m(trainable pid=976)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit
[36m(trainable pid=29876)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit
[36m(trainable pid=14984)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit
[36m(trainable pid=20468)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit
[36m(traina

In [20]:
analysis.best_config

{'CXPB': 0.26175563712639643,
 'MUTPB': 0.05075896644487842,
 'MUT_SD': 1.9231079843545766,
 'MUT_IND_PB': 0.10469865034703035,
 'TOURNAMENT_SIZE': 2,
 'POP_SIZE': 5000}

In [21]:
fig = px.line(
    data_frame=pd.concat(analysis.trial_dataframes.values()),
    x="training_iteration",
    y="avg_fitness",
    color="trial_id",
)
fig.show()

In [22]:
top_runs = analysis.dataframe().sort_values("avg_fitness").head(10)["trial_id"].to_list()

fig = px.line(
    data_frame=pd.concat(analysis.trial_dataframes.values()).query(
        "trial_id in @top_runs"
    ),
    x="training_iteration",
    y="avg_fitness",
    color="trial_id",
)
fig.show()

In [23]:
(
    analysis.dataframe()
    .query("trial_id == '47670c6e'")
    .filter(regex="config")
    .melt()
    .assign(variable=lambda x: x["variable"].str.replace("config/", ""))
    .set_index("variable")
    .to_dict()["value"]
)

{'CXPB': 0.22808547604749183,
 'MUTPB': 0.05313153710963216,
 'MUT_SD': 2.5861177862246074,
 'MUT_IND_PB': 0.1359577654069344,
 'TOURNAMENT_SIZE': 3.0,
 'POP_SIZE': 5000.0}

TBD: How to analyse the "wigglyness" of the line? Not only the lowest score but also the training behaviour should count into why a solution is good or bad.