In [1]:
import os
import pandas as pd
import random
from copy import deepcopy
os.chdir(os.pardir) # comment if you are running this more than once
data_matrix_df = pd.read_csv('Data/distance_matrix_official.csv', index_col=0)
data_matrix_np = data_matrix_df.to_numpy()
data_matrix_np.shape

(68, 68)

In [2]:
from Genetic_algorithm.fitness import ResourceFitness
from Genetic_algorithm.genome import Genome
from Genetic_algorithm.solution_rd import SolutionRD
from Genetic_algorithm.mutations import logistic_mutation, social_mutation, logistic_mutation_2
from Genetic_algorithm.crossovers import social_crossover, logistic_crossover_2, full_crossover
from Genetic_algorithm.selection_algorithms import tournament_selection,rank_selection

my_fitness = ResourceFitness(data_matrix_np)

first_solution = SolutionRD(my_fitness, Genome, [logistic_mutation, social_mutation], [logistic_crossover_2, social_crossover])
secomnd_solution = SolutionRD(my_fitness, Genome, [logistic_mutation, social_mutation], [logistic_crossover_2, social_crossover])

In [None]:
import tqdm
def     genetic_algorithm(
    gen_count: int,
    selection_algorithm: list[callable],
    mutation_algorithms: list[callable],
    crossover_algorithms: list[callable],
    fitness_instance: ResourceFitness,
    Genome_class: Genome,
    maximization: bool = False,
    xo_prob: float = 0.9,
    mut_prob: float = 0.2,
    social_mutation_prob: float = 0.2,
    social_crossover_prob: float = 0.2,
    population_size: int = 10,
    elitism: bool = True,
    save_logs: bool = False,
):
    """
    Genetic Algorithm for solving optimization problems.
    This function implements a genetic algorithm that evolves a population of solutions
    over a specified number of generations. It uses selection, mutation, and crossover
    algorithms to create new solutions and improve the population's fitness.
    The algorithm can be configured with various parameters, including the number of
    generations, selection method, mutation and crossover probabilities, and whether
    to use elitism.

    Args:
        gen_count (int): number of generations to evolve.
        selection_algorithm (list[callable]): selection algorithm to use.
        mutation_algorithms (list[callable]): choose from Genetic_algotithm.mutations
        crossover_algorithms (list[callable]): choose from Genetic_algorithm.crossovers
        fitness_instance (ResourceFitness): instancee of the fitness class (it's important to instanciate it before as it holds the data matrix)
        Genome_class (Genome): class of the genome to be used (will be instanciated in the function)
        maximization (bool, optional): _description_. Defaults to False.
        xo_prob (float, optional): _description_. Defaults to 0.9.
        mut_prob (float, optional): _description_. Defaults to 0.2.
        social_mutation_prob (float, optional): _description_. Defaults to 0.2.
        social_crossover_prob (float, optional): _description_. Defaults to 0.2.
        elitism (bool, optional): _description_. Defaults to True.
        verbose (bool, optional): _description_. Defaults to False.
        
    """
    
    
    
    first_solution = SolutionRD(fitness_instance, Genome_class, mutation_algorithms, crossover_algorithms)
    
    first_solution.prob_social_mutation = social_mutation_prob
    first_solution.prob_social_crossover = social_crossover_prob
    first_solution.initial_population = population_size
    
    GA_population = [random_solution for random_solution in first_solution]
    
    if save_logs:
        logs = []
    for generation in tqdm.tqdm(range(gen_count), desc="Runnig genetic alogrithm", unit=" generation"):
        fitness_instance.number_of_calls = 0
            
        # Selection
        selected_individuals = [selection_algorithm(GA_population, maximization) for _ in range(population_size)]
        

        # Crossover
        new_population = []
        for i in range(0, population_size):
            for j in range(i+1, population_size):
                if random.random() < xo_prob:
                    parent1 = selected_individuals[i]
                    parent2 = selected_individuals[j]
                    child1, child2 = parent1 @ parent2
                    new_population.append(child1)
                    new_population.append(child2)
        
        if not new_population:
            new_population = selected_individuals.copy()
                   
        
        # Mutation
        for individual in new_population:
            if random.random() < mut_prob:
                individual.mutation()
        
        # Elitism
        if elitism:
            best_individual = min(new_population) if not maximization else max(new_population)
            new_population[0] = best_individual
        
        
        GA_population = new_population
        if save_logs:
            
            n_fitness_calls = fitness_instance.number_of_calls
            best_individual_of_generation = min(GA_population) if not maximization else max(GA_population)
            best_logistic_fitness = fitness_instance._calculate_logistic_fitness(best_individual_of_generation.genome)
            best_social_fitness = fitness_instance._calculate_social_fitness(best_individual_of_generation.genome)
            row = [n_fitness_calls, float(best_individual_of_generation), best_logistic_fitness, best_social_fitness]

            logs.append(row)

            # logs_df = pd.DataFrame(logs, columns=['n_fitness_calls', 'best_individual_fitness', 'best_logistic_fitness', 'best_social_fitness'])
            # logs_df['cumulative_calls'] = logs_df['n_fitness_calls'].cumsum()
            
            # add here the savin of the logs
            #logs_df.to_csv('logs.csv', index=False)
            
       
    best_individual = min(GA_population) if not maximization else max(GA_population)
    if save_logs:
        return best_individual, logs
    return best_individual

# Example usage
best_solution, logs = genetic_algorithm(
    gen_count=100,
    selection_algorithm=tournament_selection,
    mutation_algorithms=[logistic_mutation, social_mutation],
    crossover_algorithms=[logistic_crossover_2, full_crossover],
    fitness_instance=my_fitness,
    Genome_class=Genome,
    maximization=True,
    xo_prob=0.9,
    mut_prob=0.2,
    social_mutation_prob=0.2,
    social_crossover_prob=0.2,
    population_size=10,
    save_logs= True,
    elitism=True
)
print("Best Solution Genome:", best_solution.genome)
print("Best Solution Fitness:", float(best_solution))
print("Average distance:", best_solution.fitness_instance._calculate_social_fitness(best_solution.genome))

Runnig genetic alogrithm: 100%|██████████| 100/100 [00:05<00:00, 17.47 generation/s]

Best Solution Genome: -111022019352087-164-1-18194-1672503472961-18503-1
Best Solution Fitness: 4.01799201059219
Average distance: 3.7





In [4]:
logs

[[92, 3.402424775063573, np.float64(0.20242477506357298), np.float64(3.2)],
 [80, 3.402424775063573, np.float64(0.20242477506357298), np.float64(3.2)],
 [80, 3.3250995324445065, np.float64(0.22509953244450623), np.float64(3.1)],
 [78, 3.448305601456255, np.float64(0.24830560145625494), np.float64(3.2)],
 [84, 3.7806716142113728, np.float64(0.2806716142113727), np.float64(3.5)],
 [80, 3.645843552512241, np.float64(0.24584355251224113), np.float64(3.4)],
 [84, 3.845843552512241, np.float64(0.24584355251224121), np.float64(3.6)],
 [74, 3.845843552512241, np.float64(0.24584355251224121), np.float64(3.6)],
 [76, 3.880671614211373, np.float64(0.28067161421137277), np.float64(3.6)],
 [76, 3.93245489465078, np.float64(0.23245489465078), np.float64(3.7)],
 [78, 3.8711474070524057, np.float64(0.27114740705240575), np.float64(3.6)],
 [78, 3.8711474070524057, np.float64(0.27114740705240575), np.float64(3.6)],
 [82, 3.8927073169884827, np.float64(0.2927073169884827), np.float64(3.6)],
 [84, 3.94584

In [None]:
import itertools

parameter_grids = {
    'gen_count': [100],
    'selection_algorithm': [tournament_selection, rank_selection],
    'mutation_algorithms': [[logistic_mutation, social_mutation], [logistic_mutation_2, social_mutation]],
    'crossover_algorithms': [[logistic_crossover_2, logistic_crossover_2], [full_crossover,full_crossover]],
    'fitness_instance': [my_fitness],
    'Genome_class': [Genome],
    'maximization': [True],
    'xo_prob': [0.2, 0.8],
    'mut_prob': [0.2, 0.8],
    'social_mutation_prob': [0.5],
    'social_crossover_prob': [0.5],
    'population_size': [10],
    'elitism': [True, False],
    'save_logs': [True],
}

# Create a list to store the results
# Iterate over all combinations of parameters

def run_grid_search(parameter_grids, num_runs_per_combination=30):
    """
    Run a grid search over the specified parameter grids.
    Args:
        parameter_grids (dict): A dictionary where keys are parameter names and values are lists of parameter values.
        num_runs_per_combination (int): Number of runs for each combination of parameters.
    Returns:
        list: A list of tuples containing the parameter combination, run number, best solution, and logs DataFrame.
    """
    # Create a list to store the results
    results = []


    # Generate all combinations of parameters
    keys = list(parameter_grids.keys())
    values = [parameter_grids[key] for key in keys]
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]
    
    # This is to create a placeholder_name for the csv file and to link the information about the model to the run
    model_run = 0
    for params in tqdm.tqdm(combinations, desc="Running grid search", unit=" combination"):
        
        # run the genetic algorithm with the current combination of parameters 30 times
        for run_iter in range(num_runs_per_combination):
            
            # Run the genetic algorithm with the current combination of parameters
            best_solution, logs = genetic_algorithm(**params) 
            # Append the result to the results list with an indicator of the run number
            results.append((params, run_iter , best_solution, logs, model_run))

            # If run_iter == 0: then create the dataframe else join through the index and name the column according to the run_iter
            # if run_iter == 0:
            #     logs_df = pd.DataFrame(logs, columns=['n_fitness_calls_0', 'best_individual_fitness', 'best_logistic_fitness', 'best_social_fitness'])
            # else:
            #     logs_df = pd.concat([logs_df, pd.DataFrame(logs, columns=[f'n_fitness_calls_{run_iter}', f'best_individual_fitness{run_iter}', f'best_logistic_fitness{run_iter}', 'fbest_social_fitness{run_iter}'])], axis=1)


        # save the logs_df to a csv file 
        # logs_df.to_csv(f'../Data/Gridsearch_runs/{model_run}')
        
        # increase model_run for next iteration
        model_run += 1

        # log_df = pd.DataFrame(logs, columns=['n_fitness_calls', 'best_individual_fitness', 'best_logistic_fitness', 'best_social_fitness'])
        # log_df['cumulative_calls'] = log_df['n_fitness_calls'].cumsum()
        # Convert the results to a DataFrame
        # results_df = pd.DataFrame(results, columns=['params', 'run_iter', 'best_solution', 'logs', 'model_run'])

    return results    

results = run_


(parameter_grids, num_runs_per_combination = 30)

Running grid search:   0%|          | 0/64 [00:00<?, ? combination/s]

Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 59.51 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 76.40 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 50.50 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 64.00 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 69.35 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 70.75 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 58.93 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 59.26 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 65.34 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 67.37 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 76.94 generation/s]
Runnig genetic alogrithm: 100%|██████████| 100/100 [00:01<00:00, 

In [30]:
results_df = pd.DataFrame([{
    'params': result[0],
    'run_iteration': result[1],
    'best_solution': result[2],
    'logs': result[3]
} for result in results])

results_df.head()


Unnamed: 0,params,run_iteration,best_solution,logs
0,"{'gen_count': 100, 'selection_algorithm': <fun...",0,Fitness: 3.7737068064013948,"[[27, 3.7866280865307878, 0.18662808653078763,..."
1,"{'gen_count': 100, 'selection_algorithm': <fun...",1,Fitness: 3.8312754029143994,"[[34, 3.3949128441321523, 0.19491284413215235,..."
2,"{'gen_count': 100, 'selection_algorithm': <fun...",0,Fitness: 3.892961623069946,"[[30, 3.52570250909972, 0.22570250909972012, 3..."
3,"{'gen_count': 100, 'selection_algorithm': <fun...",1,Fitness: 3.62929354197323,"[[22, 3.200261075827205, 0.2002610758272054, 3..."
4,"{'gen_count': 100, 'selection_algorithm': <fun...",0,Fitness: 3.9839813306698155,"[[32, 2.983312300559735, 0.2833123005597351, 2..."


In [None]:
# save results to a CSV file
results_df = pd.DataFrame([{
    'params': result[0],
    'run_iteration': result[1],
    'best_solution': result[2],
    # logs are defined as:
    # row = [n_fitness_calls, float(best_individual_of_generation), best_logistic_fitness, best_social_fitness]
    'logs': result[3]
} for result in results])
results_df.to_csv('grid_search_results.csv', index=False)