In [1]:
import random
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from copy import deepcopy
import data_generation as dg
import time

In [2]:
graph, random_solution, total_edge_length = dg.read_instance('data\data70.txt')
num_nodes = len(graph)

In [3]:
class Individual:
    def __init__(self, num_nodes, graph):
        self.num_nodes = num_nodes
        self.graph = graph
        self.code = self.generate_solution(self.num_nodes)
        self.fitness = self.calc_fitness(self.num_nodes, self.graph)
    
    def generate_solution(self, num_nodes):
        nodes = list(range(num_nodes))
        random.shuffle(nodes)
        return nodes

    def calc_fitness(self, num_nodes, graph):
        total_length = 0
        for i in range(len(graph)):
            for j in range(i + 1, len(graph)):
                if graph[i][j] == 1:
                    position_i = self.code.index(i)
                    position_j = self.code.index(j)
                    total_length += abs(position_i - position_j)
        return -total_length

In [4]:
def tournament_selection(population, tour_size, forbidden):
    tournament = random.sample(range(len(population)), tour_size)
    #selected = max(tournament, key=lambda x: x.fitness)
    best_i = -1
    best_fitness = population[tournament[0]].fitness
    for i in tournament:
        #print('fitness:', population[i].fitness)
        if population[i].fitness >= best_fitness and i != forbidden:
            best_fitness = population[i].fitness
            best_i = i

    #print('best_fitness:', best_fitness)
    return best_i

In [5]:
# its like a wheel in a casino but instead of having equal proportions to
# for wheels slots we have different proportions(probabilities) for each slot
# its size depends on the fitness of an individual
# sum(i from 0 to n)pi where pi is probability of each one
def roulette_selection(population):
    total_fitness = sum(ind.fitness for ind in population)
    fitness_props = [ind.fitness / total_fitness for ind in population]
    cumulative_props = [sum(fitness_props[:i+1] for i in range(len(fitness_props)))]
    
    rand_prop = random.uniform(0,1)
    for i, prob in enumerate(cumulative_props):
        if rand_prop < prob:
            return population[i]

In [6]:
#with linear ranking selection is not dependable on fitness but fitness rank
#this way worse individuals can get a chance to be choosen and maybe they can 
# give better results
def rank_selection_linear_ranking(population, num_selections,forbidden,selection_pressure = 1.5):
    n = len(population)
    ranked_population = sorted(population, key = lambda x: x.fitness, reverse=True)
    
    probs = [(2-selection_pressure)/n + 2*i*(selection_pressure-1)/(n*(n-1)) for i in range(n)]
    
    selection = random.choices(ranked_population, weights=probs, k = num_selections)
    max_fitness = float('-inf')
    best_i = -1
    for i in range(len(selection)):
        if population[i].fitness > max_fitness and i != forbidden:
            max_fitness = population[i].fitness
            best_i = i
    
    return best_i

In [7]:
def rank_selection(population, num_selections, forbidden):
    n = len(population)
    ranked_population = sorted(population, key = lambda x: x.fitness, reverse=True)
    
    probs = [i/n for i in range(1, n + 1)]
    
    selection = random.choices(ranked_population, weights=probs, k = num_selections)

    max_fitness = float('-inf')
    best_i = -1
    for i in range(len(selection)):
        if population[i].fitness > max_fitness and i != forbidden:
            max_fitness = population[i].fitness
            best_i = i
    
    return best_i

In [8]:
#beacause we are dealing with permutations, we have to be careful here and used sepcialized crossover algorithms
#ordered crossover method
def ordered_crossover(parent1, parent2, cx_point1 = -1, cx_point2 = -1):
    
    n = len(parent1.code)

    # Choose two random crossover points
    if cx_point1 == -1 and cx_point2 == -1:
        cx_point1, cx_point2 = sorted(random.sample(range(n), 2))
        #print(cx_point1, cx_point2)

    # Copy the segment between the crossover points from parent1 to child1
    child1_segment = parent1.code[cx_point1:cx_point2 + 1]

    # Fill the remaining positions in child1 with elements from parent2
    child1 = [-1] * n
    child1[cx_point1:cx_point2 + 1] = child1_segment
    remaining_positions = [i for i in parent2.code if i not in child1_segment]
    j = 0
    for i in range(n):
        if child1[i] == -1:
            child1[i] = remaining_positions[j]
            j += 1

    return child1, cx_point1, cx_point2


In [9]:
def pmx_crossover(parent1, parent2):
    n = len(parent1.code)
    start, end = sorted(random.sample(range(n), 2))
    
    child = [-1] * n
    child[start:end + 1] = parent1.code[start:end + 1]

    for i in range(start, end + 1):
        if parent2.code[i] not in child:
            #print(parent1.code)
            #print(parent2.code)
            index = parent2.code.index(parent1.code[i])
            #this next lines are the only difference between pmx and ordered
            #we dont change child1 in a ordered way but according to previous indexes
            while child[index] != -1:
                index = parent2.code.index(parent1.code[index])
            child[index] = parent2.code[i]
            
            
    for i in range(n):
        if child[i] == -1:
            child[i] = parent2.code[i]

    return child

In [10]:
def mutation_swap(individual, mutation_prob):
    for i in range(len(individual.code)):
        if random.random() < mutation_prob:
            rand_i = random.choice(range(len(individual.code)))
            
            tmp = individual.code[i]

            individual.code[i] = individual.code[rand_i]

            individual.code[rand_i] = tmp

In [11]:
def mutation_inverse(individual, mutation_prob):
    for i in range(len(individual.code)):
        if random.random() < mutation_prob:
            index1, index2 = sorted(random.sample(range(len(individual.code)), 2))

            individual.code[index1:index2+1] = reversed(individual.code[index1:index2+1])

In [12]:
def mutation_scramble(individual, mutation_prob):
    for i in range(len(individual.code)):
        if random.random() < mutation_prob:
            start, end = sorted(random.sample(range(len(individual.code)), 2))

            subset = individual.code[start:end+1]

            random.shuffle(subset)
            
            individual.code[start:end+1] = subset

In [13]:
def genetic_algo(population_size, graph, num_nodes,num_generations,tournament_size, mutation, mutation_prob, elitism_size, crossover, selection, ordered_cross = False):
    #if use_elitism and (population_size - elitism_size) % 2 == 1:
    #    elitism_size += 1
    
    population = [Individual(num_nodes, graph) for _ in range(population_size)]
    new_population = population.copy()
    
    for i in range(num_generations):
        population.sort(key=lambda x: x.fitness, reverse=True)
        new_population[:elitism_size] = population[:elitism_size]
        for j in range(elitism_size, population_size, 2):
            parent1_i = selection(population, tournament_size, forbidden=-2)
            parent2_i = selection(population, tournament_size, parent1_i)
            #disabled - causes bloated file size
            #print('gen:', i, 'iter:', j, 'parents:', parent1_i, parent2_i)
            if ordered_cross:
                new_population[j].code, i1, i2 = crossover(population[parent1_i], population[parent2_i])
                new_population[j+1].code, _, _ = crossover(population[parent2_i], population[parent1_i], i1, i2)
            else:    
                new_population[j].code = crossover(population[parent1_i], population[parent2_i])
                new_population[j+1].code = crossover(population[parent1_i], population[parent2_i])
        
            mutation(new_population[j], mutation_prob)
            mutation(new_population[j+1], mutation_prob)
            
            new_population[j].fitness = new_population[j].calc_fitness(num_nodes, graph)
            new_population[j+1].fitness = new_population[j+1].calc_fitness(num_nodes, graph)
            
        population = new_population.copy()
    return max(population, key=lambda x: x.fitness)

In [14]:
#too many possible combinations to manually write - we itterate
#TODO proper implementation of roulette selection

n = len(graph)

oc = False

selections = [tournament_selection, rank_selection, rank_selection_linear_ranking]
crossovers = [pmx_crossover, ordered_crossover]
mutations = [mutation_swap, mutation_inverse, mutation_scramble]

number_of_combinations = 0

sols = []
values =[]
times = []

methods = []

results = []

best = float('inf')
best_i = -1

average = [0,0]

for selection in selections:
    for crossover in crossovers:
        for mutation in mutations:
            if crossover.__name__ == 'ordered_crossover':
                oc = True

            start = time.time()    
            res = genetic_algo(population_size=100, 
                        graph=graph, 
                        num_nodes=n, 
                        num_generations=30, 
                        tournament_size=30, 
                        elitism_size=4, 
                        mutation=mutation, 
                        mutation_prob=0.05, 
                        crossover=crossover, 
                        selection=selection, 
                        ordered_cross=oc)
            end = time.time()
            duration = float("{:.2f}".format(end - start))
            
            print(selection.__name__, crossover.__name__, mutation.__name__, ' : ', res.code, res.fitness, '\n')
            
            oc = False

            sols.append(res.code)
            values.append(abs(res.fitness))
            times.append(duration)

            methods.append(str(selection.__name__ + ', ' + crossover.__name__ + ', ' + mutation.__name__))

            results.append({'Dim': num_nodes, 'Method': methods[number_of_combinations], 'Value': abs(res.fitness), 'Time': duration})

            if abs(res.fitness) == best:
                if duration <= times[best_i]:    
                    best = abs(res.fitness)
                    best_i = number_of_combinations

            if abs(res.fitness) < best:
                best = abs(res.fitness)
                best_i = number_of_combinations

            average[0] += abs(res.fitness)
            average[1] += duration

            number_of_combinations += 1    


tournament_selection pmx_crossover mutation_swap  :  [43, 15, 29, 13, 37, 8, 45, 24, 59, 6, 0, 57, 19, 34, 11, 58, 51, 50, 48, 9, 66, 64, 28, 22, 44, 27, 41, 61, 26, 36, 16, 49, 53, 4, 62, 40, 54, 25, 67, 35, 69, 7, 46, 55, 31, 63, 32, 60, 56, 12, 2, 39, 10, 1, 65, 52, 42, 5, 38, 23, 33, 17, 30, 20, 68, 3, 18, 14, 47, 21] -24483 

tournament_selection pmx_crossover mutation_inverse  :  [13, 35, 8, 5, 34, 31, 29, 21, 61, 47, 19, 57, 22, 7, 40, 66, 26, 51, 36, 58, 44, 6, 49, 0, 18, 63, 59, 10, 55, 65, 20, 12, 53, 56, 41, 64, 68, 14, 67, 28, 30, 2, 42, 38, 46, 3, 54, 69, 32, 50, 60, 25, 1, 45, 9, 52, 62, 48, 23, 16, 43, 24, 4, 39, 17, 15, 33, 27, 37, 11] -25533 

tournament_selection pmx_crossover mutation_scramble  :  [5, 66, 29, 64, 47, 37, 48, 54, 6, 10, 18, 28, 26, 53, 41, 36, 34, 27, 31, 52, 22, 62, 51, 7, 65, 35, 16, 61, 25, 19, 39, 59, 63, 58, 12, 14, 55, 32, 4, 40, 20, 50, 2, 69, 46, 49, 0, 44, 17, 67, 11, 23, 42, 33, 68, 45, 60, 9, 43, 57, 8, 56, 21, 1, 30, 24, 3, 13, 15, 38] -25

In [15]:
df = pd.DataFrame(results)
display(df)
df.to_csv('comparison_tables/genetic_algorithms.csv', mode='a', header=not pd.io.common.file_exists('comparison_tables/genetic_algorithms.csv'), index=False)

Unnamed: 0,Dim,Method,Value,Time
0,70,"tournament_selection, pmx_crossover, mutation_...",24483,3.24
1,70,"tournament_selection, pmx_crossover, mutation_...",25533,3.3
2,70,"tournament_selection, pmx_crossover, mutation_...",25041,3.38
3,70,"tournament_selection, ordered_crossover, mutat...",24605,3.27
4,70,"tournament_selection, ordered_crossover, mutat...",25461,3.3
5,70,"tournament_selection, ordered_crossover, mutat...",25112,3.42
6,70,"rank_selection, pmx_crossover, mutation_swap",24708,3.26
7,70,"rank_selection, pmx_crossover, mutation_inverse",25632,3.37
8,70,"rank_selection, pmx_crossover, mutation_scramble",25581,3.46
9,70,"rank_selection, ordered_crossover, mutation_swap",24901,3.31


In [16]:
print('best:', methods[best_i], values[best_i], times[best_i])
df_best = pd.DataFrame({'Dim': num_nodes, 'Method':  methods[best_i], 'Value': values[best_i], 'Time': times[best_i]}, index=[0])
display(df_best)

best: tournament_selection, pmx_crossover, mutation_swap 24483 3.24


Unnamed: 0,Dim,Method,Value,Time
0,70,"tournament_selection, pmx_crossover, mutation_...",24483,3.24


In [17]:
df = pd.read_csv('comparison_tables/bests.csv')
row_to_update = df[df['Dim'] == num_nodes]

# Check if the row exists
if not row_to_update.empty:
    # Update specific columns in the located row
    df.loc[row_to_update.index, 'genetic_alg'] = values[best_i]
    df.loc[row_to_update.index, 'genetic_alg_time'] = times[best_i]

    # Save the updated DataFrame back to the CSV file
    df.to_csv('comparison_tables/bests.csv', index=False)
else:
    new_row_data = {'Dim': num_nodes, 'genetic_alg': values[best_i], 'genetic_alg_time': times[best_i]}
    df.loc[len(df)] = new_row_data
    df.to_csv('comparison_tables/bests.csv', index=False)

In [18]:
average = [average[0] / number_of_combinations, average[1] / number_of_combinations]
average = [round(num, 2) for num in average]
print('average:', average)
df_avg = pd.DataFrame({'Dim': num_nodes, 'Value': average[0], 'Time': average[1]}, index=[0])
display(df_avg)

average: [25226.0, 3.38]


Unnamed: 0,Dim,Value,Time
0,70,25226.0,3.38


In [19]:
df = pd.read_csv('comparison_tables/averages.csv')
row_to_update = df[df['Dim'] == num_nodes]

# Check if the row exists
if not row_to_update.empty:
    # Update specific columns in the located row
    df.loc[row_to_update.index, 'genetic_alg'] = average[0]
    df.loc[row_to_update.index, 'genetic_alg_time'] = average[1]

    # Save the updated DataFrame back to the CSV file
    df.to_csv('comparison_tables/averages.csv', index=False)
else:
    df.loc[len(df)] = {'Dim': num_nodes, 'genetic_alg': average[0], 'genetic_alg_time': average[1]}
    df.to_csv('comparison_tables/averages.csv', index=False)