# Andrea Mirenda LAB 2 CI

In [1]:
import pandas as pd
from dataclasses import dataclass
from collections import Counter
import random
import math
import numpy as np
from icecream import ic
from matplotlib import pyplot as plt
from itertools import accumulate
from itertools import combinations
from tqdm.auto import tqdm
import geopy.distance
from geopy.distance import geodesic
import networkx as nx
from collections import deque

  from .autonotebook import tqdm as notebook_tqdm


# Dataset import

In [2]:
# Load the city table. header=None tells pandas the CSV has no header row,
# so the three columns are named explicitly here.
CITIES = pd.read_csv('cities/china.csv', header=None, names=['name', 'lat', 'lon'])
# Bare last expression: the notebook renders the frame as the cell output.
CITIES

Unnamed: 0,name,lat,lon
0,Acheng,45.540000,126.960000
1,Aksu,41.150000,80.250000
2,Alaer,40.515556,81.263611
3,Altay,47.840000,88.130000
4,Anbu,23.460000,116.680000
...,...,...,...
721,Ziyang,30.140000,104.640000
722,Zoucheng,35.400000,116.966667
723,Zouxian,35.410000,116.940000
724,Zunhua,40.183333,117.966667


In [3]:
# Pairwise distance table: entry [i, j] holds the geodesic distance in km
# between the cities whose DataFrame index is i and j. The diagonal keeps the
# zeros it was initialised with.
DIST_MATRIX = np.zeros((len(CITIES), len(CITIES)))
for c1, c2 in combinations(CITIES.itertuples(), 2):
    # Each unordered pair is computed once and then mirrored across the diagonal.
    DIST_MATRIX[c1.Index, c2.Index] = geodesic((c1.lat, c1.lon), (c2.lat, c2.lon)).km
    DIST_MATRIX[c2.Index, c1.Index] = DIST_MATRIX[c1.Index, c2.Index]

# Class for the individuals

In [6]:
@dataclass
class Individual:
    """A candidate solution handled by the evolutionary algorithm."""
    # Tour stored as a list of city indices; the tours built in this notebook
    # repeat the first city at the end (closed tour).
    genome: list
    # Value returned by fitness() for this genome; None until it is evaluated.
    fitness : float = None

# Greedy algorithm

In [7]:
def find_closest(segments, city, visited):
    """Return the nearest not-yet-visited neighbour of ``city``.

    Args:
        segments: iterable of ``(pair, distance)`` tuples, where ``pair`` is a
            two-element set of city indices and ``distance`` is the length of
            the edge joining them.
        city: index of the city the tour is currently at.
        visited: container of city indices already in the tour; only
            membership tests are performed on it.

    Returns:
        The tuple ``(int(city), closest_city)``. When several neighbours are
        equally close, the one whose segment comes first in ``segments`` wins.

    Raises:
        ValueError: if no segment links ``city`` to an unvisited city.
    """
    closest_city = None
    closest_distance = None
    for pair, distance in segments:
        # Keep only the segments that touch the current city...
        if city not in pair:
            continue
        other_city = (pair - {city}).pop()
        # ...and lead to a city that is not in the tour yet.
        if other_city in visited:
            continue
        # Strict '<' keeps the earliest segment on ties, exactly like min().
        if closest_distance is None or distance < closest_distance:
            closest_city, closest_distance = other_city, distance

    if closest_distance is None:
        raise ValueError(f"no unvisited neighbour reachable from city {city}")
    return (int(city), closest_city)

In [8]:
def greedy_sol(city, segments):
    """Build a closed nearest-neighbour tour that starts at ``city``.

    Starting from ``city``, the tour repeatedly moves to the closest city that
    has not been visited yet (as chosen by ``find_closest``) until it contains
    ``len(CITIES)`` distinct cities, then returns to the start.

    Args:
        city: index of the starting city.
        segments: list of ``(pair, distance)`` tuples passed through to
            ``find_closest``.

    Returns:
        List of city indices whose last element repeats the first one.
    """
    city = int(city)
    solution = [city]
    # A set keeps the "already visited?" test constant-time; find_closest
    # performs one such test for every candidate segment.
    visited = {city}
    while len(visited) < len(CITIES):
        _, city = find_closest(segments, city, visited)
        solution.append(city)
        visited.add(city)
    # Close the tour by returning to the starting city.
    solution.append(solution[0])

    return solution

In [None]:
# Pick a random start city; np.random.randint excludes the upper bound, so the
# value is always a valid index into CITIES / DIST_MATRIX.
starting_city = np.random.randint(0, len(CITIES))
# One (pair, distance) entry per unordered pair of city indices: the format
# consumed by greedy_sol / find_closest.
segments = [
        ({c1, c2}, float(DIST_MATRIX[c1, c2])) for c1, c2 in combinations(range(len(CITIES)), 2)
]
# Nearest-neighbour tour from the chosen start; greedy_sol repeats the start
# city at the end of the list.
greedy_result = greedy_sol(starting_city, segments)

# Evolutionary algorithm

### Fitness definition

In [None]:
def fitness(solution):
    """Return the negated length of a tour, so that larger means better.

    Args:
        solution: sequence of city indices; every consecutive pair is looked
            up in ``DIST_MATRIX``.

    Returns:
        Minus the sum of the distances between consecutive cities (``0`` when
        the sequence has fewer than two elements).
    """
    negated_length = 0
    for origin, destination in zip(solution, solution[1:]):
        negated_length -= DIST_MATRIX[origin, destination]
    return negated_length

### Mutations and Crossover

The cells below define the crossover and mutation operators that were tried while developing the final version.

In [10]:
def inver_over_mutation(parent1):
    """Return a mutated copy of a tour obtained by random segment inversions.

    The input is copied and, if it is not already closed, closed by appending
    its first city. A random number of inversions (between 1 and
    ``len(parent1) // 2``) is then applied; each one reverses a random slice
    of the interior positions, so the first and last city never move.

    Args:
        parent1: tour as a list of city indices; it is not modified.

    Returns:
        A new closed tour. Tours with fewer than two interior cities have
        nothing to invert and are returned as a (closed) copy.
    """
    # Work on a copy so the parent is left untouched.
    child = parent1[:]
    if not child:
        return child

    # Keep the first node identical to the last one.
    if child[0] != child[-1]:
        child.append(child[0])

    # Only interior positions may be inverted; the endpoints stay fixed.
    interior = range(1, len(child) - 1)
    if len(interior) < 2:
        return child

    # Random number of inversions.
    num_inversions = random.randint(1, len(parent1) // 2)

    for _ in range(num_inversions):
        # Two distinct interior positions delimit the slice to reverse.
        i, j = sorted(random.sample(interior, 2))
        child[i:j + 1] = child[i:j + 1][::-1]

    return child

In [None]:
def scramble_mutation(solution, strength = 0.4):
    """Shuffle the cities found at a random subset of interior positions.

    The first and last element of ``solution`` are never touched, so a closed
    tour stays closed. The list is modified in place and also returned.

    Args:
        solution: tour as a list of city indices.
        strength: shapes the Beta distribution that decides how many positions
            are scrambled; expected to lie in [0, 1] so that both Beta
            parameters stay positive.

    Returns:
        The same list object, after the scramble. Tours with fewer than two
        interior positions are returned unchanged.
    """
    # Every position except the first and the last one is eligible.
    interior = range(1, len(solution) - 1)
    if len(interior) < 2:
        return solution

    # Draw the share of interior positions to scramble from a Beta distribution.
    # NOTE(review): the mean of Beta(alpha, beta) is alpha / (alpha + beta), so
    # with this parametrisation a larger `strength` scrambles FEWER positions
    # on average - confirm this is the intended direction.
    alpha = (1 - strength) * 5 + 1
    beta = strength * 5 + 1
    share = random.betavariate(alpha, beta)

    # n lies between 1 and the number of interior positions; the min() covers
    # the corner case share == 1.0.
    n = min(len(interior), int(1 + (len(interior) - 1) * share))
    indices = random.sample(interior, n)

    # Shuffle the selected values and write them back to the same positions.
    shuffled = [solution[i] for i in indices]
    random.shuffle(shuffled)
    for index, city in zip(indices, shuffled):
        solution[index] = city
    return solution

In [None]:
def swap_mutation(solution):
    """Swap the cities at two distinct interior positions of a tour.

    The first and last element are never selected, so a closed tour stays
    closed. The list is modified in place and also returned.

    Args:
        solution: tour as a list of city indices.

    Returns:
        The same list object, after the swap. Tours with fewer than two
        interior positions are returned unchanged.
    """
    last_interior = len(solution) - 2
    if last_interior < 2:
        # Nothing to swap without touching the endpoints.
        return solution

    # The lower position is uniform over all interior positions but the last
    # one; the upper position is then drawn directly from the positions after
    # it, which avoids retrying until a larger index turns up.
    lower = random.randint(1, last_interior - 1)
    upper = random.randint(lower + 1, last_interior)
    solution[lower], solution[upper] = solution[upper], solution[lower]
    return solution

In [None]:
def mutate_strength(individual, mutationRate=0.5):
    """Swap-mutate a tour: each interior position may swap with a random one.

    Every interior position is visited once and, with probability
    ``mutationRate``, its city is exchanged with the city at another randomly
    chosen interior position. The first and last element are never involved,
    so a closed tour stays closed. The list is modified in place and returned.

    Args:
        individual: tour as a list of city indices.
        mutationRate: probability of triggering a swap at each position.

    Returns:
        The same list object, after the mutation.
    """
    last_interior = len(individual) - 2
    for swapped in range(1, last_interior + 1):
        if random.random() < mutationRate:
            # The partner is restricted to interior positions: swapping with
            # position 0 would leave the first and last city different.
            swapWith = random.randint(1, last_interior)
            individual[swapped], individual[swapWith] = (
                individual[swapWith],
                individual[swapped],
            )
    return individual

In [None]:
def inver_over_crossover(parent1, parent2):
    """One-point crossover for closed tours.

    The child starts with the cities of ``parent1`` that precede a random cut
    point and is completed with the missing cities in the order in which they
    appear in ``parent2``. The tour is then closed by repeating its first city.

    Args:
        parent1: closed tour (first city repeated at the end) giving the head.
        parent2: closed tour over the same cities giving the order of the rest.

    Returns:
        A new closed tour; neither parent is modified.

    Raises:
        ValueError: if ``parent1`` has fewer than three elements (no valid cut
            point exists).
    """
    # The cut never reaches parent1's closing element, so the head is duplicate-free.
    crossover_point = random.randint(1, len(parent1) - 2)

    child = parent1[:crossover_point]
    placed = set(child)

    # Append the cities still missing, in parent2 order. Tracking what has
    # been placed also skips the second occurrence of parent2's start city.
    for city in parent2:
        if city not in placed:
            placed.add(city)
            child.append(city)

    # Close the tour.
    child.append(child[0])
    return child

In [None]:
def crossover_inversion(seq1, seq2):
    """Crossover that keeps a reversed slice of ``seq1`` and fills from ``seq2``.

    Both parents are closed tours; their last element is ignored while
    building the child. A random slice of ``seq1`` is copied, reversed, into
    the same positions of the child; the remaining positions are filled left
    to right with the cities not yet present, in the order in which they
    appear in ``seq2``. The child is finally closed by repeating its first
    city.

    Args:
        seq1: closed tour providing the reversed slice.
        seq2: closed tour over the same cities providing the remaining order.

    Returns:
        A new closed tour; neither parent is modified.

    Raises:
        ValueError: if ``seq1`` has fewer than three elements (two distinct
            cut points cannot be drawn).
        IndexError: if ``seq2`` runs out of cities before the child is full.
    """
    dim = len(seq1) - 1
    pos1, pos2 = sorted(random.sample(range(dim), 2))

    # Private marker for "still empty", so no city value can collide with it.
    hole = object()
    child = [hole] * dim
    child[pos1:pos2 + 1] = seq1[pos1:pos2 + 1][::-1]

    # Set of the cities already in the child, for constant-time lookups.
    placed = set(child[pos1:pos2 + 1])

    cursor = 0
    for k in range(dim):
        if child[k] is not hole:
            continue
        # Advance to the next city of seq2 that the child does not have yet.
        while seq2[cursor] in placed:
            cursor += 1
        child[k] = seq2[cursor]
        placed.add(seq2[cursor])

    child.append(child[0])
    return child

In [None]:
def _pmx_child(keeper, donor, lo, hi):
    """Build one PMX child: ``keeper[lo:hi + 1]`` stays in place, the rest comes from ``donor``."""
    hole = object()  # private "empty slot" marker; cannot collide with a city
    child = [hole] * len(keeper)
    child[lo:hi + 1] = keeper[lo:hi + 1]
    kept = set(keeper[lo:hi + 1])
    donor_pos = {city: idx for idx, city in enumerate(donor)}

    # Cities of the donor slice that were not copied must be relocated:
    # follow the keeper -> donor mapping until it leads outside the slice.
    for i in range(lo, hi + 1):
        city = donor[i]
        if city in kept:
            continue
        pos = i
        while lo <= pos <= hi:
            pos = donor_pos[keeper[pos]]
        child[pos] = city

    # Whatever is still empty is inherited position-by-position from the donor.
    for i, city in enumerate(donor):
        if child[i] is hole:
            child[i] = city
    return child


def pmx_crossover(parent1, parent2):
    """Partially mapped crossover (PMX) producing two children.

    A random slice is exchanged between the parents and the conflicts this
    creates are resolved through the position mapping defined by the slice,
    so each child contains every city exactly once.

    Both open tours (plain permutations) and closed tours (first city repeated
    at the end) are accepted. For closed tours the crossover is applied to the
    open part and each child is closed again afterwards.

    Args:
        parent1: tour as a list of city indices.
        parent2: tour over the same cities, in the same open/closed form.

    Returns:
        Tuple ``(child1, child2)`` of new lists; the parents are not modified.
        ``child1`` keeps the slice of ``parent1``, ``child2`` that of
        ``parent2``.

    Raises:
        ValueError: if the parents differ in length or open/closed form, or if
            the open tour has fewer than two cities.
    """
    if len(parent1) != len(parent2):
        raise ValueError("parents must have the same length")

    closed = len(parent1) > 1 and parent1[0] == parent1[-1]
    if closed:
        if parent2[0] != parent2[-1]:
            raise ValueError("parent1 is a closed tour but parent2 is not")
        # The closing element is a duplicate of the start; leave it out.
        tour1, tour2 = parent1[:-1], parent2[:-1]
    else:
        tour1, tour2 = parent1[:], parent2[:]

    size = len(tour1)

    # Select crossover points (inclusive slice bounds).
    cx_point1 = random.randint(0, size - 2)
    cx_point2 = random.randint(cx_point1 + 1, size - 1)

    child1 = _pmx_child(tour1, tour2, cx_point1, cx_point2)
    child2 = _pmx_child(tour2, tour1, cx_point1, cx_point2)

    if closed:
        child1.append(child1[0])
        child2.append(child2[0])
    return child1, child2

In [None]:
def order_crossover(parent1, parent2):
    """Order crossover (OX): keep a slice of ``parent1``, fill from ``parent2``.

    A random slice of ``parent1`` is copied to the same positions of the
    child; the remaining positions are filled left to right with the cities
    still missing, in the order in which they appear in ``parent2``.

    Both open tours (plain permutations) and closed tours (first city repeated
    at the end) are accepted. For closed tours the crossover is applied to the
    open part and the child is closed again afterwards.

    Args:
        parent1: tour as a list of city indices.
        parent2: tour over the same cities, in the same open/closed form.

    Returns:
        A new tour; the parents are not modified.

    Raises:
        ValueError: if the tour is empty, or if ``parent2`` does not contain
            enough distinct cities to complete the child.
    """
    closed = len(parent1) > 1 and parent1[0] == parent1[-1]
    # The closing element duplicates the start city; leave it out while crossing.
    tour1 = parent1[:-1] if closed else parent1[:]
    tour2 = parent2[:-1] if closed else parent2[:]

    size = len(tour1)
    start, end = sorted([random.randint(0, size - 1) for _ in range(2)])

    # Private marker for "still empty", so no city value can collide with it.
    hole = object()
    child = [hole] * size

    # Copy the crossover segment from parent1 to the child.
    child[start:end + 1] = tour1[start:end + 1]
    placed = set(tour1[start:end + 1])

    # Fill the remaining positions with parent2's cities in order of appearance.
    donor = iter(tour2)
    for i in range(size):
        if child[i] is not hole:
            continue
        for city in donor:
            if city not in placed:
                child[i] = city
                placed.add(city)
                break
        else:
            raise ValueError(
                "parent2 does not contain enough distinct cities to complete the child"
            )

    if closed:
        child.append(child[0])
    return child

### Helper function

In [12]:
def create_random_solution():
    """Return a random closed tour over all cities.

    Returns:
        A list holding every index in ``range(len(CITIES))`` once, in random
        order, followed by a repetition of the first index.
    """
    tour = list(range(len(CITIES)))
    np.random.shuffle(tour)
    # Return to the starting city to close the tour.
    return tour + [tour[0]]

Tournament selection

In [14]:
def parent_selection(population):
    candidates = sorted(np.random.choice(population, int(len(population)/8)), key=lambda e: e.fitness, reverse=True)
    return candidates[0]

### Simulated annealing 

In [15]:
def simulated_annealing(solution, max_iterations=1_000, initial_temperature=100,
                        heating_rate=1.02, patience=1_000):
    """Improve a tour with simulated annealing driven by swap mutations.

    At every iteration one swap mutation of the current tour is proposed. It
    is accepted when it shortens the tour, or with probability
    ``exp(-delta / temperature)`` when it lengthens it (``delta`` being the
    increase in length); proposals that leave the length unchanged are
    rejected. The temperature adapts to the last five decisions: it is raised
    when more than one of them was an acceptance and lowered when none was.

    Args:
        solution: starting tour (list of city indices); it is not modified.
        max_iterations: maximum number of proposals.
        initial_temperature: starting temperature.
        heating_rate: factor by which the temperature is multiplied (more
            exploration) or divided (less exploration).
        patience: stop early after this many consecutive rejections.

    Returns:
        The best tour encountered; this is ``solution`` itself when no better
        tour was found.
    """
    # Adaptive schedule: look at the last 5 decisions, expect 1 acceptance.
    recent_decisions = deque(maxlen=5)
    required_accepts = 1

    current = solution
    current_cost = fitness(current)
    best_sol, best_cost = current, current_cost

    temperature = initial_temperature
    rejections_in_a_row = 0

    for _ in range(max_iterations):
        # Tweak a copy of the current tour. A single draw is enough: every
        # proposal starts from the current tour, so drawing again would only
        # replace one random swap with another.
        candidate = swap_mutation(current.copy())
        candidate_cost = fitness(candidate)

        # fitness() is a negated length, so this is new length - current length.
        delta = current_cost - candidate_cost

        accepted = bool(
            delta < 0
            or (delta != 0 and random.random() < math.exp(-delta / temperature))
        )
        recent_decisions.append(accepted)

        if accepted:
            current, current_cost = candidate, candidate_cost
            rejections_in_a_row = 0
            if current_cost > best_cost:
                best_sol, best_cost = current, current_cost
        else:
            rejections_in_a_row += 1

        accepts = recent_decisions.count(True)
        if accepts > required_accepts:
            temperature *= heating_rate  # more exploration
        elif accepts < required_accepts:
            temperature /= heating_rate

        # Stopping criterion: nothing accepted for `patience` iterations.
        if rejections_in_a_row >= patience:
            break
    return best_sol

## EA final implementation

Since a single run sometimes fails to reach the best solution, the idea is to reduce the number of iterations per run, execute the algorithm several times, and keep the best result over all the executions.

On some datasets it is better to keep a single instance running for a longer time; on others it is better to run several different instances to reach an optimal solution (ideally in parallel, using threads).

To take all of this into account, the EA is configured according to the number of cities in the dataset.

In [19]:
def greedy_2(startingcity: int):
    """Build a closed nearest-neighbour tour directly from ``DIST_MATRIX``.

    Args:
        startingcity: index of the city the tour starts from.

    Returns:
        List of Python ints: the visiting order followed by a repetition of
        the starting city.
    """
    # Work on a private copy: columns of visited cities get overwritten below.
    remaining = DIST_MATRIX.copy()
    seen = np.full(len(CITIES), False)

    current = startingcity
    seen[current] = True
    tour = [int(current)]

    while not seen.all():
        # Blank the current city's column so no row can select it again.
        remaining[:, current] = np.inf
        current = np.argmin(remaining[current])
        seen[current] = True
        tour.append(int(current))

    return tour + [tour[0]]


In [None]:

def execute_EA(small_db = False, POPULATION_SIZE=100, OFFSPRING_SIZE=200, MAX_GENERATIONS=200):
    """Run the evolutionary algorithm and return the best individual found.

    The initial population is made of nearest-neighbour tours from random
    start cities, each refined with simulated annealing. Each generation then
    builds offspring by mutation (probability 0.4) or crossover, adds a
    further mutated copy of every offspring (occasionally refined with
    simulated annealing as well), and keeps the ``POPULATION_SIZE`` fittest
    among parents and offspring.

    Args:
        small_db: when True use ``swap_mutation`` as mutation operator,
            otherwise ``mutate_strength``.
        POPULATION_SIZE: number of individuals kept between generations.
        OFFSPRING_SIZE: number of reproduction rounds per generation; each
            round contributes two or three individuals.
        MAX_GENERATIONS: number of generations to run.

    Returns:
        The ``Individual`` with the highest fitness in the final population.
    """
    mutate = swap_mutation if small_db else mutate_strength

    # Seed with greedy_2: it returns the same nearest-neighbour tour as
    # greedy_sol (same distances, ties resolved towards the lower city index)
    # but reads DIST_MATRIX directly instead of scanning a list of all city
    # pairs at every step.
    population = []
    for _ in range(int(POPULATION_SIZE)):
        start_city = random.randint(0, len(CITIES) - 1)
        genome = simulated_annealing(greedy_2(start_city))
        population.append(Individual(genome=genome, fitness=fitness(genome)))
    population.sort(key=lambda ind: ind.fitness, reverse=True)

    for g in range(MAX_GENERATIONS):
        offspring = []
        for _ in range(OFFSPRING_SIZE):
            if np.random.random() < 0.4:  # mutation probability
                parent = parent_selection(population)
                o = mutate(parent.genome.copy())
            else:
                i1 = parent_selection(population)
                i2 = parent_selection(population)
                o = crossover_inversion(i1.genome.copy(), i2.genome.copy())
            offspring.append(Individual(genome=o, fitness=fitness(o)))

            # Every offspring also contributes a further mutated copy...
            o2 = mutate(o.copy())
            # ...which is occasionally refined by a local search as well.
            if np.random.random() < 0.05:
                o3 = simulated_annealing(o2.copy())
                offspring.append(Individual(genome=o3, fitness=fitness(o3)))
            offspring.append(Individual(genome=o2, fitness=fitness(o2)))

        # Survivor selection: best POPULATION_SIZE among parents and offspring.
        population.extend(offspring)
        population.sort(key=lambda ind: ind.fitness, reverse=True)
        population = population[:POPULATION_SIZE]
        if g % 50 == 0:
            # Fitness is a negated length; flip the sign to report the length.
            print("sol so far at gen: ", g, " is: ", population[0].fitness*(-1))

    # The population is kept sorted, so the first individual is the best one.
    return population[0]

# Run configurations as (POPULATION_SIZE, OFFSPRING_SIZE, MAX_GENERATIONS)
# tuples; which list is used depends on the number of cities.
instances0 = [(100, 200, 20)]
instances1 = [(100, 305, 200), (100, 305, 200), (50, 80, 300)]
instances2 = [(100, 305, 1_500)]
instances3 = [(100, 305, 2_450)]

# Fitness is a negated tour length, so "best" means "highest".
best_fitness = float('-inf')
best_sol = None

# Exactly one branch applies for every dataset size, including len(CITIES) == 100.
if len(CITIES) < 30:
    instances = instances0
elif len(CITIES) < 100:
    instances = instances1
elif len(CITIES) <= 200:
    instances = instances2
else:
    instances = instances3


for current in instances:
    # Datasets with fewer than 50 cities use the single-swap mutation.
    valore = execute_EA(len(CITIES) < 50, *current)
    if valore.fitness > best_fitness:
        best_fitness = valore.fitness
        best_sol = valore.genome

print("best fitness: ", best_fitness*(-1))
print(best_sol)

sol so far at gen:  0  is:  62079.912299169715
sol so far at gen:  50  is:  59888.218409692316
sol so far at gen:  100  is:  59725.56372576508
sol so far at gen:  150  is:  59367.7272611378
sol so far at gen:  200  is:  59291.68308071533
sol so far at gen:  250  is:  59167.385887602766
sol so far at gen:  300  is:  59043.324817839326
sol so far at gen:  350  is:  58875.701144279374
sol so far at gen:  400  is:  58799.8644122699
sol so far at gen:  450  is:  58683.25035753421
sol so far at gen:  500  is:  58585.00078205599
sol so far at gen:  550  is:  58585.00078205599
sol so far at gen:  600  is:  58521.288634827164
sol so far at gen:  650  is:  58480.521028284835
sol so far at gen:  700  is:  58249.8020392395
sol so far at gen:  750  is:  58038.746999668285
sol so far at gen:  800  is:  57978.92624041517
sol so far at gen:  850  is:  57978.92624041517
sol so far at gen:  900  is:  57859.71469047698
sol so far at gen:  950  is:  57853.10931181223
sol so far at gen:  1000  is:  57771.8

KeyboardInterrupt: 