### Lab 2 - Travelling Salesman Problem (TSP)

In [None]:
import pandas as pd
import random as random

In [None]:
cities = pd.read_csv("cities/italy.csv", header=None, names=["city", "x", "y"])

## Greedy algorithm
- **Distance Calculation**:
  - Define a `distance` function that computes the Euclidean distance between two cities using their coordinates.

- **Greedy TSP Algorithm**:
  - Create a function `tsp_greedy` that takes a list of cities as input and finds a solution to the TSP using a greedy approach.
  
- **Initialization**:
  - Initialize an empty dictionary (`dict_city_solution`) to store, for each starting city, the greedy path built from it and its total distance.

- **Iterate Over Starting Cities**:
  - For each city in the list, treat it as the starting point:
    - Keep track of the current city, a list of visited cities, and a list of remaining cities to visit.

- **Constructing the Path**:
  - While there are still cities to visit:
    - Remove the current city from the list of cities to visit.
    - Find the closest unvisited city by calculating distances.
    - Move to the closest city, add it to the path, and update the total distance.

- **Complete the Tour**:
  - After visiting all cities, calculate the distance from the last city back to the starting city and add it to the total distance.

- **Store the Solution**:
  - Store the resulting path and total distance in the dictionary for the current starting city.

- **Find the Best Solution**:
  - After iterating through all starting cities, determine the path with the minimum total distance from the dictionary.

- **Output**:
  - Return the best path found, the corresponding total distance, and a list of all paths evaluated during the process.


In [None]:
def distance(city1, city2):
    """Return the Euclidean distance between two cities.

    Both arguments must expose numeric ``x`` and ``y`` attributes, as the rows
    yielded by ``DataFrame.itertuples()`` do.
    """
    delta_x = city1.x - city2.x
    delta_y = city1.y - city2.y
    return (delta_x**2 + delta_y**2)**0.5

def _city_distance(city1, city2):
    """Euclidean distance between two rows exposing ``x`` and ``y`` attributes.

    Kept private to the greedy solver: the module-level name ``distance`` is
    rebound further down in this file to a variant that indexes ``(x, y)``
    tuples, which does not work on ``itertuples()`` rows.
    """
    return ((city1.x - city2.x)**2 + (city1.y - city2.y)**2)**0.5


def tsp_greedy(cities):
    """Build a nearest-neighbour tour from every starting city and keep the best.

    For each row of ``cities`` taken as the start, the tour repeatedly moves to
    the closest city not yet visited (the first one in row order wins ties) and
    finally returns to the start.

    Args:
        cities: DataFrame whose rows expose ``city``, ``x`` and ``y`` through
            ``itertuples()``. Cities are matched by name, so rows sharing a
            name are treated as a single city.

    Returns:
        A tuple ``(best_solution, best_distance, path_distance)``:
        ``best_solution`` is the shortest tour found, as a list of city names
        beginning with its starting city (the closing leg back to the start is
        included in the distance but the start is not repeated in the list);
        ``best_distance`` is its total length; ``path_distance`` is the list of
        all tours, one per starting city, in the order the starting cities
        appear in ``cities``. For an empty DataFrame the result is
        ``(None, inf, [])``.

    Raises:
        ValueError: if no next city can be selected because every remaining
            distance is NaN or infinite (previously this looped forever).
    """
    # Materialise the rows once; plain lists avoid a DataFrame boolean filter
    # at every step of every tour.
    rows = list(cities.itertuples())

    # starting city name -> (path, total distance)
    dict_city_solution = {}

    for starting_city in rows:
        current_city = starting_city
        path = [starting_city.city]
        total_distance = 0

        # every city except the starting one is still to be visited
        cities_to_visit = [row for row in rows if row.city != starting_city.city]

        while cities_to_visit:
            closest_city = None
            min_distance = float('inf')

            # strict '<' keeps the first candidate among equally close cities
            for candidate in cities_to_visit:
                dist = _city_distance(current_city, candidate)
                if dist < min_distance:
                    closest_city = candidate
                    min_distance = dist

            if closest_city is None:
                # NaN / infinite coordinates: nothing compares below +inf, so
                # the list of cities to visit could never shrink.
                raise ValueError(
                    f"cannot choose a city after {current_city.city!r}: "
                    "all remaining distances are NaN or infinite"
                )

            total_distance += min_distance
            current_city = closest_city
            path.append(current_city.city)
            cities_to_visit = [
                row for row in cities_to_visit if row.city != current_city.city
            ]

        # close the tour: go back from the last city to the starting one
        total_distance += _city_distance(current_city, starting_city)

        dict_city_solution[starting_city.city] = (path, total_distance)

    # pick the shortest tour; the first one found wins ties
    best_solution = None
    best_distance = float('inf')
    for path, total_distance in dict_city_solution.values():
        if total_distance < best_distance:
            best_distance = total_distance
            best_solution = path

    # all tours, in starting-city order (NOT sorted by distance); they are used
    # later to seed the genetic algorithm
    path_distance = [solution[0] for solution in dict_city_solution.values()]

    return best_solution, best_distance, path_distance
    
# Run the greedy heuristic once. `paths_greedy` (one tour per starting city) is
# reused further down to seed the genetic algorithm's initial population.
best_solution_greedy, best_distance_greedy, paths_greedy = tsp_greedy(cities)
print("Best solution: ", best_solution_greedy)
# NOTE(review): the distance is multiplied by 100 before printing - presumably a
# unit conversion tied to the dataset's coordinates; confirm.
print(f"Best distance: {best_distance_greedy*100:.2f}")
print("Paths greedy: ", paths_greedy)

## Genetic algorithm
- **Initialization**:
  - Set parameters for population size, mutation rate, number of generations, number of parents, and elite size.
  - Initialize the population with some individuals generated from the previous greedy solution, and fill the remaining spots randomly while ensuring valid solutions.

- **Fitness Calculation**:
  - Define a `fitness` function that calculates the inverse of the total travel distance for a solution.
  - Define a `compute_fitness` function that computes the fitness value for each individual in a given population.

- **Mutation**:
  - Implement a `mutation` function that performs a **scramble mutation** within a randomly selected subset of an individual’s cities, based on a predefined mutation rate.

- **Crossover**:
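  - Implement a `crossover` function that recombines two given parents: a random slice is copied from the first parent and the remaining positions are filled with the missing cities in the order they appear in the second parent (an order-crossover-style operator, rather than a textbook partially mapped crossover).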

- **Parent Selection**:
  - Implement a `parent_selection` function that uses **tournament selection** to choose the parents for crossover based on their fitness values.

- **Elitism**:
  - Implement an elitism strategy in each population evolution so that the fittest individuals of the previous generation are carried over to the next one (note that they are still passed through the mutation operator, so they may be altered).

- **Evolution of Population**:
  - Generate a new population by performing crossover and mutation, ensuring it maintains the best individuals from the previous generation.


In [None]:
# Despite the name, this holds ALL greedy tours (one per starting city), i.e. the
# third value returned by tsp_greedy, not only the best one.
BEST_SOLUTION_GREEDY = paths_greedy
# City names taken from the `city` column of the DataFrame.
LIST_CITY = cities['city'].tolist()
LIST_CITY_SIZE = len(LIST_CITY)
# 2.5 individuals per city, truncated to an integer.
POPULATION_SIZE = int(LIST_CITY_SIZE * 2.5)
# Number of top-ranked individuals carried over by evolve_population (10% of the population).
NUM_ELITE = int(POPULATION_SIZE * 0.1)
# Probability that mutation() scrambles a given individual.
MUTATION_RATE = 0.2
NUM_GENERATIONS = 100   # change it manually according to the instance
# Number of tournament winners produced by parent_selection (30% of the population).
NUM_PARENTS = int(POPULATION_SIZE * 0.3)

# Euclidean distance between two points given as (x, y) pairs
def distance(city1, city2):
    """Return the Euclidean distance between two ``(x, y)`` coordinate pairs.

    Index 0 of each argument is read as x and index 1 as y. From this point of
    the file onward this definition replaces the earlier attribute-based
    ``distance``.
    """
    delta_x = city1[0] - city2[0]
    delta_y = city1[1] - city2[1]
    return (delta_x**2 + delta_y**2)**0.5

# compute fitness of an individual
def fitness(individual):
    """Return the fitness of a tour: the inverse of its total closed-loop length.

    Args:
        individual: sequence of city names; consecutive cities are joined and
            the last city is joined back to the first one.

    Returns:
        ``1 / total_distance`` as a float, or ``float('inf')`` when the tour
        has zero length (empty tour, single city, or all cities at the same
        coordinates) instead of raising ``ZeroDivisionError``.

    Raises:
        KeyError: if a city of ``individual`` is not present in the global
            ``cities`` DataFrame.
    """
    # Build the name -> (x, y) lookup once per call instead of filtering the
    # whole DataFrame twice for every city of the tour. When a name occurs
    # more than once, the first row wins (same row the former `.iloc[0]` used).
    coordinates = {}
    for name, x, y in zip(cities['city'], cities['x'], cities['y']):
        if name not in coordinates:
            coordinates[name] = (float(x), float(y))

    size = len(individual)
    total_distance = 0
    for i in range(size):
        here = coordinates[individual[i]]
        # circular indexing so that the last city connects back to the first
        there = coordinates[individual[(i + 1) % size]]
        total_distance += distance(here, there)

    if total_distance == 0:
        # a zero-length tour cannot be improved: give it the best possible fitness
        return float('inf')

    return float(1 / total_distance)


# fitness of every individual of the population
def compute_fitness(population):
    """Return the list of fitness values, in the same order as ``population``."""
    return [fitness(candidate) for candidate in population]

# check that an individual does not visit any city twice
def valid_solution(individual):
    """Return True when ``individual`` contains no repeated city.

    Only uniqueness is checked; whether every city of the instance is present
    is not verified here.
    """
    distinct_cities = len(set(individual))
    return distinct_cities == len(individual)

# implement scramble mutation
def mutation(individual, mutation_rate=None):
    """Return a possibly scramble-mutated copy of ``individual``.

    With probability ``mutation_rate`` a random contiguous slice of the tour is
    shuffled. The input is never modified: callers that keep references to the
    same list (greedy tours, elite individuals, the recorded best tour) are not
    affected by the mutation.

    Args:
        individual: sequence of cities.
        mutation_rate: probability of mutating; defaults to the module-level
            ``MUTATION_RATE`` when omitted.

    Returns:
        A new list holding the same cities, with a slice shuffled if the
        mutation was triggered. Tours with fewer than two cities are returned
        unchanged (as a copy) since there is nothing to scramble.
    """
    if mutation_rate is None:
        mutation_rate = MUTATION_RATE

    mutated = list(individual)

    # the random draw comes first so the RNG sequence matches the previous version
    if random.random() < mutation_rate and len(mutated) >= 2:
        start, end = sorted(random.sample(range(len(mutated)), 2))

        segment = mutated[start:end + 1]
        random.shuffle(segment)
        mutated[start:end + 1] = segment

    return mutated

# generate the initial population: seed it with (possibly mutated) copies of all the greedy tours,
# then fill the remaining slots with random permutations of the cities
def generate_population():
    """Build the initial population of the genetic algorithm.

    The population starts with one individual per tour in
    ``BEST_SOLUTION_GREEDY``, each passed through ``mutation``; the remaining
    ``POPULATION_SIZE - len(BEST_SOLUTION_GREEDY)`` slots are filled with
    random permutations of ``LIST_CITY``.

    Returns:
        A list of individuals (lists of city names).

    Raises:
        ValueError: if a random permutation of ``LIST_CITY`` is not a valid
            solution, i.e. ``LIST_CITY`` contains repeated names. Every
            permutation would fail the same way, so retrying (as was done
            before) could never terminate.
    """
    # Hand a copy to mutation() so the lists stored in BEST_SOLUTION_GREEDY are
    # never modified.
    population = [mutation(list(greedy_path)) for greedy_path in BEST_SOLUTION_GREEDY]

    for _ in range(POPULATION_SIZE - len(BEST_SOLUTION_GREEDY)):
        individual = random.sample(LIST_CITY, len(LIST_CITY))
        if not valid_solution(individual):
            raise ValueError(
                "LIST_CITY contains duplicate city names; "
                "cannot build a valid random individual"
            )
        population.append(individual)
    return population

# order-based crossover: keep a random slice of parent1 and fill the other
# positions with the missing genes in the order they appear in parent2
def crossover(parent1, parent2):
    """Combine two parents into a child tour.

    A random slice ``[start, end]`` is copied from ``parent1`` at the same
    positions. The other positions are then filled, starting right after the
    slice and wrapping around to the beginning, with the genes of ``parent2``
    that are not in the child yet, in ``parent2`` order. (This is an
    order-crossover-style operator, not a textbook partially mapped crossover.)

    Args:
        parent1: sequence of hashable genes (city names).
        parent2: sequence of hashable genes; normally a permutation of
            ``parent1``.

    Returns:
        A new list of ``len(parent1)`` genes. Neither parent is modified. With
        fewer than two genes a copy of ``parent1`` is returned. If ``parent2``
        offers more new genes than there are free positions the extra ones are
        ignored (the previous implementation never terminated in that case);
        if it offers fewer, the unfilled positions stay ``None``.
    """
    size = len(parent1)
    if size < 2:
        # nothing to recombine; random.sample would raise on such a short range
        return list(parent1)

    start, end = sorted(random.sample(range(size), 2))

    child = [None] * size
    child[start:end + 1] = parent1[start:end + 1]

    # set membership replaces the former O(n) `gene not in child` list scan
    used = set(child[start:end + 1])

    # free positions in filling order: after the slice first, then wrapping
    # around to the positions before it
    free_slots = iter(list(range(end + 1, size)) + list(range(start)))

    for gene in parent2:
        if gene in used:
            continue
        slot = next(free_slots, None)
        if slot is None:
            # child is full: parent2 holds genes parent1 does not have
            break
        child[slot] = gene
        used.add(gene)

    return child

# implement the tournament selection
def parent_selection(population, fitness_values, tournament_size=10, num_parents=None):
    """Select parents through repeated tournaments.

    Each tournament draws distinct individuals at random and its winner is the
    one with the highest fitness.

    Args:
        population: list of individuals.
        fitness_values: fitness of each individual, aligned with ``population``.
        tournament_size: number of contenders per tournament. It is capped at
            the population size, so small instances no longer make
            ``random.sample`` fail.
        num_parents: number of parents to select; defaults to the module-level
            ``NUM_PARENTS`` when omitted.

    Returns:
        A list of ``num_parents`` individuals. They are the same list objects
        as in ``population`` (no copy) and may contain repetitions.

    Raises:
        ValueError: if a tournament cannot be held (empty population or
            ``tournament_size`` below 1) while at least one parent is requested.
    """
    if num_parents is None:
        num_parents = NUM_PARENTS

    # a tournament cannot draw more distinct contenders than there are individuals
    contenders = min(tournament_size, len(population))

    selected_parents = []
    for _ in range(num_parents):
        tournament = random.sample(range(len(population)), contenders)
        winner = max(tournament, key=lambda idx: fitness_values[idx])
        selected_parents.append(population[winner])

    return selected_parents

# create the next generation of the population
def evolve_population(population, fitness_values):
    """Produce the next generation from the current one.

    The ``NUM_ELITE`` fittest individuals are carried over (after being passed
    through ``mutation``); the rest of the new population is made of mutated
    children obtained by crossing pairs of tournament-selected parents.

    Args:
        population: current list of individuals.
        fitness_values: fitness of each individual, aligned with ``population``.

    Returns:
        A new list of at most ``POPULATION_SIZE`` individuals. The individuals
        of ``population`` are left untouched.

    Raises:
        ValueError: propagated from ``random.sample`` when fewer than two
            parents are available for crossover.
    """
    # Rank on fitness alone: without key= the tuples would fall back to
    # comparing the individuals themselves whenever two fitness values tie.
    # Ties keep their original population order.
    ranked = sorted(
        zip(fitness_values, population), key=lambda pair: pair[0], reverse=True
    )

    # elitism, but apply mutation to the elite. Each elite is copied first so
    # the current population, which parent selection below still pairs with
    # `fitness_values`, is not changed under its feet.
    new_population = [mutation(list(elite)) for _, elite in ranked[:NUM_ELITE]]

    parents = parent_selection(population, fitness_values)

    while len(new_population) < POPULATION_SIZE:
        parent1, parent2 = random.sample(parents, 2)
        child = crossover(parent1, parent2)
        child = mutation(child)
        new_population.append(child)
    return new_population[:POPULATION_SIZE]

# implement the genetic algorithm
def genetic_algorithm():
    """Run the genetic algorithm for ``NUM_GENERATIONS`` generations.

    Each generation evaluates the population, prints the best individual of
    that generation and updates the best tour seen so far; the population is
    then evolved, except after the last generation.

    Returns:
        A tuple ``(best_solution, best_distance)``: the shortest tour seen in
        any generation (list of city names) and its length, computed as the
        inverse of its fitness. ``(None, inf)`` if no generation is run.
    """
    population = generate_population()
    best_solution = None
    best_distance = float('inf')

    for generation in range(NUM_GENERATIONS):
        print(f"Generation {generation + 1}/{NUM_GENERATIONS}...")

        fitness_values = compute_fitness(population)
        best_temp_index = fitness_values.index(max(fitness_values))
        best_temp_distance = 1 / fitness_values[best_temp_index]
        print(f"Best temp solution: {population[best_temp_index]}")
        print(f"Best temp distance: {best_temp_distance*100}")

        if best_temp_distance < best_distance:
            # Store a copy: the population member itself may be altered in
            # place by later evolution steps, which would leave the recorded
            # tour out of sync with `best_distance`.
            best_solution = list(population[best_temp_index])
            best_distance = best_temp_distance

        # evolve the population (not at the last generation)
        if generation != NUM_GENERATIONS - 1:
            population = evolve_population(population, fitness_values)

    return best_solution, best_distance

# Script entry point: run the genetic algorithm and report the best tour found.
if __name__ == "__main__":
    best_solution, best_distance = genetic_algorithm()
    print("Best solution: ", best_solution)
    # NOTE(review): scaled by 100 like the greedy report earlier in the file -
    # confirm the intended unit.
    print(f"Best distance: {best_distance*100:.2f}")
