In [None]:
%config InlineBackend.figure_format = 'svg'
%matplotlib inline

import itertools
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool
from pathlib import Path
import random

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
# Apparently, SNS stands for "Samuel Norman Seaborn", a fictional
# character from The West Wing
import seaborn as sns
import sympy

# Ensure the output directory for saved figures is present (no-op if it
# already exists).
Path('figures/').mkdir(exist_ok=True)
# Apply seaborn's plot styling and enable sympy's pretty-printing.
# NOTE(review): no RNG seed is set for `random` / `np.random` here, so runs
# of the stochastic cells below are not reproducible — confirm if intended.
sns.set()
sympy.init_printing()

In [None]:
def pmx(mom, dad):
    """Recombination using the Partially-Mapped Crossover algorithm.

    A random contiguous slice one third the length of the parents is copied
    from ``mom`` into the child at the same positions. Every gene in the
    matching slice of ``dad`` that was not copied is then relocated by
    following the mom -> dad position mapping until it lands on a position
    outside the slice. All remaining positions are filled straight from
    ``dad``.

    Both parents are expected to be permutations of the same genes.

    :param mom: The first parent.
    :type mom: list
    :param dad: The second parent.
    :type dad: list
    :returns: The recombined child (an empty list for empty parents).
    :rtype: list
    """
    n = len(mom)
    # np.random.randint(0, 0) raises, and there is nothing to cross over.
    if n == 0:
        return []

    # Pick a random chunk 1/3 the length of mom's genes to flat-out copy.
    length = n // 3
    start = np.random.randint(0, n - length)
    stop = start + length
    child = [0] * n
    mom_chunk = mom[start:stop]
    dad_chunk = dad[start:stop]
    # Copy a chunk of mom's genes.
    child[start:stop] = mom_chunk
    in_mom_chunk = set(mom_chunk)
    copied = set(mom_chunk)

    # Find a place for each element of dad's chunk that won't clobber mom's.
    for elem in dad_chunk:
        if elem in in_mom_chunk:
            # Already present in the child via mom's chunk.
            continue
        # Start at the element's position in dad, which lies inside the chunk.
        idx = dad.index(elem)
        # Follow the mapping until we step outside the copied chunk. The test
        # is on the *position*: testing whether mom's gene was already used
        # would also match genes placed by earlier iterations, walk back into
        # the chunk, and overwrite a previously placed element.
        while start <= idx < stop:
            idx = dad.index(mom[idx])
        # Copy the element into the child.
        child[idx] = elem
        copied.add(elem)

    # Add the rest of dad's genes at their original positions.
    for pos in range(n):
        gene = dad[pos]
        if gene not in copied:
            child[pos] = gene
            copied.add(gene)
    return child

In [None]:
# Two sample parent permutations of the genes 1..9 for trying out crossover.
p1 = list(range(1, 10))
p2 = [9, 3, 7, 8, 2, 6, 5, 1, 4]

In [None]:
# Cross the two sample parents with PMX and display the resulting child.
child = pmx(mom=p1, dad=p2)
child

In [None]:
def decode(genome):
    """Turn a stack-encoded genome into a tour.

    Each gene, taken modulo the number of cities not yet visited, selects
    the next city from the shrinking list of remaining cities.

    :param genome: The genome to decode into a phenome.
    :returns: The indices for a valid tour path.
    """
    remaining = list(range(len(genome)))
    # Wrap each gene onto the remaining cities, then pull the chosen city
    # out so it cannot be visited twice.
    return [remaining.pop(gene % len(remaining)) for gene in genome]

In [None]:
# Decode a sample stack-encoded genome whose genes exceed the city count.
decode(genome=[304, 128, 19, 44, 212])

In [None]:
def stackx(mom, dad):
    """One-point crossover for stack-encoded genomes.

    The child is the first half of ``mom`` followed by the second half of
    ``dad``; each parent is split at its own midpoint.
    """
    head = mom[:len(mom) // 2]
    tail = dad[len(dad) // 2:]
    return head + tail

In [None]:
def recombine(mom, dad, encoding='path'):
    """Cross two parents using the crossover registered for ``encoding``.

    :param mom: The first parent.
    :param dad: The second parent.
    :param encoding: Name of the genome encoding; selects the crossover.
    :returns: The child produced by the selected crossover.
    :raises KeyError: If no crossover is registered for ``encoding``.
    """
    # NOTE(review): the 'path' -> pmx entry is disabled, so only 'stack' is
    # registered and the default encoding='path' currently raises KeyError.
    crossovers = {
#         'path': pmx,
        'stack': stackx,
    }
    crossover = crossovers[encoding]
    return crossover(mom, dad)

In [None]:
def generate_cities(n, scale=100):
    """Draw ``n`` random 2-D city locations.

    :param n: The number of cities to generate.
    :param scale: Factor applied to the uniform ``[0, 1)`` samples.
    :returns: An ``(n, 2)`` array with one ``(x, y)`` row per city.
    """
    unit_square = np.random.rand(n, 2)
    return unit_square * scale

def generate_population(cities, size):
    """Create ``size`` random tours over the given cities.

    :param cities: The cities to tour; only their count is used.
    :param size: The number of individuals to generate.
    :returns: A list of ``size`` lists, each a random permutation of
        ``range(len(cities))``.
    """
    # Take the length from the argument. The parameter was previously
    # misspelled, so the body silently read whatever global `cities`
    # happened to exist (or raised NameError on a fresh kernel).
    n = len(cities)
    population = []
    for _ in range(size):
        individual = list(range(n))
        random.shuffle(individual)
        population.append(individual)
    return population

In [None]:
# Draw one random tour through 50 random cities.
cities = generate_cities(50)
population = generate_population(cities, 1)
path = population[0]

# Red line: the cities in tour order. Markers: every city location.
plt.plot(*cities[path].T, 'r')
plt.plot(*cities.T, 'o')
plt.axis('scaled')
plt.title('A random individual')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.show()

In [None]:
def pairwise(iterable):
    """Pair each element of the iterable with its successor.

    pairwise([1, 2, 3, 4]) -> (1, 2), (2, 3), (3, 4)
    """
    current, following = itertools.tee(iterable, 2)
    # Move the second copy one item ahead; the default keeps an empty
    # iterable from raising StopIteration.
    next(following, None)
    return zip(current, following)

In [None]:
# Breed neighbouring individuals with the stack crossover, then decode each
# child genome into a tour.
cities = generate_cities(10)
population = generate_population(cities, 20)
children = [
    decode(recombine(mom, dad, 'stack'))
    for mom, dad in pairwise(population)
]
children

In [None]:
# children = [recombine(mom, dad, 'path') for mom, dad in pairwise(population)]
# children

In [None]:
def fitness(cities, path, encoding='path'):
    """Score a tour as the reciprocal of its length.

    The length is the sum of Euclidean distances between consecutive cities
    along the path; no leg from the last city back to the first is added.

    :param cities: The array of cities through which to compute a path.
    :param path: The path through the given cities to compute the fitness for.
    :param encoding: One of 'path' or 'stack'.
    :returns: The fitness of the individual.
    :raises ValueError: If ``encoding`` is neither 'path' nor 'stack'.
    """
    if encoding == 'stack':
        # Stack-encoded genomes must be decoded into city indices first.
        path = decode(path)
    elif encoding != 'path':
        raise ValueError('invalid encoding')

    individual = cities[path]
    length = sum(np.linalg.norm(here - there) for here, there in pairwise(individual))
    return 1 / length

In [None]:
# Score one random genome under both encodings.
cities = generate_cities(50)
population = generate_population(cities, 1)
path = population[0]

# 'path' treats the genome as an ordered tour; 'stack' treats it as a
# stack-encoded intermediate genome and decodes it first.
for encoding in ('path', 'stack'):
    print(fitness(cities, path, encoding=encoding))

In [None]:
def mutate(path):
    """Swap two randomly chosen positions in the given path.

    The first index is drawn from every position except the last and the
    second from that index onward, so the two may coincide and leave the
    copy unchanged. Paths with fewer than two elements have nothing to swap
    and are returned as plain copies.

    :param path: The genome to mutate; must provide ``copy()``.
    :returns: A new mutated copy; ``path`` itself is left untouched.
    """
    x = path.copy()
    # np.random.randint requires low < high, which fails for length 0 or 1.
    if len(path) < 2:
        return x
    i = np.random.randint(0, len(path) - 1)
    j = np.random.randint(i, len(path))
    # Swap the two genes (an inversion of x[i:j] was tried here previously).
    x[i], x[j] = x[j], x[i]
    return x

In [None]:
# Try the mutation out on a small sample path.
a = list(range(1, 10))
mutate(a)

In [None]:
def deterministic_selection(population, size, func, cities, encoding='path'):
    """Deterministically select the most fit from the given population.

    Use the given fitness function to rank the population, then keep the
    `size` fittest individuals. This assumes that, for a problem without
    recombination, the mutated individuals have been mixed in with the
    original population.

    The input list is not modified. If `size` is at least the population
    size, the whole population is returned (sorted).

    :param population: The population to cull.
    :param size: The desired size of the population.
    :param func: The fitness function to rank the population by; called as
        ``func(cities, individual, encoding)``.
    :param cities: The array of city locations.
    :param encoding: Passed through to ``func``.
    :returns: The culled population, sorted upwards in increasing fitness.
    """
    # sorted() rather than list.sort() so the caller's list is left alone.
    ranked = sorted(population, key=lambda p: func(cities, p, encoding))
    # Clamp at zero: a negative start index would slice from the wrong end
    # and return too few individuals when size > len(population).
    euthanize = max(len(ranked) - size, 0)
    return ranked[euthanize:]

In [None]:
def stochastic_selection(population, size, func, cities, encoding='path'):
    """Sample survivors with probability proportional to fitness.

    Individuals are drawn without replacement; each one's chance of being
    picked is its fitness divided by the total fitness of the population.

    :param population: The population to cull.
    :param size: The desired size of the population.
    :param func: The fitness function to rank the population by.
    :param cities: The array of city locations.
    :param encoding: Passed through to ``func``.
    :returns: The culled population, unsorted.
    """
    scores = np.array([func(cities, member, encoding) for member in population])
    # Normalise the fitnesses into a probability distribution.
    weights = scores / scores.sum()
    picks = np.random.choice(len(population), size, replace=False, p=weights)
    return [population[k] for k in picks]

In [None]:
def select(population, size, func, cities, method='deterministic', encoding='path'):
    """Cull the population down to `size` using the named selection scheme.

    :param population: The population to cull.
    :param size: The desired size of the population.
    :param func: The fitness function to rank the population by.
    :param cities: The array of city locations.
    :param method: One of 'deterministic' or 'stochastic'.
    :param encoding: One of 'path' or 'stack'
    :returns: The culled population, in arbitrary order.
    :raises KeyError: If ``method`` names no known selection scheme.
    """
    strategies = dict(
        deterministic=deterministic_selection,
        stochastic=stochastic_selection,
    )
    chosen = strategies[method]
    return chosen(population, size, func, cities, encoding)

In [None]:
def ea(cities, size, func, iters, selection='deterministic', encoding='path'):
    """Run the evolutionary algorithm to solve the TSP.

    Each generation drops the least-fit third of the population, shuffles
    the survivors, breeds neighbouring survivors, mutates every survivor,
    and then selects `size` individuals from survivors + children + mutants.

    :param cities: The array of city locations.
    :param size: The population size to use.
    :param func: The fitness function to use; called as
        ``func(cities, individual, encoding)``.
    :param iters: The number of generations to run.
    :param selection: One of 'deterministic' or 'stochastic'.
    :param encoding: One of 'path' or 'stack'. Determines recombination method.
    :returns: A pair ``(best_fitnesses, best_individuals)``: the best fitness
        of each generation, shape ``(iters,)``, and the matching tour as city
        indices (decoded for 'stack'), shape ``(iters, len(cities))``.
    """
    n = len(cities)
    population = generate_population(cities, size)
    best_fitnesses = np.zeros(iters)
    best_individuals = np.zeros((iters, n), dtype=int)
    for i in range(iters):
        # TODO: Sort, shuffle, or what?
        # Rank ascending by fitness and cull the worst third. The cut is
        # derived from the population size, not the number of cities: tying
        # it to the city count emptied the population (and crashed on the
        # argmax below) whenever len(cities) // 3 >= size.
        population.sort(key=lambda p: func(cities, p, encoding))
        population = population[len(population) // 3:]
        # Shuffle so neighbouring pairs are random mates, not rank neighbours.
        random.shuffle(population)

        children = [recombine(mom, dad, encoding) for mom, dad in pairwise(population)]
        mutations = [mutate(c) for c in population]
        combined = population + children + mutations
        population = select(combined, size, func, cities, method=selection, encoding=encoding)

        # Record this generation's champion.
        fitnesses = np.array([func(cities, p, encoding) for p in population])
        best = fitnesses.argmax()
        best_fitnesses[i] = fitnesses[best]
        # Stack genomes are stored decoded so the result is always a tour.
        best_individuals[i] = decode(population[best]) if encoding == 'stack' else population[best]
    return best_fitnesses, best_individuals

In [None]:
def plot_summary(fitnesses, cities, paths, description=''):
    """Plot and save the fitness history and the best tour of a run.

    Two figures are shown and written under ``figures/``: the fitness per
    generation, and the tour belonging to the highest recorded fitness.

    :param fitnesses: Array of fitness values, one per generation.
    :param cities: The array of city locations.
    :param paths: The tours, indexable by generation.
    :param description: Label used in the second title and both file names.
    """
    generations = range(len(fitnesses))
    plt.plot(generations, fitnesses)
    plt.xlabel('generation')
    plt.ylabel('fitness')
    plt.title('Population fitness over time')
    plt.savefig(f'figures/prob3-fitness-{description}.pdf')
    plt.show()

    # Pick the generation with the highest fitness and draw its tour.
    best = fitnesses.argmax()
    solution = cities[paths[best]]
    tour_x, tour_y = solution[:, 0], solution[:, 1]
    plt.plot(tour_x, tour_y, 'r')
    plt.plot(cities[:, 0], cities[:, 1], 'o')
    plt.axis('scaled')
    plt.xlabel('$x$')
    plt.ylabel('$y$')
    plt.title(f'The best {description} individual $f={fitnesses[best]:.5f}$')
    plt.savefig(f'figures/prob3-best-{description}.pdf')
    plt.show()

In [None]:
# Problem size, population size, and number of generations for the runs.
N, pop_size, generations = 40, 50, 800
cities = generate_cities(N)

# Show where the generated cities lie.
plt.plot(*cities.T, 'o')
plt.axis('scaled')
plt.title('The city locations')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.savefig('figures/prob3-city-locations.pdf')
plt.show()

In [None]:
# Run the EA on the stack encoding once per selection scheme and plot each.
for selection in ('deterministic', 'stochastic'):
    fitnesses, paths = ea(cities, pop_size, fitness, generations,
                          selection=selection, encoding='stack')
    plot_summary(fitnesses, cities, paths, description=f'{selection}-stack')