In [106]:
%load_ext autoreload
%autoreload 2
import random
import numpy as np
import matplotlib.pyplot as plt
from utils import plot_NQueens, plot_evolution

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Local Search: Genetic Algorithm

## 1.Genrating Phrases

In this section we consider a toy problem: generating a target phrases (e.g. "genetic algorithm") from an initial polupation of random strings. Assume the length of the target is known.

Problem Formulation: 

1. Direct Representation: just strings.
2. Exponential Search Space: $m^l$ where $m$ is the size of the alphabet (set of characters in our interest) and $l$ is the length of the string.

Now that we want to solve it with GA:
1. Chromesome: directly use the string itself.
2. Fitness: how match/close an individual is to the target.
3. Mating: combine two strings in someway to generate a new one.

The following cells will walk you through this problem to show the basic idea of GA

In [107]:
# setting up the problem
target = 'Genetic Algorithm'
u_case = [chr(x) for x in range(65, 91)]
l_case = [chr(x) for x in range(97, 123)]
gene_pool = u_case + l_case + [' ']  # all English chracters and white space


def init_population(pop_size, gene_pool, state_length):
    """
    Randomly initialize a population for genetic algorithm
        pop_size  :  Number of individuals in population
        gene_pool   :  List of possible values for individuals
        state_length:  The length of each individual
    """

    population = []
    for _ in range(pop_size):
        new_individual = "".join(random.choices(gene_pool, k=state_length))
        population.append(new_individual)

    return population


# test
init_pop = init_population(10, gene_pool, len(target))
init_pop

['olFDuUOoHLSPelYBD',
 'bHPtaPl DFo SJEqo',
 'SyvUWcvOQDKBIqBCb',
 'vfFpftKQJiby mRMF',
 'VSvIWjOeuUGzdHQeg',
 'yEVDt FsURrvcNxdV',
 'dkllHtiwKmazDyDgq',
 'OyPzDvWgwCUF EogZ',
 'EQafHlDaRDEHIayqT',
 'PcwKUrNgAmiokHBZV']

In [108]:
def fitness_fn(sample):
    # TODO: evaluate how close *sample* is to the target
    grades = 0
    for i in range(0, len(target)):
        if sample[i] == target[i]:
            grades += 1
    return grades


# test
fitness_fn(init_pop[0])

0

In [109]:
def select(r, population, fitness_fn):
    """
    TODO: select *r* samples from *population*
    the simplest choice is to sample from *population* with each individual weighted by its fitness
    """
    population.sort(key=lambda x: fitness_fn(x), reverse=True)
    return population[:r]+population[r-70:]


# test
parents = select(2, init_pop, fitness_fn)
parents

['PcwKUrNgAmiokHBZV',
 'bHPtaPl DFo SJEqo',
 'PcwKUrNgAmiokHBZV',
 'bHPtaPl DFo SJEqo',
 'yEVDt FsURrvcNxdV',
 'olFDuUOoHLSPelYBD',
 'SyvUWcvOQDKBIqBCb',
 'vfFpftKQJiby mRMF',
 'VSvIWjOeuUGzdHQeg',
 'dkllHtiwKmazDyDgq',
 'OyPzDvWgwCUF EogZ',
 'EQafHlDaRDEHIayqT']

In [110]:
def recombine(x, y):
    """
    TODO: combine two parents to produce an offspring
    """
    s = ''
    i = random.randint(0, len(x))
    i = random.randint(0, i)
    last_i = 0
    for j in range(i):
        if j // 2 == 0:
            s += x[last_i:i]
        else:
            s += y[last_i:i]
        last_i=i
    s += x[i:len(x)]
    for i in range(len(x)):
        ran = random.random()
        if ran > 0.97:
            s = s[:i]+str(random.randint(0,9))+s[i+1:]
    return s


def mutate(x, gene_pool, pmut):
    """
    apply mutation to *x* by randomly replacing one of its gene from *gene_pool*
    """
    ans = x
    if random.uniform(0, 1) >= pmut:
        return x
    l = [i for i in range(len(x))]
    for _ in range(1):
        i = l[random.randint(0, len(l) - 1)]
        l.remove(i)
        j = random.randrange(0, 10)
        ans = x[:i] + str(j) + x[i + 1:]
    for _ in range(1):
        n = len(x)
        g = len(gene_pool)
        c = random.randrange(0, n)
        r = random.randrange(0, g)
        new_gene = gene_pool[r]
        ans = x[:c] + new_gene + x[c + 1:]
    return ans



In [111]:
# now refactor things into a *Problem* abstraction
# you can directly reuse what you have implemented above
from abc import ABC, abstractmethod


class GAProblem(ABC):
    @abstractmethod
    def init_population(self, pop_size): pass

    @abstractmethod
    def fitness(self, sample): pass

    @abstractmethod
    def reproduce(self, population): pass

    @abstractmethod
    def replacement(self, old, new): pass


class PhraseGeneration(GAProblem):
    def __init__(self, target, alphabet):
        self.target = target
        self.alphabet = alphabet
        self.mutation_rate = mutation_rate

    def init_population(self, pop_size):
        # raise NotImplementedError()
        return init_population(pop_size, self.alphabet, len(self.target))

    def fitness(self, sample):
        # TODO: evaluate how close *sample* is to the target
        grades = 0
        for i in range(0, len(target)):
            if sample[i] == target[i]:
                if target[i].isdigit():
                    grades += 1
                else:
                    grades += 1
        return grades

    def reproduce(self, population, mutation_rate):
        """
        TODO: generate the next generation of population

        hint: make a new individual with

        mutate(recombine(*select(2, population, fitness_fn)), gene_pool, pmut)

        """
        ans = []
        size = len(population)
        s = set()
        best = max(population, key=problem.fitness)
        for _ in range(len(population)):
            i = random.randrange(0, size)
            while i in s:
                i = random.randrange(0, size)
            ans.append(mutate(recombine(population[i], best), gene_pool, mutation_rate))
            s.add(i)
        return ans

    def replacement(self, old, new):
        """
        You can use your own strategy, for example retain some solutions from the old population
        """
        l = old + new
        l.sort(key=lambda s: fitness_fn(s), reverse=True)
        return l[:len(old)]


def genetic_algorithm(
        problem: GAProblem,
        ngen, n_init_size, mutation_rate,
        log_intervel=100
):
    population = problem.init_population(n_init_size)
    best = max(population, key=problem.fitness)
    history = [(0, list(map(problem.fitness, population)))]

    for gen in range(ngen):
        next_gen = problem.reproduce(population, mutation_rate)
        population = problem.replacement(population, next_gen)

        if gen % log_intervel == 0:
            current_best = max(population, key=problem.fitness)
            if problem.fitness(current_best) > problem.fitness(best): best = current_best
            print(f"Generation: {gen}/{ngen},\tBest: {best},\tFitness={problem.fitness(best)}")
            history.append((gen, list(map(problem.fitness, population))))

    history.append((ngen - 1, list(map(problem.fitness, population))))
    return best, history

In [112]:
ngen = 1000
max_population = 300
mutation_rate = 0.1

sid = 12010903  # TODO:  replace this with your own sid
target = f"Genetic Algorithm by {sid}"
u_case = [chr(x) for x in range(65, 91)]
l_case = [chr(x) for x in range(97, 123)]
num_case = [str(i) for i in range(10)]
alphabet = u_case + l_case + num_case + [" "]  # TODO: fix this: what is the search space now?

problem = PhraseGeneration(target, alphabet)

# and run it
solution, history = genetic_algorithm(problem, ngen, max_population, mutation_rate)
solution

Generation: 0/1000,	Best: tcnGg83T2UPurhGhzSOdJk0FZdcwf,	Fitness=3
Generation: 100/1000,	Best: 43Genic9G6K3ritz6 48712010903,	Fitness=15
Generation: 200/1000,	Best: 095enic Al6oritzm 48312010903,	Fitness=20
Generation: 300/1000,	Best: G95etic Al4orithm 48312010903,	Fitness=23
Generation: 400/1000,	Best: G98etic Al6orithm by812010903,	Fitness=25
Generation: 500/1000,	Best: G94etic Algorithm by 12010903,	Fitness=27
Generation: 600/1000,	Best: Genetic Algorithm by812010903,	Fitness=28
Generation: 700/1000,	Best: Genetic Algorithm by812010903,	Fitness=28
Generation: 800/1000,	Best: Genetic Algorithm by 12010903,	Fitness=29
Generation: 900/1000,	Best: Genetic Algorithm by 12010903,	Fitness=29


'Genetic Algorithm by 12010903'

In [113]:
# visualize the evolution of the polulation
bins = np.linspace(0, problem.max_fitness, problem.max_fitness + 1)
plot_evolution(history, bins)
bins

AttributeError: 'PhraseGeneration' object has no attribute 'max_fitness'

## 2. N-Queens Problem

It is possible to solve the N-Queens problem with slight modifications.

For the problem:

1. Direct Representation: placement of queens.
2. Search Space: $n^n$, if we don't use any constraints.

To use GA:

Actually a new fitness function is the minimum modification you need for this problem.

Others are alomost the same as the previous problem.

In [None]:
class NQueensProblem(GAProblem):
    def __init__(self, n):
        self.n = n
        self.max_fitness = n * (n - 1) // 2  # max number if non-attacking pairs

    def init_population(self, pop_size):
        # TODO:alomost the same as the previous problem.
        raise NotImplementedError()

    def fitness(self, queens):
        """
        TODO

        hint: count the non-attacking pairs
        """
        raise NotImplementedError()

    def reproduce(self, population, mutation_rate):
        # TODO:alomost the same as the previous problem.
        raise NotImplementedError()

    def replacement(self, old, new):
        """
        You can use your own strategy, for example retain some solutions from the old population
        """
        return new

    def __repr__(self):
        return f"{self.n}-Queens Problem"

In [None]:
from utils import plot_NQueens

ngen = 1000
init_size = 120
mutation_rate = 0.08

n = 8
problem = NQueensProblem(n)
solution, history = genetic_algorithm(problem, ngen, init_size, mutation_rate)

In [None]:
# Example of how to use this function
# plot_NQueens([4, 2, 0, 6, 1, 7, 5, 3])
# replace the parameter with your own results
plot_NQueens(solution)

In [None]:
# Visualize the evolution of the polulation
bins = np.linspace(0, problem.max_fitness, problem.max_fitness)
plot_evolution(history, bins)