In [1]:
!pip install python-Levenshtein

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import random
import Levenshtein
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Symbol set for the GA: the 26 lowercase ASCII letters followed by a space.
alphabet = list("abcdefghijklmnopqrstuvwxyz ")

In [4]:
def random_string(l, Sigma):
    """Build a random string made of ``l`` symbols drawn from ``Sigma``.

    Every position is picked independently with ``random.choice``, so symbols
    may repeat. For ``l <= 0`` the empty string is returned and ``Sigma`` is
    never consulted.

    Args:
        l: Number of symbols to draw.
        Sigma: Non-empty sequence of string symbols to choose from.

    Returns:
        The concatenation of the ``l`` drawn symbols.
    """
    return "".join(random.choice(Sigma) for _ in range(l))

In [5]:
def initialize_population(N, l, Sigma):
    """Create the starting population for the GA.

    Args:
        N: Number of individuals to generate.
        l: Length of every individual.
        Sigma: Sequence of symbols the individuals are built from.

    Returns:
        A list of ``N`` independently generated random strings.
    """
    population = []
    for _ in range(N):
        population.append(random_string(l, Sigma))
    return population

In [6]:
def fraction_correct_characters(string, target):
    """Return the share of positions at which ``string`` equals ``target``.

    Both inputs must have the same length (checked with an assertion). The
    result lies in ``[0, 1]``; ``1.0`` means the two strings are identical.
    An empty ``target`` leads to a ``ZeroDivisionError``.
    """
    assert len(string) == len(target)

    matches = sum(1 for got, wanted in zip(string, target) if got == wanted)
    return matches / len(target)

In [7]:
def levenshtein_distance(string, target):
    """Return the Levenshtein (edit) distance between ``string`` and ``target``.

    Thin wrapper around ``Levenshtein.distance``. Both inputs must have the
    same length, which is enforced with an assertion. Callers in this
    notebook treat smaller values as fitter: they take the minimum when the
    fitness function is ``'lev'``.
    """
    assert len(string) == len(target)
    
    return Levenshtein.distance(string, target)

In [8]:
def calculate_fitness(string, target, fit_fn):
    """Score ``string`` against ``target`` with the fitness named by ``fit_fn``.

    ``'lev'`` selects the Levenshtein distance; ``'frac'`` and any other value
    select the fraction of correctly placed characters.
    """
    scorer = levenshtein_distance if fit_fn == 'lev' else fraction_correct_characters
    return scorer(string, target)

In [9]:
def select_parent(pop, pop_fit, K, fit_fn):
    """Pick one parent from ``pop`` by K-way tournament selection.

    ``K`` distinct individuals are sampled without replacement and the fittest
    of them wins. For ``fit_fn == 'lev'`` the fitness is a distance, so the
    smallest value wins; for ``'frac'`` and any other value the largest value
    wins. Unknown names are therefore treated like ``'frac'``, matching the
    fallback in ``calculate_fitness``; previously they left the winner index
    unset and the final lookup raised ``TypeError``.

    Args:
        pop: List of individuals.
        pop_fit: Fitness values, ``pop_fit[i]`` belonging to ``pop[i]``.
        K: Tournament size; must not exceed ``len(pop_fit)``.
        fit_fn: Name of the fitness function that produced ``pop_fit``.

    Returns:
        The winning individual from ``pop``.

    Raises:
        ValueError: If ``K`` is larger than the population (from
            ``random.sample``).
    """
    # Pair each fitness with its index so the winner can be looked up in pop.
    # On equal fitness the tuple comparison falls through to the index.
    sel_pool = random.sample(list(zip(pop_fit, range(len(pop_fit)))), K)

    pick_best = min if fit_fn == 'lev' else max
    _, ind = pick_best(sel_pool)

    return pop[ind]

In [10]:
def cross_over(str1, str2):
    """Recombine two equal-length strings with single-point crossover.

    A cut position is drawn uniformly from ``0 .. len(str1) - 1``. The first
    child takes ``str1`` before the cut and ``str2`` from the cut onwards; the
    second child is the mirror image. A cut at 0 simply swaps the parents.

    Returns:
        A tuple ``(child1, child2)``.
    """
    assert len(str1) == len(str2)

    cut = random.randint(0, len(str1) - 1)
    head1, tail1 = str1[:cut], str1[cut:]
    head2, tail2 = str2[:cut], str2[cut:]

    return head1 + tail2, head2 + tail1

In [11]:
def mutate(string, mu, Sigma):
    """Return a copy of ``string`` with each position mutated with probability ``mu``.

    For every position one uniform number in ``[0, 1)`` is drawn; if it is
    below ``mu`` the character is replaced by a random symbol from ``Sigma``
    (which may happen to be the same character), otherwise it is kept.
    """
    mutated = []
    for original_char in string:
        # Draw the coin first; the replacement symbol is drawn only on a hit.
        if random.random() < mu:
            mutated.append(random_string(1, Sigma))
        else:
            mutated.append(original_char)

    return "".join(mutated)

In [12]:
def step(pop, target, K, mu, Sigma, fit_fn):
    """Advance the population by one generation.

    The fitness of every individual is computed, then parents are repeatedly
    chosen by tournament selection, recombined with single-point crossover and
    mutated until the next generation has as many individuals as the current
    one.

    Args:
        pop: Current population (non-empty list of equal-length strings).
        target: String the GA is searching for.
        K: Tournament size used for parent selection.
        mu: Per-character mutation probability.
        Sigma: Sequence of symbols used for mutation.
        fit_fn: ``'lev'`` for Levenshtein distance (lower is better); any
            other value is scored as fraction of correct characters (higher
            is better).

    Returns:
        A tuple ``(next_pop, avg_fit, best_fit)``: the new population and the
        average and best fitness of the *current* population.

    Raises:
        ZeroDivisionError: If ``pop`` is empty.
    """
    # Fitness of the individuals in the current population.
    pop_fit = [calculate_fitness(x, target, fit_fn) for x in pop]
    avg_fit = sum(pop_fit) / len(pop_fit)

    # 'lev' is a distance to minimise; everything else is maximised, which
    # mirrors the fallback used when the fitness values are computed.
    best_fit = min(pop_fit) if fit_fn == 'lev' else max(pop_fit)

    # Children arrive in pairs, so compare with `<` rather than `!=`:
    # with an odd population size `!=` would never become false.
    next_pop = []
    while len(next_pop) < len(pop):
        parent1 = select_parent(pop, pop_fit, K, fit_fn)
        parent2 = select_parent(pop, pop_fit, K, fit_fn)

        child1, child2 = cross_over(parent1, parent2)
        next_pop.append(mutate(child1, mu, Sigma))
        next_pop.append(mutate(child2, mu, Sigma))

    # Drop the surplus child when the population size is odd.
    return next_pop[:len(pop)], avg_fit, best_fit

In [13]:
def GA(target, K, mu, Sigma, fit_fn='frac', max_iter=1000, N=1000):
    """Run the genetic algorithm until ``target`` appears in the population.

    A random population of ``N`` strings of length ``len(target)`` is created
    and evolved generation by generation until one individual equals
    ``target`` or ``max_iter`` generations have been produced.

    Args:
        target: String to evolve towards.
        K: Tournament size used for parent selection.
        mu: Per-character mutation probability.
        Sigma: Sequence of symbols individuals are built from.
        fit_fn: Name of the fitness function (``'frac'`` or ``'lev'``).
        max_iter: Upper bound on the number of generations.
        N: Population size. An even value is recommended because offspring
            are produced two at a time.

    Returns:
        The number of generations that were run. A return value equal to
        ``max_iter`` does not by itself tell whether the target was found in
        the last generation or not at all.
    """
    pop = initialize_population(N, len(target), Sigma)

    iteration = 0
    while target not in pop and iteration < max_iter:
        # The average and best fitness reported by step() are not needed here.
        pop, _, _ = step(pop, target, K, mu, Sigma, fit_fn)
        iteration += 1

    return iteration

In [14]:
def create_hist(target, K, mu, Sigma, n_runs=20):
    """Plot the distribution of generations-to-finish over repeated GA runs.

    The GA is run ``n_runs`` times with the default fitness function and the
    resulting generation counts are shown as a horizontal box plot.

    Args:
        target: String to evolve towards.
        K: Tournament size used for parent selection.
        mu: Per-character mutation probability.
        Sigma: Sequence of symbols individuals are built from.
        n_runs: Number of independent GA runs to collect.
    """
    # Honour n_runs; the run count used to be hard-coded to 20 here and in
    # the tick label below.
    generations = [GA(target, K, mu, Sigma) for _ in range(n_runs)]

    # NOTE(review): per the matplotlib docs `meanline` only has an effect
    # together with `showmeans=True`; confirm whether the mean should be drawn.
    plt.boxplot(generations, meanline=True, vert=False, labels=[n_runs])
    plt.xlabel('Number of generations (t_fin)')
    plt.ylabel('Number of runs')
    plt.title('Distribution of t_fin, K = {}, mu = {}'.format(K, mu))
    plt.show()

In [None]:
def tune_mu(target, K, Sigma, num_steps=100, n_runs=20):
    """Sweep the mutation rate and plot its effect on generations-to-finish.

    ``num_steps`` mutation rates ``0, 1/(10*num_steps), 2/(10*num_steps), ...``
    are tried (so all rates are below 0.1). For each rate the GA is run
    ``n_runs`` times; the mean and standard deviation of the generation
    counts are printed as they become available and finally drawn as an
    error-bar plot.

    Args:
        target: String to evolve towards.
        K: Tournament size used for parent selection.
        Sigma: Sequence of symbols individuals are built from.
        num_steps: Number of mutation rates to evaluate.
        n_runs: Number of GA runs per mutation rate.
    """
    mus = np.arange(num_steps, dtype=float) / float(num_steps*10)
    means = np.zeros(num_steps, dtype=float)
    stds = np.zeros(num_steps, dtype=float)

    for step_idx, mu in enumerate(mus):
        # Generation counts of all runs at this mutation rate.
        gens = np.array(
            [GA(target, K, mu, Sigma) for _ in range(n_runs)],
            dtype=float,
        )
        means[step_idx] = gens.mean()
        stds[step_idx] = gens.std()
        print("Simulation ({}/{}): [mu: {}, t_avg: {}, t_std: {}, n_runs: {}]".format(step_idx+1, num_steps, mu, means[step_idx], stds[step_idx], n_runs))

    plt.errorbar(mus, means, stds, linestyle='None')
    plt.xlabel('Mutation rate (mu)')
    plt.ylabel('Number of generations (t_fin)')
    plt.show()

In [16]:
#create_hist("hello group one", 2, 0.01, alphabet)

In [17]:
#create_hist("hello group one", 2, 0.0, alphabet)

In [18]:
#create_hist("hello group one", 2, 0.1, alphabet)

In [19]:
tune_mu("hello group one", 2, alphabet)

Simulation (1/100): [mu: 0.0, t_avg: 18.0, t_std: 1.140175425099138]
Simulation (2/100): [mu: 0.001, t_avg: 17.9, t_std: 1.1789826122551597]
Simulation (3/100): [mu: 0.002, t_avg: 18.1, t_std: 1.2609520212918492]
Simulation (4/100): [mu: 0.003, t_avg: 17.9, t_std: 1.2206555615733703]
Simulation (5/100): [mu: 0.004, t_avg: 17.9, t_std: 1.1357816691600549]
Simulation (6/100): [mu: 0.005, t_avg: 18.1, t_std: 1.044030650891055]
Simulation (7/100): [mu: 0.006, t_avg: 18.25, t_std: 1.6695807857064]
Simulation (8/100): [mu: 0.007, t_avg: 18.05, t_std: 1.116915395184434]
Simulation (9/100): [mu: 0.008, t_avg: 18.05, t_std: 1.0234744745229363]
Simulation (10/100): [mu: 0.009, t_avg: 18.7, t_std: 1.3076696830622023]
Simulation (11/100): [mu: 0.01, t_avg: 18.45, t_std: 1.3219304066402287]
Simulation (12/100): [mu: 0.011, t_avg: 18.65, t_std: 1.4585952145814822]
Simulation (13/100): [mu: 0.012, t_avg: 18.5, t_std: 1.2041594578792296]
Simulation (14/100): [mu: 0.013, t_avg: 18.55, t_std: 1.16081867

KeyboardInterrupt: 