# Local search

Write a local-search algorithm (eg. an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on a 1000-loci genomes, using a minimum number of fitness calls.

In [865]:
import random
import local_search_lib
from tqdm import tqdm

In [None]:
fitness = local_search_lib.make_problem(1)
for n in range(10):
    ind = random.choices([0, 1], k=50)
    print(f"{''.join(str(g) for g in ind)}: {fitness(ind):.2f}")

print(fitness.calls)

# Genetic Algorithm

In [867]:
# GA parameters
LOCI = 1000
POPULATION_SIZE = 180
N_PARENTS = POPULATION_SIZE//3
MUTATION_RATE = 0.5
TOURNAMENT_SIZE = POPULATION_SIZE//3
K = 0.7
CONVERGENCE_GEN = 100
CONVERGENCE_THRESHOLD = 0.01

# Generate random population

In [868]:
def generate_random_individual():
    genome = random.choices([0, 1], k=LOCI)
    return {
        "genome": genome,
        "fitness": 0,
        "avg_distance": 0
    }

def generate_population(population_size=POPULATION_SIZE):
    population = [generate_random_individual() for _ in range(population_size)]
    return population

# Fitness evaluation

In [869]:
def evaluate_fitness(population, fitness):
    for individual in population:
        individual["fitness"] = fitness(individual["genome"])
    return population

# Mutation

In [870]:
def mutate(genome, mutation_rate):
    if random.random() < mutation_rate:
        i = random.randint(0, len(genome)-1)
        genome[i] = 1 - genome[i]
    return genome

def mutate_population(population, mutation_rate=MUTATION_RATE):
    for i in population:
        i["genome"] = mutate(i["genome"], mutation_rate)
    return population

# Crossover

In [871]:
def reproduce(parent1, parent2):
    new_genome=[]
    for p1, p2 in zip(parent1["genome"], parent2["genome"]):
        new_genome.append(random.choice((p1, p2)))
    return {
        "genome": new_genome,
        "fitness": 0,
        "avg_distance": 0
        }

def reproduce_population(parents, population_size=POPULATION_SIZE):
    population = []
    while len(population) + len(parents) < population_size:
        parent1, parent2 = random.sample(parents, 2)
        population.append(reproduce(parent1, parent2))
    return parents + population

# Diversity evaluation

In [872]:
def hamming_distance(genome1, genome2):
    return sum(al1 != al2 for al1, al2 in zip(genome1, genome2))/LOCI


def evaluate_diversity(population):
    for individual in population:
        individual["avg_distance"] = 0

    for i in range(len(population)):
        for j in range(i + 1, len(population)):
            dist = hamming_distance(population[i]["genome"], population[j]["genome"])
            population[i]["avg_distance"] += dist
            population[j]["avg_distance"] += dist
    
    for individual in population:
        individual["avg_distance"] /= (len(population)-1)

    return population

# Parent selection

In [873]:
def tournament_selection(population, tournament_size=TOURNAMENT_SIZE, k=K):
    individuals = random.sample(population, tournament_size)
    selected_i = max(individuals, key=lambda i: (k*i["fitness"] + (k-1)*(1-i["avg_distance"])))
    return selected_i


def parent_selection(population, n_parents=N_PARENTS, tournament_size=TOURNAMENT_SIZE, k=K):
    selected_parents = []
    for _ in range(n_parents):
        selected_parents.append(tournament_selection(population, tournament_size, k))
    return selected_parents

# Plot function

In [874]:
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython import display
import seaborn as sns

plt.ion()
plt.draw()
plt.pause(1)

def plot1(max_fitness, min_fitness, mean_fitness, diversity_hist):
    display.clear_output(wait=True)
    plt.clf()
    plt.title("Fitness")
    plt.xlabel("Generation")
    plt.ylabel("Score")
    plt.plot(max_fitness, label="Max scores")
    plt.plot(min_fitness, label="Min scores")
    plt.plot(mean_fitness, label="Mean scores")
    plt.plot(diversity_hist, label="Diversity")
    plt.ylim(ymin=0)
    plt.text(len(max_fitness)-1, max_fitness[-1], str(max_fitness[-1]))
    # plt.text(len(min_fitness)-1, min_fitness[-1], str(min_fitness[-1]))
    # plt.text(len(mean_fitness)-1, mean_fitness[-1], str(mean_fitness[-1]))
    plt.text(len(diversity_hist)-1, diversity_hist[-1], str(diversity_hist[-1]))
    plt.legend()
    plt.pause(0.25)

<Figure size 640x480 with 0 Axes>

# Run GA algorithm

In [875]:
def simulation(problem_instance=1, population_size=POPULATION_SIZE, n_parents=N_PARENTS, k=K, tournament_size=TOURNAMENT_SIZE, mutation_rate=MUTATION_RATE, plot=True):
    fitness = local_search_lib.make_problem(problem_instance)
    
    population = generate_population(population_size)
    population = mutate_population(population, mutation_rate)

    max_hist = []
    mean_hist = []
    min_hist = []
    fitness_hist = []
    fitness_calls_hist = []
    diversity_hist = []

    counter_convergence = 0
    last_best_fitness = None
    
    while True:
        population = evaluate_fitness(population, fitness)
        population = evaluate_diversity(population)

        # Plot
        fitness_scores = [i["fitness"] for i in population]
        fitness_hist.append(fitness_scores)
        max_hist.append(max(fitness_scores))
        mean_hist.append(sum(fitness_scores)/len(fitness_scores))
        min_hist.append(min(fitness_scores))
        fitness_calls_hist.append(fitness.calls)
        diversity_hist.append(sum(i["avg_distance"] for i in population)/len(population))
        
        if plot:
            plot1(max_hist, min_hist, mean_hist, diversity_hist)

        best_fitness = max(fitness_scores)
        if last_best_fitness and abs((best_fitness - last_best_fitness)) < CONVERGENCE_THRESHOLD:
            counter_convergence += 1
        else:
            counter_convergence = 0
            last_best_fitness = best_fitness

        if max(fitness_scores) == 1.0 or counter_convergence > CONVERGENCE_GEN:
            break

        parents = parent_selection(population, n_parents, tournament_size, k)
        population = reproduce_population(parents)
        population = mutate_population(population, mutation_rate)
        
        
    return population, fitness_hist, max_hist, mean_hist, min_hist, fitness_calls_hist, diversity_hist
    

# Simulation

In [None]:
population, fitness_hist, max_hist, mean_hist, min_hist, fitness_calls_hist, diversity_hist = simulation(problem_instance=10, population_size=500, k=0.7, plot=False)
plot1(max_hist, min_hist, mean_hist, diversity_hist)
max_fitness = max(fitness_hist, key=lambda fs:max(fs))
i = fitness_hist.index(max_fitness)
best_individual = max(population, key=lambda i: i["fitness"])

print(f"Number of generations: {len(fitness_calls_hist)}")

print(f"Number of fitness calls to find max fitness: {fitness_calls_hist[i]}")
print(f"Best individual: {''.join(str(g) for g in best_individual['genome'])}")
print(f"Fitness: {best_individual['fitness']}")


# Results
## Problem instance = 1
Population = 180

K = 0.7

Number of generations: 199

Number of fitness calls to find max fitness: 35820

Best individual: 1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111

Fitness: 1.0

## Problem instance = 2
Population = 300

K = 0.7

Number of generations: 418

Number of fitness calls to find max fitness: 68340

Best individual: 1111011111111111111011111101011111111011111111111111111111111111111111011111111111111011111111111101111111111111111111111111111110101101111111111111111111100111111111111111111111110111111111111111111111111110111111011111111011010111111111111111111111111101111111111111111111111111111111111111101111111111011011111111111110011111111111111111111111111111111111111111111111111111111111111011111111111111111111111111111111110111111111111011111111111111111111111111111111110111111011011011111101111111111111111110100011111111111111111001111111111111111111111110111111111111111111111101111111111111111111111111111111111111111111111011111111111111111111110111111111111111110111111111111111111111101111111011111111101011111111111111111111110111110111101111111111101101111111111011011111111111111111110111111111111110111111111101111111111111111111101111100111111111111111111111111111111111111111111111111111111111111111111111111111111110111111110111011111111011111111111111111111111111111111111111111111011111

Fitness: 0.934

## Problem instance = 5
Population = 500

K = 0.7

Number of generations: 107

Number of fitness calls to find max fitness: 19220

Best individual: 0101101011100011101011000110100111001010110111101000011110111000111001101010101000101011111000111101011011011000111000111101101011110001100001001010000000111111011001101110001100010111100010110101110110000010001101111111111111011101010010100100111010110000010001010110100001001001001011010101001110001001101010100110000000001110110111000011010010011001011000010110110110100011001010001111001000010111111110001111000110110101101100000011000010001110101000010100000000000101000000110011110011011011101001010010100100111000111111101110001110100000110000001111101010000101100001111001000111000001011110001001011100001011101111011100100101110001010010001011000110010111101001000011101000001010000001100100100101111110000110111101011011110111111011100101100101000011100111101110010100011101010011000001001000001100001101000111110001010011000010010001101000000000001110011010001001100011000000110101111101010000000010100101101111000100110001110011010001101011010110101100101010110000110100111010010001011011

Fitness: 0.4313

## Problem instance = 10
Population = 500

K = 0.7

Number of generations: 108

Number of fitness calls to find max fitness: 17600

Best individual: 1001100101100100111001000101010100011011001010001000111000010001101011100110110001011111000011001011000101101011010110111000110011110011011001000111000011110001101011111100101011100111001000100100101110011111000000010111101010010110110100001111001010000001000101111101111001100111110001101100110000101010111111110110011000101100011111100100101011010010100100100100010110001110010011101010100101101110101010110010011001111000101101000111100011001111011010111101100010111001111110111011011111001001000111111111101110011111010111010111100101010101100111110001001000111001010110010001001111011001011111010000011010011100110101100000101001100111100101101001100011110100100000011100011001000000110110011011010011100110000110000101111111100011110010100001010100011011100001110001100111110101111001000011111100101110000101101111100011011000001010110100011010000011110011100101110000101000101010110100110011000111000111110011001110001000011010001110111101110111001011111100000010000011101111111110010001110111

Fitness: 0.3367906