# Installing libraries
This part installs the libraries that are not available out of the box (OOTB).

In [None]:
import sys
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install numpy


# Imports

Here we import all the libraries we'll use, including the `matplotlib` and `numpy` modules installed above.

In [None]:
import random
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import csv
import math

# Loading data

In [None]:
# Parallel lists of data points: X[i] and Y[i] come from the same CSV row.
X = []
Y = []
# newline='' hands line-ending handling to the csv module, as the csv
# documentation requires for file objects passed to csv.reader.
with open('data.csv', 'r', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='"')
    for row in spamreader:
        # csv.reader yields [] for blank lines (e.g. a trailing newline);
        # skip them instead of failing with IndexError.
        if not row:
            continue
        # Column 0 is the x value and column 1 the y value; both must parse as int.
        X.append(int(row[0]))
        Y.append(int(row[1]))

In [None]:
# Scatter plot of the loaded data points on a 20x10 figure.
plt.rcParams['figure.figsize'] = [20, 10]
plt.scatter(x=X, y=Y, marker="o")
plt.xlabel(xlabel="Costume cost")
plt.ylabel(ylabel="# of comic issues")
plt.show()

# Generating population
We're looking for the optimal parameters _a_ and _b_ of the equation _f(x) = ax + b_, which is used for linear regression of the data.

Each individual takes a binary form in which the first half encodes the _a_ parameter and the second half encodes the _b_ parameter.

Individual: \[binary_parameter_a | binary_parameter_b\], e.g. \[0 0 0 1 | 1 0 1 0\]

In [None]:
def generate_binary_phenotype(size: int):
    """Return a list of `size` random bits, each element being 0 or 1."""
    bits = []
    for _ in range(size):
        bits.append(random.randrange(2))
    return bits

def generate_population(population_size: int, phenotype_size: int, generator):
    """Build a population by calling `generator(phenotype_size)` once per individual.

    Returns a list holding `population_size` results, in call order.
    """
    individuals = []
    for _ in range(population_size):
        individuals.append(generator(phenotype_size))
    return individuals

In [None]:
# Start the search from 100 random individuals of 32 bits each.
population = generate_population(
    population_size=100,
    phenotype_size=32,
    generator=generate_binary_phenotype,
)

print(f"Population size: {len(population)}")
print(f"First individual: {population[0]}")

In [None]:
def decode_individual(individual):
    """Decode a bit list into the two integer parameters (a, b).

    The first half of `individual` is read as the binary number `a` and the
    remainder as the binary number `b` (most significant bit first). For an
    odd length the extra bit goes to `b`.
    """
    midpoint = len(individual) // 2
    a_bits = "".join(map(str, individual[:midpoint]))
    b_bits = "".join(map(str, individual[midpoint:]))
    return int(a_bits, 2), int(b_bits, 2)

def fitness_function(individual):
    """Score an individual as the summed absolute error of a*x + b over the data.

    Reads the module-level X and Y lists. Returns `(individual, fitness)`;
    a smaller fitness means the decoded line lies closer to the points.
    """
    slope, intercept = decode_individual(individual)
    total_error = 0
    for i, x in enumerate(X):
        total_error += abs(Y[i] - (slope * x + intercept))
    return individual, total_error

In [None]:
# Sanity check: evaluate and decode the first individual of the population.
test_individual, fitness_value = fitness_function(population[0])
test_a, test_b = decode_individual(test_individual)
print(f"Test individual: {test_individual} codes {test_a}x + {test_b} function")
print(f"Test individual's fitness value: {fitness_value}")

In [None]:
# Evaluate everyone; the lowest fitness (error) is the best, the highest the worst.
individual_fitness_pairs = [fitness_function(candidate) for candidate in population]
best = min(individual_fitness_pairs, key=lambda pair: pair[1])
worst = max(individual_fitness_pairs, key=lambda pair: pair[1])


In [None]:
# Draw the lines encoded by the best (left) and worst (right) individuals
# of the initial population over the data points.
a_best, b_best = decode_individual(best[0])
a_worst, b_worst = decode_individual(worst[0])

plt.rcParams['figure.figsize'] = [20, 10]
figure, axis = plt.subplots(ncols=2)

x_best = np.arange(0.0, 2000.0, 1.0)
y_best = x_best * a_best + b_best

x_worst = np.arange(0.0, 2000.0, 1.0)
y_worst = x_worst * a_worst + b_worst

# Left panel: data plus the best line.
axis[0].scatter(X, Y, marker="o")
axis[0].plot(x_best, y_best)

# Right panel: data plus the worst line.
axis[1].scatter(X, Y, marker="o")
axis[1].plot(x_worst, y_worst)

plt.show()

In [None]:
def mutation(individual, initial_mutation_rate):
    """Return a copy of `individual` with each bit flipped independently.

    A negative `initial_mutation_rate` selects the default rate of
    1/len(individual); otherwise the given rate is used as the per-bit
    flip probability. The input list is not modified.
    """
    if initial_mutation_rate < 0:
        rate = 1 / len(individual)
    else:
        rate = initial_mutation_rate
    # One random draw per gene, in order; abs(gene - 1) turns 0 into 1 and 1 into 0.
    return [abs(gene - 1) if random.random() < rate else gene for gene in individual]

# Demo: mutate the worst individual with a 50% per-bit flip rate.
print(f"Original phenotype: {worst[0]}")
mutant = mutation(worst[0], 0.5)
print(f"Mutated phenotype : {mutant}")

In [None]:
def recombination(individual_1, individual_2):
    """Single-point crossover of two parents, returning two children.

    The cut position is drawn from 1 .. len(individual_1) - 1, so
    `individual_1` needs at least two genes (otherwise random.randrange
    raises ValueError). Each child takes the head of one parent and the
    tail of the other.
    """
    cut = random.randrange(1, len(individual_1))
    head_1, tail_1 = individual_1[:cut], individual_1[cut:]
    head_2, tail_2 = individual_2[:cut], individual_2[cut:]
    return head_1 + tail_2, head_2 + tail_1

# Demo: cross an all-ones parent with an all-zeros parent so the cut is visible.
test_1 = [1] * 8
test_2 = [0] * 8
child_1, child_2 = recombination(test_1, test_2)
print(f"Children: {child_1} & {child_2}")

In [None]:
test = [('a', 10), ('b', 17), ('c', 11)]


def simple_selection(individual_fitness_pairs):
    """Pick two distinct (individual, fitness) pairs uniformly at random.

    Both chosen pairs are removed from `individual_fitness_pairs` in place.
    Returns `(first_individual, second_individual, individual_fitness_pairs)`,
    where the last item is the same, now shorter, list object.
    """
    chosen = []
    for _ in range(2):
        index = random.randrange(len(individual_fitness_pairs))
        pair = individual_fitness_pairs[index]
        individual_fitness_pairs.remove(pair)
        chosen.append(pair[0])
    first, second = chosen
    return first, second, individual_fitness_pairs


# Demo on the small fixture above: two picks, one pair left over.
f, s, ind = simple_selection(test)
print(f)
print(s)
print(ind)

In [None]:
test = [('a', 10), ('b', 17), ('c', 11)]


def _roulette_pick(individual_fitness_pairs):
    """Return the index of one pair, drawn with probability proportional to 1/fitness."""
    # A fitness of 0 is a perfect score and would divide by zero below;
    # such individuals always win the draw (uniformly among themselves).
    perfect = [i for i, pair in enumerate(individual_fitness_pairs) if pair[1] == 0]
    if perfect:
        return random.choice(perfect)
    weights = [1 / pair[1] for pair in individual_fitness_pairs]
    return random.choices(range(len(weights)), weights=weights)[0]


def monte_carlo_selection(individual_fitness_pairs):
    """Roulette-wheel selection of two distinct parents (lower fitness is better).

    Each (individual, fitness) pair is drawn with probability proportional to
    1/fitness. Exact float weights are used rather than an integer wheel of
    ceil(10000/fitness) slots, because that rounding gives every individual
    with fitness >= 10000 exactly one slot and so removes all selection
    pressure on large error values.

    Args:
        individual_fitness_pairs: list of (individual, fitness) tuples with
            non-negative fitness. The two selected pairs are removed from
            this list in place.

    Returns:
        (first_individual, second_individual, individual_fitness_pairs), the
        last item being the same, now shorter, list object.

    Raises:
        ValueError: if fewer than two pairs are available.
    """
    if len(individual_fitness_pairs) < 2:
        raise ValueError("monte_carlo_selection needs at least two individuals")
    # Popping the first pick before the second draw keeps the parents distinct.
    first = individual_fitness_pairs.pop(_roulette_pick(individual_fitness_pairs))
    second = individual_fitness_pairs.pop(_roulette_pick(individual_fitness_pairs))
    return first[0], second[0], individual_fitness_pairs


# Demo on the small fixture above: two picks, one pair left over.
f, s, ind = monte_carlo_selection(test)
print(f)
print(s)
print(str(ind))

In [None]:
def breedResult(population, mutation_rate, elite_size, epochs, fitness_f, mutation_f, recombination_f, selection_f):
    """Run a genetic algorithm and return the best individual seen in each epoch.

    Args:
        population: initial list of individuals; its length is the target
            size of every later generation.
        mutation_rate: passed unchanged to `mutation_f` as its second argument.
        elite_size: number of lowest-fitness individuals copied unchanged
            into the next generation.
        epochs: number of generations to evaluate.
        fitness_f: maps an individual to an (individual, fitness) pair;
            lower fitness is treated as better.
        mutation_f: called as mutation_f(individual, mutation_rate).
        recombination_f: maps two parents to two children.
        selection_f: takes the list of (individual, fitness) pairs and returns
            (parent_1, parent_2, remaining_pairs).

    Returns:
        A list with one entry per epoch: the lowest-fitness individual of the
        population evaluated in that epoch. The generation produced by the
        last epoch is not evaluated.
    """
    pop_size = len(population)
    best_history = []
    current_population = population
    for _ in range(epochs):
        individual_fitness_pairs = sorted(map(fitness_f, current_population), key=lambda pair: pair[1])
        best_history.append(individual_fitness_pairs[0][0])
        # Elitism: the top individuals survive unchanged.
        new_population = [pair[0] for pair in individual_fitness_pairs[:elite_size]]
        while len(new_population) < pop_size:
            first, second, individual_fitness_pairs = selection_f(individual_fitness_pairs)
            first, second = recombination_f(first, second)
            new_population.append(mutation_f(first, mutation_rate))
            new_population.append(mutation_f(second, mutation_rate))
        # Children arrive in pairs, so an odd number of free slots would
        # overshoot by one; trim to keep the population size constant.
        current_population = new_population[:pop_size]
    return best_history

# Linear fit: 100 epochs, 10 elites, default mutation rate (negative => 1/len).
results = breedResult(
    population,
    mutation_rate=-1,
    elite_size=10,
    epochs=100,
    fitness_f=fitness_function,
    mutation_f=mutation,
    recombination_f=recombination,
    selection_f=monte_carlo_selection,
)

In [None]:
# Left panel: the line from the last recorded epoch.
# Right panel: the best line of every epoch, overlaid on the data.
a_best, b_best = decode_individual(results[-1])

plt.rcParams['figure.figsize'] = [20, 10]
figure, axis = plt.subplots(ncols=2)

x_best = np.arange(0.0, 2000.0, 1.0)
y_best = x_best * a_best + b_best

x_worst = np.arange(0.0, 2000.0, 1.0)
for result in results:
    current = decode_individual(result)
    a_worst, b_worst = current
    y_worst = x_worst * a_worst + b_worst
    axis[1].plot(x_worst, y_worst)

axis[0].scatter(X, Y, marker="o")
axis[0].plot(x_best, y_best)
axis[1].scatter(X, Y, marker="o")

plt.show()
print(f"Best solution {a_best}x + {b_best}")

In [None]:
def decode_individual_sq(individual):
    """Decode a bit list into the three integer parameters (a, b, c).

    The list is cut into thirds, each read as a binary number with the most
    significant bit first. When the length is not a multiple of three, the
    leftover bits go to `c`.
    """
    third = len(individual) // 3
    segments = (
        individual[:third],
        individual[third:2 * third],
        individual[2 * third:],
    )
    a, b, c = (int("".join(map(str, segment)), 2) for segment in segments)
    return a, b, c

def fitness_function_sq(individual):
    """Score an individual as the summed absolute error of a*x**2 + b*x + c over the data.

    Reads the module-level X and Y lists. Returns `(individual, fitness)`;
    a smaller fitness means the decoded parabola lies closer to the points.
    """
    a, b, c = decode_individual_sq(individual)
    total_error = 0
    for i, x in enumerate(X):
        total_error += abs(Y[i] - (a * x ** 2 + b * x + c))
    return individual, total_error

# Quadratic fit: 200 individuals of 48 bits, which decode_individual_sq cuts
# into three 16-bit parameters a, b and c.
population = generate_population(200, 48, generate_binary_phenotype)

results = breedResult(population, -1, 10, 100, fitness_function_sq, mutation, recombination, monte_carlo_selection)

a_best, b_best, c_best = decode_individual_sq(results[-1])
# Index 0 is the best individual of the first epoch (index 1 would be the second epoch).
a_first, b_first, c_first = decode_individual_sq(results[0])

plt.rcParams['figure.figsize'] = [20, 10]

x_best = np.arange(0.0, 2000.0, 1.0)
x_squared = x_best * x_best
y_best = x_squared * a_best + (x_best * b_best + c_best)
y_first = x_squared * a_first + (x_best * b_first + c_first)

plt.scatter(X, Y, marker="o")
plt.plot(x_best, y_best)
# Uncomment to overlay the first epoch's best curve for comparison.
# plt.plot(x_best, y_first)
plt.xlabel("Costume cost")
plt.ylabel("# of comic issues")
plt.show()
print(f"Best solution {a_best}x^2 + {b_best}x + {c_best}")