In [2]:
import numpy as np
import matplotlib.pyplot as plt
import random
random.seed(0) 
%matplotlib inline

def load_data_from_file(fileName = "advertising.csv"):
  data = np.genfromtxt(fileName, dtype=None, delimiter=',', skip_header=1)
  features_X = data[:, :3]
  sales_Y = data[:, 3]
  intercept = np.ones((features_X.shape[0], 1))
  features_X = np.concatenate((intercept, features_X), axis=1)
  return features_X, sales_Y


In [5]:
features_X, _ = load_data_from_file()
print(features_X[:5,:])

[[  1.  230.1  37.8  69.2]
 [  1.   44.5  39.3  45.1]
 [  1.   17.2  45.9  69.3]
 [  1.  151.5  41.3  58.5]
 [  1.  180.8  10.8  58.4]]


In [6]:
_, sales_Y = load_data_from_file()
print(sales_Y.shape)

(200,)


In [7]:
def generate_random_value(bound = 10):
    return (random.random() - 0.5)*bound 

In [8]:
def create_individual(n=4, bound=10):
    individual = [generate_random_value() for _ in range(n)]
    return individual

In [9]:
invidual = create_individual()
print(invidual)

[3.444218515250481, 2.5795440294030247, -0.79428419169155, -2.4108324970703663]


In [10]:
def compute_loss(invidual):
    theta = np.array(individual)
    y_hat = features_X.dot(theta)
    loss = np.multiply((y_hat-sales_Y), (y_hat - sales_Y)).mean()
    return loss 

In [11]:
def compute_fitness(invidual):
    loss = compute_loss(invidual)
    fitness = 1 / (loss + 1)
    return fitness

In [13]:
features_X, sales_Y = load_data_from_file()
individual = [4.09, 4.82, 3.10, 4.02]
fitness_score = compute_fitness(individual)
print(fitness_score)

1.0185991537088997e-06


In [14]:
def crossover(individual1, individual2, crossovaer_rate=0.9):
    individual1_new = individual1.copy()
    individual2_new = individual2.copy()

    for i in range(len(individual1)):
        if random.random() < crossovaer_rate:
            individual1_new[i] = individual2[i]
            individual2_new[i] = individual1[i]
    return individual1_new, individual2_new

In [15]:
individual1 = [4.09, 4.82, 3.10, 4.02]
individual2 = [3.44, 2.57, -0.79, -2.41]

individual1, individual2 = crossover(individual1, individual2, 2.0)
print("individual2: ", individual1)
print("individual1: ", individual2)

individual2:  [3.44, 2.57, -0.79, -2.41]
individual1:  [4.09, 4.82, 3.1, 4.02]


In [16]:
def mutate(individual, mutation_rate = 0.05):
    individual_m = individual.copy()

    for i in range(len(individual)):
        if random.random() < mutation_rate:
            individual_m[i] = generate_random_value()
    return individual_m

In [18]:
before_individual = [4.09, 4.82, 3.10, 4.02]
after_individual = mutate(individual, mutation_rate=2.0)
print(before_individual == after_individual)

False


In [19]:
def initializePopulation(m):
    population = [create_individual() for _ in range(m)]
    return population 

In [20]:
population = initializePopulation(100)
print(len(population))

100


In [23]:
def selection(sorted_old_poplation, m):
    index1 = random.randint(0, m-1)
    while True:
        index2 = random.randint(0, m-1)
        if(index2 != index1):
            break 
    individual_s = sorted_old_poplation[index1]
    if index2 > index1:
        individual_s = sorted_old_poplation[index2]
    
    return individual_s


In [24]:
population = initializePopulation(m=100)
individual_s = selection(population, m=100)
print(individual_s)

[-1.586538409487509, -4.9068225958490155, 2.2162474304744997, 4.099060293040378]


In [25]:
def create_new_population(old_population, elitism=2, gen=1):
    m = len(old_population)
    sorted_population = sorted(old_population, key=compute_fitness)
    
    if gen%1 == 0:
        print("Best loss:", compute_loss(sorted_population[m-1]), "with chromsome: ", sorted_population[m-1])
    
    new_population = []
    while len(new_population) < m-elitism:
        # selection
        individual_s1 = selection(sorted_population, m)
        individual_s2 = selection(sorted_population, m)
        individual_t1, individual_t2 = crossover(individual_s1, individual_s2)
    
    # mutation
        individual_m1 = mutate(individual_t1)
        individual_m2 = mutate(individual_t2)
    
        new_population.append(individual_m1)
        new_population.append(individual_m2)

    for ind in sorted_population[m-elitism:]:
        new_population.append(ind.copy())

    return new_population, compute_loss(sorted_population[m-1])

In [26]:
individual1 = [4.09, 4.82, 3.10, 4.02]
individual2 = [3.44, 2.57, -0.79, -2.41]
old_population = [individual1, individual2]
new_population, _ = create_new_population(old_population, elitism=2, gen=1)

Best loss: 981739.45831358 with chromsome:  [3.44, 2.57, -0.79, -2.41]


In [27]:
def run_GA():
    n_generations = 100
    m = 600
    features_X, sales_Y = load_data_from_file()
    population = initializePopulation(m)
    losses_list = []
    for i in range(n_generations):
        population, losses = create_new_population(population, 2, i)
        losses_list.append(losses)
    return losses_list, population

In [28]:
losses_list, population = run_GA()

Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 981739.45831358 with chromsome:  [-3.927592603464926, 4.418707909207546, 4.4647201585378244, 4.626663572736399]
Best loss: 98173