In [1]:
!gdown '1JRN1IcKBL-Q_j0JTFgi8Arn9bmJJ8aEJ' -O '/content/dataset/'
!gdown '1r2Sb4ogBjRR1Wu28gy9X2D4WmOdFoICT' -O '/content/dataset/'

Downloading...
From: https://drive.google.com/uc?id=1JRN1IcKBL-Q_j0JTFgi8Arn9bmJJ8aEJ
To: /content/dataset/zipcodes.csv
100% 590/590 [00:00<00:00, 1.93MB/s]
Downloading...
From: https://drive.google.com/uc?id=1r2Sb4ogBjRR1Wu28gy9X2D4WmOdFoICT
To: /content/dataset/advertising.csv
100% 4.06k/4.06k [00:00<00:00, 9.49MB/s]


In [74]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
random.seed(0)
%matplotlib inline

def load_data_from_file(path = '/content/dataset/advertising.csv'):
  data = np.genfromtxt(path, delimiter=',', dtype=None, skip_header=1)
  feature_X = data[:,:-1]
  sale_Y = data[:,-1]
  feature_X = np.c_[np.ones(len(feature_X)), feature_X]

  return feature_X, sale_Y

In [75]:
advertising = pd.read_csv('/content/dataset/advertising.csv')
advertising.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [4]:
advertising.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [76]:
feature_X, sale_Y = load_data_from_file()
print(feature_X[:5,:])

[[  1.  230.1  37.8  69.2]
 [  1.   44.5  39.3  45.1]
 [  1.   17.2  45.9  69.3]
 [  1.  151.5  41.3  58.5]
 [  1.  180.8  10.8  58.4]]


In [6]:
series_1 = np.ones(np.shape(feature_X)[0])
new = np.c_[feature_X, series_1]
print(new)
print(series_1)

[[  1.  230.1  37.8  69.2   1. ]
 [  1.   44.5  39.3  45.1   1. ]
 [  1.   17.2  45.9  69.3   1. ]
 [  1.  151.5  41.3  58.5   1. ]
 [  1.  180.8  10.8  58.4   1. ]
 [  1.    8.7  48.9  75.    1. ]
 [  1.   57.5  32.8  23.5   1. ]
 [  1.  120.2  19.6  11.6   1. ]
 [  1.    8.6   2.1   1.    1. ]
 [  1.  199.8   2.6  21.2   1. ]
 [  1.   66.1   5.8  24.2   1. ]
 [  1.  214.7  24.    4.    1. ]
 [  1.   23.8  35.1  65.9   1. ]
 [  1.   97.5   7.6   7.2   1. ]
 [  1.  204.1  32.9  46.    1. ]
 [  1.  195.4  47.7  52.9   1. ]
 [  1.   67.8  36.6 114.    1. ]
 [  1.  281.4  39.6  55.8   1. ]
 [  1.   69.2  20.5  18.3   1. ]
 [  1.  147.3  23.9  19.1   1. ]
 [  1.  218.4  27.7  53.4   1. ]
 [  1.  237.4   5.1  23.5   1. ]
 [  1.   13.2  15.9  49.6   1. ]
 [  1.  228.3  16.9  26.2   1. ]
 [  1.   62.3  12.6  18.3   1. ]
 [  1.  262.9   3.5  19.5   1. ]
 [  1.  142.9  29.3  12.6   1. ]
 [  1.  240.1  16.7  22.9   1. ]
 [  1.  248.8  27.1  22.9   1. ]
 [  1.   70.6  16.   40.8   1. ]
 [  1.  29

In [77]:
def generate_random_value(bonud=10):
  return (random.random() - 0.5) * bonud

In [78]:
def create_individuals(n=4, bound=10):
  return [generate_random_value()for _ in range(n)]

In [81]:
individual = create_individuals()
print(individual)

[-0.23403045847644188, 0.833820394550312, 4.081128851953352, 0.046868558173902564]


In [82]:
feature_X, sale_Y = load_data_from_file()


def compute_loss(individual):
  theta = np.array(individual)
  y_hat = feature_X.dot(theta)
  loss = np.multiply((y_hat - sale_Y), (y_hat - sale_Y)).mean()
  return loss

def compute_fitness(individual):
  loss = compute_loss(individual)
  fitness_value = 1 / (1 + loss)
  return fitness_value

In [83]:
individual = [4.09 , 4.82 , 3.10 , 4.02]
fitness_score = compute_fitness(individual)
print(fitness_score)

1.0185991537088997e-06


In [84]:
def cross_over(individual1, individual2, crossover_rate = 0.9):
  individual1_new = individual1.copy()
  individual2_new = individual2.copy()

  for i in range(len(individual1)):
    if random.random() < crossover_rate:
      individual1_new[i] = individual2[i]
      individual2_new[i] = individual1[i]

  return individual1_new, individual2_new

individual1 = [4.09 , 4.82 , 3.10 , 4.02]
individual2 = [3.44 , 2.57 , -0.79 , -2.41]
individual1 , individual2 = cross_over(individual1, individual2, 2.0)
print("individual1:", individual1)
print("individual2:", individual2)

individual1: [3.44, 2.57, -0.79, -2.41]
individual2: [4.09, 4.82, 3.1, 4.02]


In [85]:
def mutate(individual, mutation_rate = 0.1):
  individual_new = individual.copy()
  for i in range(len(individual)):
    if random.random() < mutation_rate:
      individual_new[i] = create_individuals()
  return individual_new

In [87]:
before_individual =[4.09, 4.82, 3.10, 4.02]
after_individual = mutate(individual, mutation_rate = 2.0)
print(before_individual == after_individual)

False


In [86]:
def create_population(m):
  return [create_individuals() for i in range(m)]

In [88]:
def selection(sorted_old_population, m):
  index1 = random.randint(0, m-1)
  while True:
    index2 = random.randint(0, m-1)
    if index1 != index2:
      break
  individual_s = sorted_old_population[index1]
  if index2 > index1:
    individual_s = sorted_old_population[index2]
  return individual_s

In [89]:
def create_new_population(old_population, elitism=2, gen=1):
    m = len(old_population)

    # Sort population by fitness, assuming higher fitness is better
    sorted_population = sorted(old_population, key=compute_fitness)

    # Print the best loss every generation
    if gen % 1 == 0:
        print("Best loss:", compute_loss(sorted_population[-1]), "with chromosome", sorted_population[-1])

    new_population = []

    # Generate new individuals while maintaining elitism
    while len(new_population) < m - elitism:
        # Select two parents
        individual1 = selection(sorted_population, m)
        individual2 = selection(sorted_population, m)

        # Apply crossover to generate offspring
        offspring1, offspring2 = cross_over(individual1, individual2)

        # Apply mutation
        mutation_individual1 = mutate(offspring1)
        mutation_individual2 = mutate(offspring2)

        # Add mutated offspring to the new population
        new_population.append(mutation_individual1)
        new_population.append(mutation_individual2)

    # Add the top 'elitism' individuals directly to the new population (elitism)
    for elite_individual in sorted_population[m-elitism:]:
        new_population.append(elite_individual.copy())

    # Return the new population and the best loss
    return new_population, compute_loss(sorted_population[-1])


In [90]:
individual1 = [4.09 , 4.82 , 3.10 , 4.02]
individual2 = [3.44 , 2.57 , -0.79 , -2.41]
old_population = [ individual1 , individual2 ]
new_population , _ = create_new_population ( old_population , elitism =2 , gen =1)

Best loss: 123415.051528805 with chromosome [3.44, 2.57, -0.79, -2.41]


In [91]:
def run_GA () :
  n_generations = 100
  m = 600
  features_X, sales_Y = load_data_from_file()
  population = create_population(m)
  losses_list = []
  for i in range ( n_generations ):
    population, losses = create_new_population(population, elitism=2, gen=i)
    losses_list.append(losses)
  return population, losses_list

In [92]:
population, losses_list = run_GA()

Best loss: 499.42870543645824 with chromosome [3.331600837968306, -0.09720038244186435, 1.449875562942834, -0.2732124726990648]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (4,) + inhomogeneous part.

In [39]:
import matplotlib.pyplot as plt

plt.plot(losses_list)
plt.xlabel('Generation')
plt.ylabel('Loss')
plt.title('Loss vs Generation')
plt.show()

NameError: name 'losses_list' is not defined