In [60]:
import numpy
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


In [61]:
df = pd.read_csv('../../dataset/heart_v2.csv')
df.head()

Unnamed: 0,age,sex,BP,cholestrol,heart disease
0,70,1,130,322,1
1,67,0,115,564,0
2,57,1,124,261,1
3,64,1,128,263,0
4,74,0,120,269,0


In [62]:
X = df.drop('heart disease',axis=1)
y = df['heart disease']

In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)
X_train.shape, X_test.shape

((189, 4), (81, 4))

# Genetic algorithm

In [64]:
def cal_pop_fitness(pop):
    # Calculating the fitness value of each solution in the current population.
    fitness = []
    for i in range(len(pop)):
        if pop[i][1] >= 1:
            classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth = pop[i][0], min_samples_leaf=pop[i][1], n_estimators=pop[i][2], oob_score=True)
            classifier_rf.fit(X_train, y_train)
            fitness.append(classifier_rf.oob_score_)
            print(fitness)
        else:
            fitness.append(0)
    return fitness

In [65]:
def select_mating_pool(pop, fitness, num_parents):
    # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation.
    parents = numpy.empty((num_parents, pop.shape[1]))
    for parent_num in range(num_parents):
        max_fitness_idx = numpy.where(fitness == numpy.max(fitness))
        max_fitness_idx = max_fitness_idx[0][0]
        parents[parent_num, :] = pop[max_fitness_idx, :]
        fitness[max_fitness_idx] = -99999
    return parents

In [66]:
def crossover(parents, offspring_size):
    offspring = numpy.empty(offspring_size)
    # The point at which crossover takes place between two parents. Usually, it is at the center.
    crossover_point = numpy.uint8(offspring_size[1]/2)

    for k in range(offspring_size[0]):
        # Index of the first parent to mate.
        parent1_idx = k % parents.shape[0]
        # Index of the second parent to mate.
        parent2_idx = (k+1) % parents.shape[0]
        # The new offspring will have its first half of its genes taken from the first parent.
        offspring[k, 0:crossover_point] = parents[parent1_idx, 0:crossover_point]
        # The new offspring will have its second half of its genes taken from the second parent.
        offspring[k, crossover_point:] = parents[parent2_idx, crossover_point:]
    return offspring

In [67]:
def mutation(offspring_crossover, num_mutations=1):
    mutations_counter = numpy.uint8(
        offspring_crossover.shape[1] / num_mutations)
    # Mutation changes a number of genes as defined by the num_mutations argument. The changes are random.
    for idx in range(offspring_crossover.shape[0]):
        gene_idx = mutations_counter - 1
        for mutation_num in range(num_mutations):
            # The random value to be added to the gene.
            random_value = numpy.random.randint(1.0, 10.0, 1)
            offspring_crossover[idx, gene_idx] = offspring_crossover[idx,
                                                                     gene_idx] + random_value
            gene_idx = gene_idx + mutations_counter
    return offspring_crossover

# main part

In [72]:
num_weights = 3 # len(equation_inputs)


sol_per_pop = 3
num_parents_mating = 2

pop_size = (sol_per_pop, num_weights)

new_population = numpy.random.randint(low=1, high=20, size=pop_size)
print(new_population)


num_generations = 1000

[[ 3 11 17]
 [17 11 12]
 [15 15 11]]


In [73]:
for generation in range(num_generations):

    print("\nGeneration : ", generation)
    fitness = cal_pop_fitness(new_population)
    print('Fitness: ')
    print(fitness)

    parents = select_mating_pool(new_population, fitness,  num_parents_mating)
    print("Parents: ")
    print(parents)

    offspring_crossover = crossover(parents, offspring_size=(pop_size[0]-parents.shape[0], num_weights))
    print("Crossover")
    print(offspring_crossover)

    offspring_mutation = mutation(offspring_crossover, num_mutations=2)
    print("Mutation")
    print(offspring_mutation)

    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_mutation

fitness = cal_pop_fitness(new_population)
# print(fitness)

best_match_idx = numpy.where(fitness == numpy.max(fitness))

print("Best solution : ", new_population[best_match_idx, :][0][0])
solCombo_ = new_population[best_match_idx, :][0][0]
# print("Best solution fitness : ", fitness[best_match_idx])


Generation :  0
[0.6507936507936508]
[0.6507936507936508, 0.6402116402116402]
[0.6507936507936508, 0.6402116402116402, 0.6984126984126984]
Fitness: 
[0.6507936507936508, 0.6402116402116402, 0.6984126984126984]
Parents: 
[[15. 15. 11.]
 [ 3. 11. 17.]]
Crossover
[[15. 11. 17.]]
Mutation
[[23. 17. 17.]]

Generation :  1
[0.6984126984126984]
[0.6984126984126984, 0.6507936507936508]
[0.6984126984126984, 0.6507936507936508, 0.6772486772486772]
Fitness: 
[0.6984126984126984, 0.6507936507936508, 0.6772486772486772]
Parents: 
[[15. 15. 11.]
 [23. 17. 17.]]
Crossover
[[15. 17. 17.]]
Mutation
[[23. 21. 17.]]

Generation :  2
[0.6984126984126984]
[0.6984126984126984, 0.6772486772486772]
[0.6984126984126984, 0.6772486772486772, 0.6984126984126984]
Fitness: 
[0.6984126984126984, 0.6772486772486772, 0.6984126984126984]
Parents: 
[[15. 15. 11.]
 [23. 21. 17.]]
Crossover
[[15. 21. 17.]]
Mutation
[[16. 27. 17.]]

Generation :  3
[0.6984126984126984]
[0.6984126984126984, 0.6984126984126984]
[0.698412698

In [74]:
# print(solCombo_)
classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth=solCombo_[0], min_samples_leaf=solCombo_[1], n_estimators=solCombo_[2], oob_score=True)
classifier_rf.fit(X_train, y_train)

In [75]:
print("oob score:\t\t", classifier_rf.oob_score_) # checking the oob score
print("classifier score:\t", classifier_rf.score(X_test, y_test)) # checking the model score

oob score:		 0.7037037037037037
classifier score:	 0.6419753086419753
