In [46]:
import numpy
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm
import warnings
warnings.simplefilter("ignore", UserWarning)

In [47]:
# Read the heart data set from disk.
df = pd.read_csv('dataset/heart_v2.csv')
# Running log of the best fitness per evaluated population; cal_pop_fitness appends to it.
best_outputs = list()
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,age,sex,BP,cholestrol,heart disease
0,70,1,130,322,1
1,67,0,115,564,0
2,57,1,124,261,1
3,64,1,128,263,0
4,74,0,120,269,0


In [48]:
# Target is the 'heart disease' column; every other column is a feature.
y = df['heart disease']
X = df.drop(columns='heart disease')

In [49]:
# 70% of the rows go to training; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=42,
)
(X_train.shape, X_test.shape)

((189, 4), (81, 4))

# Genetic algorithm

In [50]:
def cal_pop_fitness(pop):
    """Score every candidate in ``pop`` by random-forest out-of-bag accuracy.

    Each row of ``pop`` is read as ``(max_depth, min_samples_leaf,
    n_estimators)``. A forest with those settings is fitted on the
    notebook-level ``X_train`` / ``y_train`` and its ``oob_score_`` becomes
    the fitness of that row.

    Rows in which any of the three genes is below 1 cannot describe a valid
    forest, so they receive a fitness of 0 instead of raising inside
    scikit-learn.

    Side effect: the best fitness of this population is appended to the
    notebook-level ``best_outputs`` list (skipped when ``pop`` is empty).

    Args:
        pop: sequence of rows (list of lists or 2-D array) with at least
            three numeric genes each.

    Returns:
        list: one fitness value per row of ``pop``, in row order.
    """
    fitness = []
    for candidate in pop:
        # Genes may arrive as floats (the crossover/mutation arrays are float);
        # scikit-learn expects integer counts here, so truncate explicitly.
        max_depth, min_samples_leaf, n_estimators = (int(gene) for gene in candidate[:3])
        if min(max_depth, min_samples_leaf, n_estimators) < 1:
            # Invalid hyperparameters: worst possible score, no fit attempted.
            fitness.append(0)
            continue
        classifier_rf = RandomForestClassifier(
            random_state=42,
            n_jobs=-1,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            n_estimators=n_estimators,
            oob_score=True,
        )
        classifier_rf.fit(X_train, y_train)
        fitness.append(classifier_rf.oob_score_)
    if fitness:
        # numpy.max would raise on an empty list, so only log real populations.
        best_outputs.append(numpy.max(fitness))
    return fitness

In [51]:
def select_mating_pool(pop, fitness, num_parents):
    """Return the ``num_parents`` fittest rows of ``pop``, best first.

    Rows are picked one at a time: the row with the highest remaining fitness
    is copied into the result and then excluded from later picks. Ties go to
    the row with the lowest index.

    The caller's ``fitness`` sequence is left untouched; the bookkeeping is
    done on a private copy.

    Args:
        pop: 2-D numpy array, one candidate per row.
        fitness: sequence with one numeric fitness value per row of ``pop``.
        num_parents: number of rows to select.

    Returns:
        numpy.ndarray: float array of shape ``(num_parents, pop.shape[1])``.
    """
    # numpy.array copies, so masking picked entries never leaks to the caller.
    remaining = numpy.array(fitness, dtype=float)
    parents = numpy.empty((num_parents, pop.shape[1]))
    for parent_num in range(num_parents):
        # argmax returns the first index holding the maximum.
        best_idx = int(numpy.argmax(remaining))
        parents[parent_num, :] = pop[best_idx, :]
        # Exclude this row from the following rounds.
        remaining[best_idx] = -numpy.inf
    return parents

In [52]:
def crossover(parents, offspring_size):
    """Build offspring by single-point crossover of consecutive parents.

    Offspring ``k`` takes the genes before the midpoint from parent
    ``k % n_parents`` and the genes from the midpoint onwards from parent
    ``(k + 1) % n_parents``.

    Args:
        parents: 2-D numpy array, one parent per row.
        offspring_size: ``(number_of_offspring, number_of_genes)``.

    Returns:
        numpy.ndarray: float array of shape ``offspring_size``.
    """
    n_offspring = offspring_size[0]
    # Plain integer midpoint; a uint8 cast would wrap once it exceeds 255.
    crossover_point = offspring_size[1] // 2
    n_parents = parents.shape[0]

    offspring = numpy.empty(offspring_size)
    for k in range(n_offspring):
        first_parent = parents[k % n_parents]
        second_parent = parents[(k + 1) % n_parents]
        # Head of the chromosome from the first parent, tail from the second.
        offspring[k, :crossover_point] = first_parent[:crossover_point]
        offspring[k, crossover_point:] = second_parent[crossover_point:]
    return offspring

In [53]:
def mutation(offspring_crossover, num_mutations=1):
    """Add a random integer from 1 to 5 to evenly spaced genes of every row.

    The genes of a row are split into ``num_mutations`` strides of
    ``n_genes // num_mutations`` genes, and the last gene of each stride is
    mutated. The array is modified in place and also returned.

    ``num_mutations`` larger than the number of genes is capped at the number
    of genes (each gene mutated once); a value below 1 leaves the array
    unchanged.

    Args:
        offspring_crossover: 2-D numpy array, one offspring per row.
        num_mutations: how many genes to mutate in each row.

    Returns:
        numpy.ndarray: the same array object that was passed in.
    """
    n_genes = offspring_crossover.shape[1]
    # A row cannot have more mutated genes than it has genes.
    num_mutations = min(int(num_mutations), n_genes)
    if num_mutations < 1:
        return offspring_crossover

    # Plain-int stride; a uint8 stride would wrap for wide rows and underflow
    # when computing the first gene index.
    stride = n_genes // num_mutations
    for idx in range(offspring_crossover.shape[0]):
        for gene_idx in range(stride - 1, stride * num_mutations, stride):
            # Take the scalar out of the size-1 draw before assigning it to
            # a single array element.
            random_value = numpy.random.randint(1, 6, 1)[0]
            offspring_crossover[idx, gene_idx] += random_value
    return offspring_crossover

# Main part

In [61]:
# Genetic-algorithm settings.
# Every individual carries three genes, which cal_pop_fitness reads as
# (max_depth, min_samples_leaf, n_estimators).
num_weights = 3
sol_per_pop, num_parents_mating = 5, 3
num_generations = 1000

pop_size = (sol_per_pop, num_weights)

# Random integer starting population; each gene is drawn from 1..15 (upper bound exclusive).
new_population = numpy.random.randint(1, 20 - 4, pop_size)
print(new_population)

[[ 6 11 10]
 [ 5  3  9]
 [15  4  1]
 [12  3 14]
 [ 1  2  6]]


In [62]:
# Evolve the population: score it, keep the fittest rows as parents, and
# refill the remaining slots with mutated crossover offspring.
for generation in tqdm(range(num_generations)):
    fitness = cal_pop_fitness(new_population)

    parents = select_mating_pool(new_population, fitness, num_parents_mating)

    offspring_crossover = crossover(
        parents,
        offspring_size=(pop_size[0] - parents.shape[0], num_weights),
    )

    offspring_mutation = mutation(offspring_crossover, num_mutations=2)

    # Parents survive unchanged at the front; mutated offspring fill the rest.
    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_mutation

# Score the final population so the reported fitness belongs to its rows.
fitness = cal_pop_fitness(new_population)

# Compare as an array: explicit element-wise equality instead of relying on
# list-vs-numpy-scalar comparison.
fitness_values = numpy.asarray(fitness)
best_match_idx = numpy.where(fitness_values == fitness_values.max())
# First row that reaches the maximum fitness.
best_idx = best_match_idx[0][0]

# Copy so later changes to new_population cannot alter the chosen solution.
solCombo_ = new_population[best_idx, :].copy()
print("Best solution : \t\t", solCombo_)
# Report the fitness of the selected row; row 0 is not guaranteed to be the best.
print("Best solution fitness : \t", fitness[best_idx])

100%|██████████| 1000/1000 [03:45<00:00,  4.43it/s]


Best solution : 		 [19 14  9]
Best solution fitness : 	 0.7037037037037037


In [63]:
# Refit a forest using the three genes of the best individual as hyperparameters.
final_rf_params = dict(
    random_state=42,
    n_jobs=-1,
    max_depth=solCombo_[0],
    min_samples_leaf=solCombo_[1],
    n_estimators=solCombo_[2],
    oob_score=True,
)
classifier_rf = RandomForestClassifier(**final_rf_params)
classifier_rf.fit(X_train, y_train)

In [64]:
# Out-of-bag estimate computed during fitting.
oob_accuracy = classifier_rf.oob_score_
print("oob score :\t\t", oob_accuracy)
# Score on the held-out test split.
test_accuracy = classifier_rf.score(X_test, y_test)
print("classifier score :\t", test_accuracy)

oob score :		 0.7037037037037037
classifier score :	 0.6172839506172839


# Creating Results dataset

In [65]:
from csv import writer

def append_list_as_row(file_name, list_of_elem):
    """Append ``list_of_elem`` as one CSV row at the end of ``file_name``.

    The file is opened in ``'a+'`` mode, so it is created when missing and
    rows already present are kept. ``newline=''`` leaves line endings to the
    csv writer.
    """
    with open(file_name, 'a+', newline='') as handle:
        writer(handle).writerow(list_of_elem)

In [66]:
# One report row: the three tuned hyperparameters followed by both scores.
row_contents = [
    *(solCombo_[gene] for gene in range(3)),
    classifier_rf.oob_score_,
    classifier_rf.score(X_test, y_test),
]
# Add the row to the end of the cumulative report file.
append_list_as_row('Report/report2_2022-12-21.csv', row_contents)

In [67]:
# Read the cumulative report back from disk and display it.
report = pd.read_csv('Report/report2_2022-12-21.csv')
report

Unnamed: 0,max_depth,min_samples_leaf,n_estimators,oob_score,classifier_score
0,16,14,9,0.703704,0.617284
1,19,22,17,0.703704,0.641975
2,14,13,8,0.687831,0.654321
3,7,19,14,0.714286,0.641975
4,7,10,13,0.671958,0.641975
5,2,18,12,0.698413,0.641975
6,7,19,11,0.698413,0.654321
7,17,19,14,0.714286,0.641975
8,15,22,17,0.703704,0.641975
9,37,19,16,0.698413,0.641975
