In [1]:
import numpy
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm
import time
import random
import warnings
warnings.simplefilter("ignore", UserWarning)

In [2]:
best_outputs =[]
df = pd.read_csv('dataset/heart_v2.csv')
df.head()

Unnamed: 0,age,sex,BP,cholestrol,heart disease
0,70,1,130,322,1
1,67,0,115,564,0
2,57,1,124,261,1
3,64,1,128,263,0
4,74,0,120,269,0


In [3]:
X = df.drop('heart disease',axis=1)
y = df['heart disease']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

# Genetic algorithm

In [4]:
def cal_pop_fitness(pop):
    # Calculating the fitness value of each solution in the current population.
    fitness = []
    for i in range(len(pop)):
        if pop[i][1] >= 1:
            classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth = pop[i][0], min_samples_leaf=pop[i][1], n_estimators=pop[i][2], oob_score=True)
            classifier_rf.fit(X_train, y_train)
            fitness.append(classifier_rf.oob_score_)
            # print(fitness)
        else:
            fitness.append(0)
    best_outputs.append(numpy.max(fitness))
    return fitness

In [5]:
def select_mating_pool(pop, fitness, num_parents):
    # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation.
    parents = numpy.empty((num_parents, pop.shape[1]))
    for parent_num in range(num_parents):
        max_fitness_idx = numpy.where(fitness == numpy.max(fitness))
        max_fitness_idx = max_fitness_idx[0][0]
        parents[parent_num, :] = pop[max_fitness_idx, :]
        fitness[max_fitness_idx] = -99999
    return parents

In [6]:
def crossover(parents, offspring_size):
    offspring = numpy.empty(offspring_size)
    # The point at which crossover takes place between two parents. Usually, it is at the center.
    crossover_point = numpy.uint8(offspring_size[1]/2)

    for k in range(offspring_size[0]):
        # Index of the first parent to mate.
        parent1_idx = k % parents.shape[0]
        # Index of the second parent to mate.
        parent2_idx = (k+1) % parents.shape[0]
        # The new offspring will have its first half of its genes taken from the first parent.
        offspring[k, 0:crossover_point] = parents[parent1_idx, 0:crossover_point]
        # The new offspring will have its second half of its genes taken from the second parent.
        offspring[k, crossover_point:] = parents[parent2_idx, crossover_point:]
    return offspring

In [7]:
def mutation(offspring_crossover, num_mutations=1):
    mutations_counter = numpy.uint8(offspring_crossover.shape[1] / num_mutations)
    # Mutation changes a number of genes as defined by the num_mutations argument. The changes are random.
    for idx in range(offspring_crossover.shape[0]):
        gene_idx = mutations_counter - 1
        for mutation_num in range(num_mutations):
            # The random value to be added to the gene.
            random_value = numpy.random.randint(1.0, 6.0, 1)
            offspring_crossover[idx, gene_idx] = offspring_crossover[idx, gene_idx] + random_value
            gene_idx = gene_idx + mutations_counter
    return offspring_crossover

# Main part

In [8]:
num_weights = 3 # len(equation_inputs)

sol_per_pop = 5
num_parents_mating = 3

pop_size = (sol_per_pop, num_weights)

new_population = numpy.random.randint(low=1, high=(20-4), size=pop_size)

num_generations = 100

In [9]:
HYbegin = time.time()

def objective_function(combination):
    classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth = combination[0], min_samples_leaf=combination[1], n_estimators=combination[2], oob_score=True)
    classifier_rf.fit(X_train, y_train)
    f = classifier_rf.oob_score_
    return f

def simulate_annealing(T, T_min, alpha):
    combination = solCombo_
    while T > T_min:
        new_combination = combination.copy()
        i = random.randint(0, 2)
        new_combination[i] = random.randint(1, 20)
        delta_E = objective_function(new_combination) - objective_function(combination)
        if delta_E > 0:
            combination = new_combination
        else:
            p = 2.71828 ** (delta_E / T)
            if random.uniform(0, 1) < p:
                combination = new_combination
            else:
                combination = combination
        T *= alpha
    return combination

GAbegin = time.time()
for generation in range(num_generations):

    fitness = cal_pop_fitness(new_population)
    parents = select_mating_pool(new_population, fitness,  num_parents_mating)
    offspring_crossover = crossover(parents, offspring_size=(pop_size[0]-parents.shape[0], num_weights))
    offspring_mutation = mutation(offspring_crossover, num_mutations=2)
    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_mutation
GAend = time.time()


fitness = cal_pop_fitness(new_population)
best_match_idx = numpy.where(fitness == numpy.max(fitness))
solCombo_ = new_population[best_match_idx, :][0][0]

SAbegin = time.time()
combo = simulate_annealing(10, 0.001, 0.99)
SAend = time.time()

HYend = time.time()

print("Time taken by Genetic Algorithm: " , GAend - GAbegin)
print("Time taken by    SA   Algorithm: " , SAend - SAbegin)
print("Time taken by Hybrid  Algorithm: " , HYend - HYbegin)

Time taken by Genetic Algorithm:  11.800296545028687
Time taken by    SA   Algorithm:  57.21176815032959
Time taken by Hybrid  Algorithm:  69.11620736122131


In [10]:
classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth=solCombo_[0], min_samples_leaf=solCombo_[1], n_estimators=solCombo_[2], oob_score=True)
classifier_rf.fit(X_train, y_train)
fin = objective_function(combo)

In [11]:
row_contents = [solCombo_[0],solCombo_[1],solCombo_[2],classifier_rf.oob_score_,classifier_rf.score(X_test, y_test), (GAend - GAbegin), combo[0], combo[1], combo[2], fin, (SAend - SAbegin), (HYend - HYbegin)]

In [12]:
from csv import writer

def append_list_as_row(file_name, list_of_elem):
    # Open file in append mode
    with open(file_name, 'a+', newline='') as write_obj:
        # Create a writer object from csv module
        csv_writer = writer(write_obj)
        # Add contents of list as last row in the csv file
        csv_writer.writerow(list_of_elem)

In [13]:
# Append a list as new line to an old csv file
append_list_as_row('Report/finalReport_2022-12-28.csv', row_contents)

In [14]:
report = pd.read_csv('Report/finalReport_2022-12-28.csv')
report

Unnamed: 0,max_depth,min_samples_leaf,n_estimators,oob_score,classifier_score,ga_timetaken,hybrid_md,hybrid_msl,hybrid_ne,hybrid_oob_score,sa_timetaken,hybrid_timetaken
0,17,19,12,0.708995,0.654321,13.712285,11,19,14,0.714286,46.158871,60.003971
1,15,5,14,0.661376,0.654321,8.634015,8,20,14,0.714286,49.7616,58.477428
2,21,14,9,0.703704,0.617284,10.028571,17,19,13,0.714286,47.476443,57.610354
3,10,19,14,0.714286,0.641975,14.673628,18,20,14,0.714286,47.862897,62.687554
4,7,5,7,0.677249,0.62963,11.27293,18,19,13,0.714286,50.069232,61.453823
5,13,5,7,0.677249,0.62963,7.905281,4,20,14,0.714286,48.670228,56.650117
6,14,20,15,0.708995,0.641975,16.057838,9,19,13,0.714286,55.087344,71.313737
7,4,2,5,0.677249,0.604938,5.572426,6,19,13,0.714286,47.976509,53.602943
8,16,14,9,0.703704,0.617284,11.316633,6,19,13,0.714286,47.030308,58.460517
9,20,21,13,0.708995,0.641975,13.461582,20,19,13,0.714286,49.706312,63.302495
