In [1]:
import numpy
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm
import time
import warnings
warnings.simplefilter("ignore", UserWarning)

In [2]:
best_outputs =[]
df = pd.read_csv('dataset/heart_v2.csv')
df.head()

Unnamed: 0,age,sex,BP,cholestrol,heart disease
0,70,1,130,322,1
1,67,0,115,564,0
2,57,1,124,261,1
3,64,1,128,263,0
4,74,0,120,269,0


In [3]:
X = df.drop('heart disease',axis=1)
y = df['heart disease']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)
X_train.shape, X_test.shape

((189, 4), (81, 4))

# Genetic algorithm

In [5]:
def cal_pop_fitness(pop):
    # Calculating the fitness value of each solution in the current population.
    fitness = []
    for i in range(len(pop)):
        if pop[i][1] >= 1:
            classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth = pop[i][0], min_samples_leaf=pop[i][1], n_estimators=pop[i][2], oob_score=True)
            classifier_rf.fit(X_train, y_train)
            fitness.append(classifier_rf.oob_score_)
            # print(fitness)
        else:
            fitness.append(0)
    best_outputs.append(numpy.max(fitness))
    return fitness

In [6]:
def select_mating_pool(pop, fitness, num_parents):
    # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation.
    parents = numpy.empty((num_parents, pop.shape[1]))
    for parent_num in range(num_parents):
        max_fitness_idx = numpy.where(fitness == numpy.max(fitness))
        max_fitness_idx = max_fitness_idx[0][0]
        parents[parent_num, :] = pop[max_fitness_idx, :]
        fitness[max_fitness_idx] = -99999
    return parents

In [7]:
def crossover(parents, offspring_size):
    offspring = numpy.empty(offspring_size)
    # The point at which crossover takes place between two parents. Usually, it is at the center.
    crossover_point = numpy.uint8(offspring_size[1]/2)

    for k in range(offspring_size[0]):
        # Index of the first parent to mate.
        parent1_idx = k % parents.shape[0]
        # Index of the second parent to mate.
        parent2_idx = (k+1) % parents.shape[0]
        # The new offspring will have its first half of its genes taken from the first parent.
        offspring[k, 0:crossover_point] = parents[parent1_idx, 0:crossover_point]
        # The new offspring will have its second half of its genes taken from the second parent.
        offspring[k, crossover_point:] = parents[parent2_idx, crossover_point:]
    return offspring

In [8]:
def mutation(offspring_crossover, num_mutations=1):
    mutations_counter = numpy.uint8(offspring_crossover.shape[1] / num_mutations)
    # Mutation changes a number of genes as defined by the num_mutations argument. The changes are random.
    for idx in range(offspring_crossover.shape[0]):
        gene_idx = mutations_counter - 1
        for mutation_num in range(num_mutations):
            # The random value to be added to the gene.
            random_value = numpy.random.randint(1.0, 6.0, 1)
            offspring_crossover[idx, gene_idx] = offspring_crossover[idx, gene_idx] + random_value
            gene_idx = gene_idx + mutations_counter
    return offspring_crossover

# Main part

In [9]:
num_weights = 3 # len(equation_inputs)

sol_per_pop = 5
num_parents_mating = 3

pop_size = (sol_per_pop, num_weights)

new_population = numpy.random.randint(low=1, high=(20-4), size=pop_size)
print(new_population)

num_generations = 100

[[15  1 15]
 [ 6  1  3]
 [14  6 15]
 [11 13 13]
 [12  9 15]]


In [10]:
GAbegin = time.time()
HYbegin = time.time()
for generation in tqdm(range(num_generations)):

    #print("\nGeneration : ", generation)
    fitness = cal_pop_fitness(new_population)
    # print('Fitness: ')
    # print(fitness)

    parents = select_mating_pool(new_population, fitness,  num_parents_mating)
    # print("Parents: ")
    # print(parents)

    offspring_crossover = crossover(parents, offspring_size=(pop_size[0]-parents.shape[0], num_weights))
    # print("Crossover")
    # print(offspring_crossover)

    offspring_mutation = mutation(offspring_crossover, num_mutations=2)
    # print("Mutation")
    # print(offspring_mutation)

    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_mutation

fitness = cal_pop_fitness(new_population)
# print(fitness)

best_match_idx = numpy.where(fitness == numpy.max(fitness))
GAend = time.time()

print("Best solution : \t\t", new_population[best_match_idx, :][0][0])
solCombo_ = new_population[best_match_idx, :][0][0]
print("Best solution fitness : \t", fitness[0])
print("Time taken : \t\t\t", (GAend - GAbegin))

100%|██████████| 100/100 [00:20<00:00,  4.97it/s]


Best solution : 		 [21 20 15]
Best solution fitness : 	 0.708994708994709
Time taken : 			 20.28777503967285


In [11]:
# print(solCombo_)
classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth=solCombo_[0], min_samples_leaf=solCombo_[1], n_estimators=solCombo_[2], oob_score=True)
classifier_rf.fit(X_train, y_train)

In [12]:
print("oob score :\t\t", classifier_rf.oob_score_) # checking the oob score
print("classifier score :\t", classifier_rf.score(X_test, y_test)) # checking the model score

oob score :		 0.708994708994709
classifier score :	 0.6419753086419753


# Applying Simulated Anneling

In [14]:
import random

def objective_function(combination):
    classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth = combination[0], min_samples_leaf=combination[1], n_estimators=combination[2], oob_score=True)
    classifier_rf.fit(X_train, y_train)
    f = classifier_rf.oob_score_
    return f

def simulate_annealing(T, T_min, alpha):
    combination = solCombo_
    while T > T_min:
        new_combination = combination.copy()
        # randomly modify one of the numbers in the combination
        i = random.randint(0, 2)
        new_combination[i] = random.randint(1, 20)
        delta_E = objective_function(new_combination) - objective_function(combination)
        if delta_E > 0:
            combination = new_combination
        else:
            p = 2.71828 ** (delta_E / T)
            if random.uniform(0, 1) < p:
                combination = new_combination
            else:
                combination = combination
        T *= alpha
        print(T)
    return combination

combo = simulate_annealing(10, 0.001, 0.99)
HYend = time.time()

print('\n\n')
print('Best solution :\t\t',combo)
fin = objective_function(combo)
print('oob score :\t\t', fin)
print('time taken :\t\t', HYend - HYbegin)

9.9
9.801
9.70299
9.605960099999999
9.509900498999999
9.414801494009998
9.320653479069898
9.2274469442792
9.135172474836407
9.043820750088043
8.953382542587162
8.863848717161291
8.775210229989678
8.687458127689782
8.600583546412883
8.514577710948755
8.429431933839268
8.345137614500876
8.261686238355868
8.17906937597231
8.097278682212586
8.01630589539046
7.936142836436555
7.856781408072189
7.778213593991468
7.700431458051553
7.623427143471037
7.547192872036327
7.4717209433159635
7.397003733882804
7.323033696543976
7.249803359578536
7.177305325982751
7.105532272722923
7.034476949995693
6.964132180495737
6.894490858690779
6.825545950103871
6.757290490602832
6.689717585696804
6.622820409839836
6.556592205741437
6.491026283684023
6.426116020847182
6.361854860638711
6.298236312032324
6.235253948912001
6.172901409422881
6.111172395328652
6.050060671375365
5.989560064661611
5.929664464014995
5.870367819374845
5.8116641411810965
5.753547499769286
5.696012024771592
5.6390519045238765
5.582661385

```
MAX val:

0.7142857142857143


```