In [18]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.model_selection import cross_val_score
import numpy as np

In [19]:
# Baseline classifier built with scikit-learn's default hyper-parameters.
model = RandomForestClassifier()

In [20]:
# Read the diabetes table; the path is resolved relative to the working directory.
dataset = pd.read_csv(filepath_or_buffer="diabetes.csv")

In [21]:
# Show the loaded frame as the cell output for a quick visual sanity check.
dataset

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [22]:
# List the column labels; the feature list in the next cell is built from these names.
dataset.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [23]:
# Predictor columns fed to the classifier ("Outcome" is the target and is left out).
features = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]

In [24]:
# 5-fold cross-validation of the baseline model on the selected features.
scores = cross_val_score(
    estimator=model,
    X=dataset[features],
    y=dataset["Outcome"],
    cv=5,
)
scores

array([0.75974026, 0.73376623, 0.73376623, 0.82352941, 0.74509804])

In [25]:
# Same 5-fold evaluation, this time with a forest of only two trees.
scores = cross_val_score(
    estimator=RandomForestClassifier(n_estimators=2),
    X=dataset[features],
    y=dataset["Outcome"],
    cv=5,
)
scores

array([0.69480519, 0.65584416, 0.71428571, 0.7254902 , 0.73856209])

In [26]:
# Average of the per-fold scores computed in the previous cell.
scores.mean()

0.705797470503353

In [27]:
# Example hyper-parameter set used to try out get_fitness below.
mydict = dict(n_estimators=100, max_depth=1)

In [28]:
def get_fitness(model, parameters, X=None, y=None, cv=4):
    """Return the mean cross-validation score of ``model(**parameters)``.

    Parameters
    ----------
    model : callable
        Estimator class (or factory); it is instantiated as
        ``model(**parameters)``.
    parameters : dict
        Keyword arguments forwarded to ``model``.
    X : optional
        Feature matrix. When omitted, the notebook-level
        ``dataset[features]`` is used, as in the original version.
    y : optional
        Target vector. When omitted, the notebook-level
        ``dataset["Outcome"]`` is used.
    cv : int, default 4
        Value passed through as ``cv`` to ``cross_val_score``.

    Returns
    -------
    float
        Mean of the per-fold scores returned by ``cross_val_score``.
    """
    # Fall back to the notebook globals only when no data is supplied, so
    # existing two-argument calls keep behaving exactly as before.
    if X is None:
        X = dataset[features]
    if y is None:
        y = dataset["Outcome"]
    our_model = model(**parameters)
    return np.mean(cross_val_score(our_model, X, y, cv=cv))

In [29]:
# Try the fitness function on the example hyper-parameter set.
get_fitness(model=RandomForestClassifier, parameters=mydict)

0.68359375

In [55]:
import random as rnd
#my_range={"n_estimators":(100,500),"max_depth":(1,30)}
def mutate(mydict):
    """Return a mutated copy of a hyper-parameter dict.

    One key is chosen uniformly at random and its value is increased by a
    random amount drawn between 20% and 40% of the current value. Float
    values stay floats; every other value is treated as an integer.

    The input dict is NOT modified: callers can compare the fitness of the
    original and of the returned candidate, and keep the original if the
    mutation does not help.

    Parameters
    ----------
    mydict : dict
        Mapping of hyper-parameter name to numeric value.

    Returns
    -------
    dict
        New dict with the same keys (same order) and exactly one value
        replaced by its mutated version.

    Notes
    -----
    For non-negative values the mutation only ever increases the value.
    A float value of 0.0 cannot change, because the step is proportional
    to the value.
    """
    mutated = dict(mydict)  # shallow copy: never touch the caller's dict
    key = rnd.choice(list(mutated.keys()))
    estimator = mutated[key]
    if isinstance(estimator, float):
        guess = estimator + rnd.uniform(estimator * 0.2, estimator * 0.4)
    else:
        guess = int(estimator + rnd.uniform(round(estimator * 0.2, 0), round(estimator * 0.4, 0)))
        # For small integers (e.g. 1 or 2) the rounded bounds collapse to 0
        # and truncation gives back the same value; force a step of one so
        # the mutation is never a silent no-op.
        if guess == estimator:
            guess += 1

    mutated[key] = guess
    return mutated
# Quick demonstration of mutate on a mixed int/float parameter set.
mutate(dict(n_estimators=100, ccp_alpha=0.1, max_depth=12))

{'n_estimators': 135, 'ccp_alpha': 0.1, 'max_depth': 12}

In [57]:
def create_population(parameters, number):
    """Build ``number`` random individuals around a base parameter set.

    For every individual, each base value is scaled independently:

    * float values are multiplied by a factor drawn from ``uniform(0, 2)``;
    * all other values are multiplied by a factor drawn from
      ``uniform(0, 3)``, truncated to ``int`` and incremented by one (so a
      non-negative base value always yields at least 1).

    Parameters
    ----------
    parameters : dict
        Base hyper-parameters, name -> numeric value.
    number : int
        How many individuals to generate.

    Returns
    -------
    list of dict
        ``number`` dicts with the same keys (same order) as ``parameters``.
    """
    population = []
    for _ in range(number):
        individual = {}
        for name, base in parameters.items():
            # isinstance (as in mutate) also recognises float subclasses
            # such as numpy.float64, which an exact type comparison would
            # wrongly send down the integer branch.
            if isinstance(base, float):
                individual[name] = base * rnd.uniform(0, 2)
            else:
                individual[name] = int(base * rnd.uniform(0, 3)) + 1
        population.append(individual)
    return population
#create_population({'n_estimators' : 100, 'ccp_alpha' : 0.02})

In [58]:
def roulette_selection(fitness_values, model=None):
    """Pick two parent indices with probability proportional to fitness.

    Parameters
    ----------
    fitness_values : sequence of numbers
        Fitness of each individual; position ``i`` belongs to individual
        ``i`` of the population.
    model : optional
        Unused; kept so existing two-argument calls keep working.

    Returns
    -------
    list of int
        Two indices into ``fitness_values``, drawn with replacement (the
        same index can appear twice).

    Raises
    ------
    ValueError
        If ``fitness_values`` is empty.
    """
    n_individuals = len(fitness_values)
    if n_individuals == 0:
        raise ValueError("fitness_values must not be empty")
    # Index range comes from the fitness list itself, not from a global
    # `population`, so the function works for any population size.
    candidates = list(range(n_individuals))
    total = sum(fitness_values)
    if total == 0:
        # No fitness information at all: fall back to a uniform draw
        # instead of dividing by zero.
        return rnd.choices(candidates, k=2)
    ranks = [fitness / total for fitness in fitness_values]
    selected_parents = rnd.choices(candidates, weights=ranks, k=2)
    return selected_parents

In [59]:
# Small trial population (10 individuals) and the fitness of each member.
population = create_population(
    parameters={'n_estimators': 100, 'ccp_alpha': 0.02},
    number=10,
)
fitness_values = list(
    map(lambda individual: get_fitness(RandomForestClassifier, individual), population)
)

In [60]:
# Draw two parent indices, weighted by the fitness values computed above.
parents = roulette_selection(
    fitness_values=fitness_values,
    model=RandomForestClassifier,
)

In [61]:
# Larger trial population (20 individuals) over five hyper-parameters.
population = create_population(
    parameters={
        'min_samples_leaf': 2,
        'min_samples_split': 2,
        'n_estimators': 100,
        'ccp_alpha': 0.02,
        'max_depth': 10,
    },
    number=20,
)

In [62]:
# Inspect one generated individual (index 3) to sanity-check the value ranges.
population[3]

{'min_samples_leaf': 5,
 'min_samples_split': 6,
 'n_estimators': 18,
 'ccp_alpha': 0.035562339066881804,
 'max_depth': 2}

In [63]:
# Fitness of the individual inspected in the previous cell.
get_fitness(model=RandomForestClassifier, parameters=population[3])

0.7252604166666666

In [64]:
def crossover(parent1, parent2):
    """Produce two offspring by swapping one randomly chosen gene.

    A key is picked uniformly at random from ``parent1`` and the values
    stored under that key are exchanged between the two offspring. The
    parents themselves are left untouched, so the caller can still reject
    the offspring and keep the original individuals.

    Parameters
    ----------
    parent1, parent2 : dict
        Hyper-parameter dicts; ``parent2`` must contain the key chosen
        from ``parent1``.

    Returns
    -------
    tuple of (dict, dict)
        ``(child1, child2)``: copies of ``parent1`` and ``parent2`` with
        the selected key's values swapped.
    """
    # Shallow copies keep the swap from leaking into the population
    # entries that were passed in.
    child1, child2 = dict(parent1), dict(parent2)
    index = rnd.choice(list(child1.keys()))
    child1[index], child2[index] = child2[index], child1[index]
    return child1, child2

# Quick demonstration of crossover on two small parameter sets.
crossover(
    parent1=dict(n_estimators=100, max_depth=1),
    parent2=dict(n_estimators=140, max_depth=2),
)

({'n_estimators': 140, 'max_depth': 1}, {'n_estimators': 100, 'max_depth': 2})

In [66]:
# Genetic search over RandomForest hyper-parameters.
#
# Each round:
#   1. roulette-selects two parents,
#   2. crosses them over and keeps each offspring only if it beats the
#      parent it would replace,
#   3. mutates one random individual and keeps the mutant only if it beats
#      the original.
#
# `population[i]` and `fitness_values[i]` always describe the same
# individual: candidates are built from copies, and both lists are updated
# together only when a candidate is accepted.
model = RandomForestClassifier
population = create_population({'n_estimators' : 50, 'ccp_alpha' : 0.01, 'max_depth':5}, 100)
print('Calculating Fitness values!')
fitness_values = [get_fitness(model, child) for child in population]
for i in range(1000):
    print('Doing roulette selection, round', i)
    # roulette selection - probability of selection is proportional to fitness score
    parents = roulette_selection(fitness_values, model)
    print('Doing crossover, round', i)
    # crossover - operate on copies so that rejected offspring cannot
    # silently overwrite the parents stored in the population
    crossover0, crossover1 = crossover(dict(population[parents[0]]), dict(population[parents[1]]))
    fitness_score_parent0 = get_fitness(model, crossover0)
    fitness_score_parent1 = get_fitness(model, crossover1)
    if fitness_values[parents[0]] < fitness_score_parent0:
        population[parents[0]] = crossover0
        fitness_values[parents[0]] = fitness_score_parent0
    if fitness_values[parents[1]] < fitness_score_parent1:
        population[parents[1]] = crossover1
        fitness_values[parents[1]] = fitness_score_parent1
    print('Doing mutation, round', i)
    # mutation - accepted only if it makes population better
    to_be_mutated = rnd.randint(0, len(population) - 1)
    # The cached fitness is in sync with the stored individual, so there is
    # no need to re-run cross-validation on the un-mutated child.
    child_fitness = fitness_values[to_be_mutated]
    # Mutate a copy: the stored individual changes only if the mutant wins.
    mutation = mutate(dict(population[to_be_mutated]))
    mutation_fitness = get_fitness(model, mutation)
    if mutation_fitness > child_fitness:
        population[to_be_mutated] = mutation
        fitness_values[to_be_mutated] = mutation_fitness
    max_score = max(fitness_values)
    print('Max fitness score for now is', max_score, 'for parameters', population[fitness_values.index(max_score)])

Calculating Fitness values!
Doing roulette selection, round 0
Doing crossover, round 0
Doing mutation, round 0
Max fitness score for now is 0.7760416666666667 for parameters {'n_estimators': 143, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 1
Doing crossover, round 1
Doing mutation, round 1
Max fitness score for now is 0.7760416666666667 for parameters {'n_estimators': 143, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 2
Doing crossover, round 2
Doing mutation, round 2
Max fitness score for now is 0.7760416666666667 for parameters {'n_estimators': 143, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 3
Doing crossover, round 3
Doing mutation, round 3
Max fitness score for now is 0.7760416666666667 for parameters {'n_estimators': 143, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 4
Doing crossover, round 4
Doing mutation, round 4
Max f

Doing mutation, round 37
Max fitness score for now is 0.7773437500000001 for parameters {'n_estimators': 93, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 38
Doing crossover, round 38
Doing mutation, round 38
Max fitness score for now is 0.7773437500000001 for parameters {'n_estimators': 93, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 39
Doing crossover, round 39
Doing mutation, round 39
Max fitness score for now is 0.7773437500000001 for parameters {'n_estimators': 93, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 40
Doing crossover, round 40
Doing mutation, round 40
Max fitness score for now is 0.7773437500000001 for parameters {'n_estimators': 93, 'ccp_alpha': 0.0019806766049061176, 'max_depth': 15}
Doing roulette selection, round 41
Doing crossover, round 41
Doing mutation, round 41
Max fitness score for now is 0.7773437500000001 for parameters {'n_estimators': 93,

KeyboardInterrupt: 