In [1]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.model_selection import cross_val_score
import numpy as np

In [2]:
# Baseline classifier built with no constructor arguments (library defaults);
# it is the estimator used by the first cross-validation run below.
model=RandomForestClassifier()

In [3]:
# Load the diabetes data from a CSV file resolved relative to the working directory.
dataset=pd.read_csv("diabetes.csv")

In [4]:
# Bare last expression: the notebook renders the loaded frame for a visual check.
dataset

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [5]:
# Show the column labels; the feature list and the "Outcome" target are picked from these.
dataset.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [6]:
# Predictor columns for the diabetes data: every column except the "Outcome" target.
features = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]

In [7]:
# Baseline: 5-fold cross-validation (cv=5) of the default-configured `model`
# on the selected feature columns against the "Outcome" column.
scores = cross_val_score(model, dataset[features], dataset["Outcome"], cv=5)
# Display the per-fold scores (one value per fold).
scores

array([0.75974026, 0.75324675, 0.75974026, 0.81699346, 0.74509804])

In [8]:
# Same 5-fold evaluation with a fresh forest of 200 trees.
# NOTE: this rebinds `scores`, so the baseline fold scores are no longer available afterwards.
scores = cross_val_score(RandomForestClassifier(n_estimators = 200), dataset[features], dataset["Outcome"], cv=5)
# Display the per-fold scores (one value per fold).
scores

array([0.76623377, 0.72077922, 0.77922078, 0.83006536, 0.77777778])

In [9]:
# Average of whatever `scores` currently holds (the most recently executed cross-validation cell).
np.mean(scores)

0.7748153806977336

In [158]:
def get_fitness(model,parameters, dataset, features, target):
    """Score one hyper-parameter set by mean 4-fold cross-validation.

    Args:
        model: Estimator class (not an instance); it is called as
            ``model(**parameters)`` to build the estimator to evaluate.
        parameters: Dict of keyword arguments for the estimator constructor.
        dataset: Table indexed as ``dataset[features]`` and ``dataset[target]``.
        features: Column labels used as predictors.
        target: Column label of the value to predict.

    Returns:
        The mean of the scores returned by ``cross_val_score`` with ``cv=4``.
    """
    estimator = model(**parameters)
    fold_scores = cross_val_score(estimator, dataset[features], dataset[target], cv=4)
    return np.mean(fold_scores)

In [11]:
import random as rnd
def create_population(parameters, number):
    """Build ``number`` randomly perturbed copies of a base parameter dict.

    Args:
        parameters: Dict mapping parameter name to a base value.
        number: How many particles (dicts) to generate.

    Returns:
        A list of ``number`` dicts with the same keys as ``parameters``.
        Values whose type is exactly ``float`` are scaled by a random factor
        drawn from [0, 2]. Every other value is scaled by a factor drawn from
        [0, 3], truncated with ``int`` and incremented by one, so it is at
        least 1 for a non-negative base value.
    """
    def _perturb(base):
        # Exact type check on purpose: only plain floats stay continuous.
        if type(base) is float:
            return base * rnd.uniform(0, 2)
        return int(base * rnd.uniform(0, 3)) + 1

    # One random draw per parameter per particle, in dict order.
    return [
        {name: _perturb(base) for name, base in parameters.items()}
        for _ in range(number)
    ]
#create_population({'n_estimators' : 100, 'ccp_alpha' : 0.02})

In [83]:
def update_particle_position(particle, inertia, c1, c2,pbest,gbest, population, velocities, max_velocity=25):
    """Move one particle a single PSO step, updating it in place.

    For every parameter of ``population[particle]`` the new velocity is

        inertia * v + c1 * r1 * (pbest - x) + c2 * r2 * (gbest - x)

    with ``r1`` and ``r2`` drawn uniformly from [0, 1]. The velocity is then
    clamped to ``[-max_velocity, max_velocity]`` and added to the position.

    Args:
        particle: Index of the particle in ``population``, ``pbest`` and
            ``velocities``.
        inertia: Weight applied to the previous velocity.
        c1: Weight of the pull towards the particle's own best position.
        c2: Weight of the pull towards the global best position.
        pbest: List of per-particle best positions (dicts keyed by parameter).
        gbest: Best position found by any particle (dict keyed by parameter).
        population: List of current positions; entry ``particle`` is mutated.
        velocities: List of current velocities; entry ``particle`` is mutated.
        max_velocity: Largest allowed absolute velocity per parameter.

    Returns:
        None. ``population[particle]`` and ``velocities[particle]`` are
        modified in place.
    """
    current = population[particle]
    personal_best = pbest[particle]
    for parameter in current:
        position = current[parameter]
        velocity = (
            inertia * velocities[particle][parameter]
            + c1 * rnd.uniform(0, 1) * (personal_best[parameter] - position)
            + c2 * rnd.uniform(0, 1) * (gbest[parameter] - position)
        )
        # Clamp symmetrically. The previous code replaced any out-of-range
        # velocity with +25, which flipped large negative velocities.
        velocity = max(-max_velocity, min(max_velocity, velocity))
        update = position + velocity
        if type(position) == int:
            # Integer parameters stay integers (truncation toward zero).
            update = int(update)
        if update <= 0:
            # Non-positive values are rejected: fall back to the personal best.
            current[parameter] = personal_best[parameter]
        else:
            current[parameter] = update
        velocities[particle][parameter] = velocity

#update_particle_position(0, 0.7,1.1,1.2)

In [84]:
def velocity_initialization(abc):
    """Return initial velocities: half of each value in ``abc``.

    A new dict is returned and ``abc`` is left untouched. The previous version
    halved ``abc`` in place and returned that same object, so repeated calls
    kept shrinking the caller's dict and every result aliased one dict.

    Args:
        abc: Dict mapping parameter name to a numeric base value.

    Returns:
        A new dict with the same keys and each value multiplied by 0.5.
    """
    return {parameter: value * 0.5 for parameter, value in abc.items()}

In [150]:
def chaotic_weight_inertia(i, max_i, inertia):
    """Scale ``inertia`` by the remaining fraction of iterations.

    Args:
        i: Current iteration index.
        max_i: Total number of iterations (must be non-zero).
        inertia: Inertia weight to scale.

    Returns:
        ``(1 - i / max_i) * inertia``: the full value at ``i == 0``,
        decreasing linearly in ``i`` down to zero at ``i == max_i``.
    """
    remaining_fraction = 1 - (i / max_i)
    return remaining_fraction * inertia

In [151]:
# Sanity check: iteration 1 of 10 with inertia 0.8 (i.e. 0.9 * 0.8).
chaotic_weight_inertia(1,10,0.8)

0.7200000000000001

In [188]:
import copy
def training(model,initial_guess, inertia, c1, c2, size, generations,dataset, features, target):
    """Tune estimator hyper-parameters with particle swarm optimisation.

    Args:
        model: Estimator class, instantiated as ``model(**params)`` when a
            particle is scored.
        initial_guess: Dict of base hyper-parameter values; the swarm is
            sampled around it. It is not modified.
        inertia: Starting inertia weight; the weight used in generation ``i``
            is ``chaotic_weight_inertia(i, generations, inertia)``.
        c1: Weight of the pull towards each particle's own best position.
        c2: Weight of the pull towards the swarm's best position.
        size: Number of particles.
        generations: Number of update rounds.
        dataset: Table holding the feature and target columns.
        features: Column labels used as predictors.
        target: Column label of the value to predict.

    Returns:
        ``[history, gbest_score, gbest]`` where ``history`` holds the best
        score after each generation, ``gbest_score`` is the best score found
        and ``gbest`` is the parameter dict that achieved it.
    """
    history = []
    population = create_population(initial_guess, size)
    # Best positions are snapshots. Particles are updated in place, so storing
    # references would make every "best" follow its particle around.
    pbest = [copy.deepcopy(particle) for particle in population]
    print('Calculating fitnesses!')
    pbest_fitnesses = [get_fitness(model, child, dataset, features, target) for child in pbest]
    gbest_score = max(pbest_fitnesses)
    gbest = copy.deepcopy(pbest[pbest_fitnesses.index(gbest_score)])
    # Each particle gets its own velocity dict, built from a private copy so
    # the caller's initial_guess is never altered.
    velocities = [velocity_initialization(copy.deepcopy(initial_guess)) for _ in range(size)]
    for i in range(generations):
        print('generation', i)
        print('Updating positions')
        # Derive the weight from the starting inertia every time. Feeding the
        # decayed value back in compounds the decay and drives it to ~0 early.
        current_inertia = chaotic_weight_inertia(i, generations, inertia)
        for index, child in enumerate(population):
            # Mutates population[index] (the same object as `child`) in place.
            update_particle_position(index, current_inertia, c1, c2, pbest, gbest, population, velocities)
            score = get_fitness(model, child, dataset, features, target)
            if score > pbest_fitnesses[index]:
                pbest[index] = copy.deepcopy(child)
                pbest_fitnesses[index] = score
            if gbest_score < score:
                gbest_score = score
                gbest = copy.deepcopy(child)
        history.append(gbest_score)
    return [history, gbest_score, gbest]

In [189]:
# PSO search over n_estimators / max_depth on the diabetes data:
# 10 particles, 100 generations, starting inertia 0.8, c1 = c2 = 1.5.
features = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
best_output = training(
    RandomForestClassifier,
    {'n_estimators' : 70, 'max_depth':4},
    inertia=0.8,
    c1=1.5,
    c2=1.5,
    size=10,
    generations=100,
    dataset=dataset,
    features=features,
    target="Outcome",
)

Calculating fitnesses!
generation 0
Updating positions
generation 1
Updating positions
generation 2
Updating positions
generation 3
Updating positions
generation 4
Updating positions
generation 5
Updating positions
generation 6
Updating positions
generation 7
Updating positions
generation 8
Updating positions
generation 9
Updating positions
generation 10
Updating positions
generation 11
Updating positions
generation 12
Updating positions
generation 13
Updating positions
generation 14
Updating positions
generation 15
Updating positions
generation 16
Updating positions
generation 17
Updating positions
generation 18
Updating positions
generation 19
Updating positions
generation 20
Updating positions
generation 21
Updating positions
generation 22
Updating positions
generation 23
Updating positions
generation 24
Updating positions
generation 25
Updating positions
generation 26
Updating positions
generation 27
Updating positions
generation 28
Updating positions
generation 29
Updating positio

In [147]:
import matplotlib.pyplot as plt
import random as random

In [149]:
# Load the second data set from a CSV file resolved relative to the working directory.
new_dataset= pd.read_csv('creditcard.csv')

In [160]:
# Preview the first rows to check the columns and the "Class" target.
new_dataset.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [184]:
# Build a smaller working set: every row with Class == 1 followed by the
# first 600 rows with Class == 0.
updated_dataset = new_dataset.loc[new_dataset['Class'] == 1]
df = new_dataset.loc[new_dataset['Class'] == 0][0:600]
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same row
# order and keeps the original index labels.
updated_dataset = pd.concat([updated_dataset, df])
updated_dataset

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
541,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00,1
623,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.00,1
4920,4462.0,-2.303350,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.562320,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.087330,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
6108,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.00,1
6329,7519.0,1.234235,3.019740,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,448.0,-0.242497,0.891170,1.164787,1.025392,0.729123,0.663501,0.760560,-0.004531,0.037370,...,-0.161343,0.047255,-0.302695,-0.931315,-0.008611,-0.187111,0.259246,-0.030635,11.99,0
597,449.0,-0.856525,0.583290,1.389014,-0.344699,0.267594,-0.951375,0.523117,-0.049229,-0.076944,...,-0.056911,-0.309940,0.167010,0.359246,-0.969651,-0.185793,-0.136897,0.233672,10.20,0
598,450.0,1.216891,-0.735671,0.236358,-0.579814,-1.178604,-0.930514,-0.386018,-0.120917,-1.081759,...,-0.291514,-0.586820,0.009280,0.557000,0.163173,1.005774,-0.087848,0.006504,75.65,0
599,452.0,-0.693683,-0.166272,2.036281,-2.227708,-0.186090,0.885566,-0.113836,0.165882,-0.889372,...,0.376480,1.048357,-0.544974,-1.143769,0.419313,-0.084655,-0.212912,-0.200410,49.98,0


In [181]:
# Predictors for the second data set: columns V1 .. V28 plus "Amount".
# NOTE: this rebinds `features`, replacing the diabetes column list.
features = [f"V{number}" for number in range(1, 29)] + ['Amount']
# Same PSO settings as the diabetes run, on the subsampled frame with "Class" as target.
output = training(
    RandomForestClassifier,
    {'n_estimators' : 80, 'max_depth':5},
    inertia=0.8,
    c1=1.5,
    c2=1.5,
    size=10,
    generations=100,
    dataset=updated_dataset,
    features=features,
    target="Class",
)

Calculating fitnesses!


KeyboardInterrupt: 


After finding the two best values, the particle updates its velocity and position with the following equations (a) and (b).

v[] = v[] + c1 * rand() * (pbest[] - present[]) + c2 * rand() * (gbest[] - present[]) (a)
present[] = present[] + v[] (b)

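v[] is the particle velocity and present[] is the current particle (solution). pbest[] and gbest[] are defined as stated before. rand() is a random number in the interval (0, 1). c1 and c2 are learning factors; usually c1 = c2 = 2. Note that equation (a) as written has no inertia term, whereas the code in this notebook multiplies the previous velocity by an inertia weight before adding the two attraction terms.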

In [None]:
# Global font settings for the figure. 'normal' is not a font family, so
# matplotlib warned and fell back to its default; name a generic family instead.
font = {'family' : 'sans-serif',
        'weight' : '100',
        'size'   : 22}

plt.rc('font', **font)

plt.figure(figsize=(20, 10))
# training() returns [history, best_score, best_params]; only the
# per-generation history (element 0) is a plottable series.
plt.plot(best_output[0])
plt.title("PSO algorithm score progression")
plt.xlabel("Generation")
plt.ylabel("Fitness score")
