In [1]:
# Importing Necessary modules

import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,f1_score
from sklearn.model_selection import train_test_split
from math import log
import numpy as np
import pandas as pd

In [2]:
# Loading the data

df=pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv')
df.columns=list('ABCDEFGHI')
df

Unnamed: 0,A,B,C,D,E,F,G,H,I
0,1,85,66,29,0,26.6,0.351,31,0
1,8,183,64,0,0,23.3,0.672,32,1
2,1,89,66,23,94,28.1,0.167,21,0
3,0,137,40,35,168,43.1,2.288,33,1
4,5,116,74,0,0,25.6,0.201,30,0
...,...,...,...,...,...,...,...,...,...
762,10,101,76,48,180,32.9,0.171,63,0
763,2,122,70,27,0,36.8,0.340,27,0
764,5,121,72,23,112,26.2,0.245,30,0
765,1,126,60,0,0,30.1,0.349,47,1


Data is already cleaned, so we can directly give it to the model. We can also do preprocessing, to churn out accuracy

In [3]:
x_train,x_test,y_train,y_test=train_test_split(df.drop('I',axis=1),df["I"],test_size=0.2)

In [4]:
x_train

Unnamed: 0,A,B,C,D,E,F,G,H
403,5,168,64,0,0,32.9,0.135,41
645,1,167,74,17,144,23.4,0.447,33
683,5,136,82,0,0,0.0,0.640,69
719,4,83,86,19,0,29.3,0.317,34
744,12,100,84,33,105,30.0,0.488,46
...,...,...,...,...,...,...,...,...
72,4,129,86,20,270,35.1,0.231,23
38,4,111,72,47,207,37.1,1.390,56
129,4,173,70,14,168,29.7,0.361,33
422,2,115,64,22,0,30.8,0.421,21


In [5]:
x_test

Unnamed: 0,A,B,C,D,E,F,G,H
591,3,132,80,0,0,34.4,0.402,44
63,7,114,66,0,0,32.8,0.258,42
611,7,168,88,42,321,38.2,0.787,40
259,3,191,68,15,130,30.9,0.299,34
388,3,100,68,23,81,31.6,0.949,28
...,...,...,...,...,...,...,...,...
557,11,103,68,40,0,46.2,0.126,42
506,1,130,60,23,170,28.6,0.692,21
203,6,103,72,32,190,37.7,0.324,55
539,8,100,74,40,215,39.4,0.661,43


In [6]:
# Extracting the values

x_train=x_train.values
x_test=x_test.values
y_train=y_train.values
y_test=y_test.values

In [7]:
# Creating our custom Adaboost class

class AdaBoost:
    def __init__(self,n_estimators=20):
        self.n_estimators=n_estimators
    
    # code to train the models
    def fit(self,x,y):
        self.models=[]
        self.model_weights=[]
        d=np.ones(x.shape[0])/x.shape[0]
        for i in range(self.n_estimators):
            m=DecisionTreeClassifier(max_depth=1) # used many weak learners
            m.fit(x,y,sample_weight=d) 
            res=m.predict(x)
            k=(res!=y)
            s=np.sum(d[k])
            a=0.5*(log((1-s)/s))
            for j in range(len(d)):
                if(res[j]==y[j]):
                    d[j]*=np.exp(-a)
                else:
                    d[j]*=np.exp(a)
            d=d/d.sum()
            self.models.append(m) # saved every model here
            self.model_weights.append(a) # added the model weights for each weak learner(used at prediction time)
            
    # code to generate prediction on the trained model
    def predict(self,x):
        l=[]
        for i in x:
            d={}
            for j in range(self.n_estimators):
                res=self.models[j].predict([i])
                if(res[0] in d):
                    d[res[0]]+=self.model_weights[j]
                else:
                    d[res[0]]=self.model_weights[j]
            l1=list(d.items())
            l1.sort(key=lambda x:x[1],reverse=True)
            l.append(l1[0][0])
        return np.array(l)

In [12]:
model=AdaBoost(n_estimators=20) # Creating the object
model.fit(x_train,y_train) # training it on our data

In [13]:
y_pred=model.predict(x_test)

In [14]:
# Accuracy of the custom coded model on the test-set

print('Accuracy is: ',accuracy_score(y_test,y_pred))

Accuracy is:  0.7467532467532467


In [15]:
# F1-Score of the custom coded model on the test-set

print('F1-Score is: ',f1_score(y_test,y_pred,average='weighted'))

F1-Score is:  0.7382546935435157
