# <center><font color='green'>XGBoost Implementation</font></center>

## Importing Libraries

In [16]:
import pandas as pd
import numpy as np
import time

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Various models for comparison
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

## Loading Dataset

In [11]:
# Read the diabetes dataset from the working directory into a DataFrame
# and preview its first rows.
df = pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [12]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(768, 9)

In [13]:
# Count missing (NaN) entries per column.
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

## Splitting the Dataset

In [14]:
# Features are every column except the label; the label is 'Outcome'.
x, y = df.drop(columns=['Outcome']), df['Outcome']

In [15]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

## Model Training and Evaluation

In [21]:
# Candidate classifiers, each paired with the label used when reporting its score.
models = [
    ('Logistic Regression', LogisticRegression(solver='lbfgs', max_iter=1000)),
    ('KNN', KNeighborsClassifier()),
    ('SVM', SVC()),
    # eta is XGBoost's learning-rate (step-size shrinkage) hyperparameter.
    ('XGBoost Classifier', XGBClassifier(eta=0.01)),
]

# (model name, test accuracy) pairs, filled in by the loop below.
results = []

for name, model in models:
    # Fit on the training split, then score on the held-out test split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # predict() already returns class labels, so they are compared directly
    # against y_test without any rounding step.
    accuracy = accuracy_score(y_test, y_pred)
    results.append((name, accuracy))
    print('Accuracy: %.2f%%' % (accuracy * 100), name)

Accuracy: 77.92% Logistic Regression
Accuracy: 73.38% KNN
Accuracy: 78.57% SVM
Accuracy: 81.17% XGBoost Classifier


* Here we can observe that XGBoost achieves higher accuracy than the other models on this test split.