In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load Dataset

In [2]:
df = pd.read_csv('./datasets/diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Normalization

In [4]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
X

array([[ 0.63994726,  0.84832379,  0.14964075, ...,  0.20401277,
         0.46849198,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.68442195,
        -0.36506078, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -1.10325546,
         0.60439732, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ..., -0.73518964,
        -0.68519336, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.24020459,
        -0.37110101,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.20212881,
        -0.47378505, -0.87137393]])

# Train Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [6]:
X_train.shape, y_train.shape

((614, 8), (614,))

In [7]:
X_test.shape, y_test.shape

((154, 8), (154,))

# ANN - Artificial Neural Network

In [8]:
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(hidden_layer_sizes=128, max_iter=200)
model.fit(X_train, y_train)



In [9]:
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

In [10]:
from sklearn.metrics import accuracy_score, recall_score, precision_score

acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)

acc_train, acc_test

(0.8192182410423453, 0.7597402597402597)

In [11]:
p = precision_score(y_test, y_pred_test)
p

0.7954545454545454

In [12]:
r = recall_score(y_test, y_pred_test)
r

0.5555555555555556

# PCA

In [14]:
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
pca.fit(X_train)

X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

In [15]:
X_train.shape, X_test.shape

((614, 3), (154, 3))

In [16]:
model = MLPClassifier(hidden_layer_sizes=128, max_iter=200)
model.fit(X_train, y_train)

In [18]:
acc_train = accuracy_score(y_train, model.predict(X_train))
acc_test = accuracy_score(y_test, model.predict(X_test))

acc_train, acc_test

(0.752442996742671, 0.7467532467532467)