In [8]:
# Step 1 - Loading the Required Libraries and Modules
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.metrics import Accuracy, Precision, Recall

In [9]:
# Step 2 - Reading the Data and Performing Basic Data Checks
df = pd.read_csv('diabetes.csv') 
print(df.shape)
df.describe().transpose()

(768, 9)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Pregnancies,768.0,3.845052,3.369578,0.0,1.0,3.0,6.0,17.0
Glucose,768.0,120.894531,31.972618,0.0,99.0,117.0,140.25,199.0
BloodPressure,768.0,69.105469,19.355807,0.0,62.0,72.0,80.0,122.0
SkinThickness,768.0,20.536458,15.952218,0.0,0.0,23.0,32.0,99.0
Insulin,768.0,79.799479,115.244002,0.0,0.0,30.5,127.25,846.0
BMI,768.0,31.992578,7.88416,0.0,27.3,32.0,36.6,67.1
DiabetesPedigreeFunction,768.0,0.471876,0.331329,0.078,0.24375,0.3725,0.62625,2.42
Age,768.0,33.240885,11.760232,21.0,24.0,29.0,41.0,81.0
Outcome,768.0,0.348958,0.476951,0.0,0.0,0.0,1.0,1.0


In [10]:
# Step 3 - Creating Arrays for the Features and the Response Variable
target_column = 'Outcome'
predictors = list(set(list(df.columns))-set([target_column]))
df[predictors] = df[predictors]/df[predictors].max()
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Pregnancies,768.0,0.22618,0.19821,0.0,0.058824,0.176471,0.352941,1.0
Glucose,768.0,0.60751,0.160666,0.0,0.497487,0.58794,0.704774,1.0
BloodPressure,768.0,0.566438,0.158654,0.0,0.508197,0.590164,0.655738,1.0
SkinThickness,768.0,0.207439,0.161134,0.0,0.0,0.232323,0.323232,1.0
Insulin,768.0,0.094326,0.136222,0.0,0.0,0.036052,0.150414,1.0
BMI,768.0,0.47679,0.117499,0.0,0.406855,0.4769,0.545455,1.0
DiabetesPedigreeFunction,768.0,0.19499,0.136913,0.032231,0.100723,0.153926,0.258781,1.0
Age,768.0,0.410381,0.145188,0.259259,0.296296,0.358025,0.506173,1.0
Outcome,768.0,0.348958,0.476951,0.0,0.0,0.0,1.0,1.0


In [11]:
# Step 4 - Creating the Training and Test Datasets
np.random.seed(40)
train_df = df.sample(frac=0.7, random_state=40)
test_df = df.drop(train_df.index)

X_train = train_df[predictors].values
y_train = to_categorical(train_df[target_column].values)

X_test = test_df[predictors].values
y_test = to_categorical(test_df[target_column].values)

print(X_train.shape); print(X_test.shape)

(538, 8)
(230, 8)


In [13]:
# Step 5 - Building, Predicting, and Evaluating the Neural Network Model
model = Sequential()
model.add(Dense(8, activation='relu', input_shape=(len(predictors),)))
model.add(Dense(8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[Accuracy(), Precision(), Recall()])
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)

train_loss, train_acc, train_prec, train_rec = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_acc, test_prec, test_rec = model.evaluate(X_test, y_test, verbose=0)
# print('Train Accuracy:', train_acc)
# print('Test Accuracy:', test_acc)

# predict_train = np.argmax(model.predict(X_train), axis=1)
# predict_test = np.argmax(model.predict(X_test), axis=1)

train_loss, train_acc, train_prec, train_rec = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_acc, test_prec, test_rec = model.evaluate(X_test, y_test, verbose=0)

print('Train Accuracy:', train_acc)
print('Train Precision:', train_prec)
print('Train Recall:', train_rec)

print('Test Accuracy:', test_acc)
print('Test Precision:', test_prec)
print('Test Recall:', test_rec)


Train Accuracy: 0.0
Train Precision: 0.7081784605979919
Train Recall: 0.7081784605979919
Test Accuracy: 0.0
Test Precision: 0.6739130616188049
Test Recall: 0.6739130616188049
