

Import the PIMA Indian dataset from your local drive/Google Drive

In [52]:
## The following method is for uploading the dataset from a local drive. Change if you are uploading from GDrive
import numpy as np
import matplotlib.pyplot as plt
import scipy
from PIL import Image

from scipy import ndimage
import pandas as pd
# from google.colab import files
# data_load = files.upload()
import io
data=pd.read_csv('pima-indians-diabetes.csv')
# data = np.array(data)

Check the size of the dataset, i.e. the number of rows and columns

In [53]:
rows = data.shape[0]
cols = data.shape[1]
rows, cols
type(data)

pandas.core.frame.DataFrame

Next, modify the dataset by removing zero values for "BloodPressure", "BMI" and "Glucose"
Then define the independent and the dependent variables (x and y)
Finally, split the dataset with training and test subsets
Check the sizes of the train and the test datasets

In [54]:
# remove the zero rows
data_mod = data[(data.BloodPressure != 0) & (data.BMI != 0) & (data.Glucose != 0)]
data_mod.shape

(724, 9)

In [55]:
from sklearn.model_selection import train_test_split
x = data_mod.iloc[:, :-1]
y = data_mod.iloc[:, -1]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=23)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(579, 8)
(579,)
(145, 8)
(145,)


Next, we will define the structure of the network:
Create a function that takes x and y as the inputs
Use two hidden layers aside from the input and the output layer.
The function should return the number of units in the input layer, hidden layer 1, hidden layer 2 and the output layer.
Pass the training dataset to check the structure of the network.  

In [56]:
## First we will define the structure of the NN:the number of input units, number of hidden units and output units.
## Number of input units is equal to the number of features in the dataset
## We can choose how many hidden units we want to use
## Print the number of units in each layer

def nn_structure(x, y, n_hl1, n_hl2):
    input_unit = x.shape[1]
    hidden_layer1 = n_hl1
    hidden_layer2 = n_hl2
    output_unit = 1
    return input_unit, hidden_layer1, hidden_layer2, output_unit  

# input_unit, hidden_layer1, hidden_layer2, output_unit = nn_structure(x, y, 30, 30)  

Create a function for parameter initialization.
The function should take the units in each layer as inputs.
It should return the weights and biases for all the layers.
Use random initial weights and zero biases.


In [57]:
## We then initialize the parameters, i.e the weight and biases for each layer
def parameters_initialization(n_in_neurons, n_out_neurons, scale=0.01):
    # 8*N sized weights for hidden layer 1
    # N*N for hidden layer 2
    weights = np.random.random((n_in_neurons, n_out_neurons)) * scale # initialize between 0 and 1
    biases = np.zeros((1, n_out_neurons))

    return weights, biases

# w1, b1 = parameters_initialization(input_unit, hidden_layer1)
# w2, b2 = parameters_initialization(hidden_layer1, hidden_layer2)
# w3, b3 = parameters_initialization(hidden_layer2, output_unit)
# w1.shape, w2.shape, w3.shape, b1.shape, b2.shape, b3.shape

Next, we define the activation functions

In [58]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

def relu(z):
    return np.maximum(0,z)

Create a function to calculate the forward pass.
The function should take input x and the network parameters as inputs
The function returns all the "z" values and the outputs of each layer in a cache

In [59]:
## Next we define the forward pass
def forward_propagation(X, w1, w2, w3, b1, b2, b3):
    a1 = sigmoid(np.matmul(X, w1) + b1)
    a2 = sigmoid(np.matmul(a1, w2) + b2)
    Z = np.matmul(a2, w3) + b3
    return a1, a2, Z

# a1, a2, Z = forward_propagation(x_train, w1, w2, w3, b1, b2, b3)
# a1.shape, a2.shape, Z.shape

Create a function to calculate the log-loss/cost.
The function takes the output of the final layer, y and the parameters as inputs
The function returns the calculated cost.
Remember that the cost should be calculated over all the training samples.

In [60]:
def cross_entropy_cost(Z, Y):
    # Y is a pandas dataframe with dim (500,)
    Y = (np.array(Y)).reshape(Y.shape[0], 1)
    Y_hat = sigmoid(Z)
    # print(Y.shape, Y_hat.shape)
    cost = -np.sum(Y*np.log(Y_hat) + (1-Y)*np.log(1-Y_hat)) / Y.shape[0]
    return cost 

# cross_entropy_cost(Z, y_train)

Create a function to calculate the backpropagation.
The function takes network parameters, the cache from the function "forward_propagation", x and y as inputs.
The function should return the gradients, i.e "dz", "dw" and "db" values.

In [61]:
def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def backward_propagation(X, Y, Z, a1, a2, w1, w2, w3):
    Y_hat = sigmoid(Z)
    dz = Y_hat - np.array(Y).reshape(-1, 1)
    m = Y.shape[0]
    dw3 = np.matmul(a2.T, (dz*sigmoid_derivative(Z))) / m
    dw2 = np.matmul(a1.T, np.matmul(dz*sigmoid_derivative(Z), w3.T)*sigmoid_derivative(a2)) / m
    dw1 = np.matmul(X.T, np.matmul(np.matmul(dz*sigmoid_derivative(Z), w3.T)*sigmoid_derivative(a2), w2.T)*sigmoid_derivative(a1)) / m

    db3 = np.sum(dz*sigmoid_derivative(Z), axis=0) / m
    db2 = np.sum(np.matmul(dz*sigmoid_derivative(Z), w3.T)*sigmoid_derivative(a2), axis=0) / m
    db1 = np.sum(np.matmul(np.matmul(dz*sigmoid_derivative(Z), w3.T)*sigmoid_derivative(a2), w2.T)*sigmoid_derivative(a1), axis=0) / m

    # reshape db1 db2 db3 to 1x30
    db1 = np.array(db1).reshape(1, -1)
    db2 = np.array(db2).reshape(1, -1)
    db3 = np.array(db3).reshape(1, -1)
    
    return dw1, dw2, dw3, db1, db2, db3 

# dw1, dw2, dw3, db1, db2, db3 = backward_propagation(x_train, y_train, Z, a1, a2, w1, w2, w3)
# dw1.shape, dw2.shape, dw3.shape, db1.shape, db2.shape, db3.shape

Create a function to update the parameters.
The function takes the parameters, the gradients and the learning rate as inputs.
The fucntion returns the parameters after updating their values.

In [62]:
def gradient_descent(w1, w2, w3, b1, b2, b3, dw1, dw2, dw3, db1, db2, db3, alpha):
    w1 = w1 - alpha * dw1
    w2 = w2 - alpha * dw2
    w3 = w3 - alpha * dw3
    b1 = b1 - alpha * db1
    b2 = b2 - alpha * db2
    b3 = b3 - alpha * db3

    return w1, w2, w3, b1, b2, b3

# w1, w2, w3, b1, b2, b3 = gradient_descent(w1, w2, w3, b1, b2, b3, dw1, dw2, dw3, db1, db2, db3, 0.01)
# w1.shape, w2.shape, w3.shape, b1.shape, b2.shape, b3.shape

In [63]:
# save the parameters dictionary in a file called 'params.pickle'
import pickle
def save_model(params):
    with open('params.pickle', 'wb') as f:
        pickle.dump(params, f)
        

Compile the model using a function.
It should take x, y, the hidden units and the number of iterations as inputs.
It should return the parameters from the gradient_descent function.
Print the cost as a function of the number of iterations.

In [64]:
def neural_network_model(X, Y, n_nodes_hl1, n_nodes_hl2, hm_epochs):
    # Define the neural network model
    input_unit, hidden_unit1, hidden_unit2, output_unit = nn_structure(X, Y, n_nodes_hl1, n_nodes_hl2)
    w1, b1 = parameters_initialization(input_unit, hidden_unit1)
    w2, b2 = parameters_initialization(hidden_unit1, hidden_unit2)
    w3, b3 = parameters_initialization(hidden_unit2, output_unit)

    # hyperparameters
    learning_rate = 0.1
    threshold = 1e-4
    
    # Train the neural network model
    for epoch in range(hm_epochs):
        a1, a2, Z = forward_propagation(X, w1, w2, w3, b1, b2, b3)
        cost = cross_entropy_cost(Z, Y)
        dw1, dw2, dw3, db1, db2, db3 = backward_propagation(X, Y, Z, a1, a2, w1, w2, w3)
        w1, w2, w3, b1, b2, b3 = gradient_descent(w1, w2, w3, b1, b2, b3, dw1, dw2, dw3, db1, db2, db3, learning_rate)

        if epoch % 10 == 0:
            print('Epoch = ', epoch, 'Cost = ', cost)
        
        previous_cost = cost

        
    parameters = {'w1': w1, 'w2': w2, 'w3': w3, 'b1': b1, 'b2': b2, 'b3': b3}
    save_model(parameters)
    return parameters

parameters = neural_network_model(x_train, y_train, 10, 10, 250)

Epoch =  0 Cost =  0    0.698669
dtype: float64


Epoch =  10 Cost =  0    0.677234
dtype: float64
Epoch =  20 Cost =  0    0.663658
dtype: float64
Epoch =  30 Cost =  0    0.655035
dtype: float64
Epoch =  40 Cost =  0    0.649515
dtype: float64
Epoch =  50 Cost =  0    0.64595
dtype: float64
Epoch =  60 Cost =  0    0.643625
dtype: float64
Epoch =  70 Cost =  0    0.642097
dtype: float64
Epoch =  80 Cost =  0    0.641085
dtype: float64
Epoch =  90 Cost =  0    0.640409
dtype: float64
Epoch =  100 Cost =  0    0.639954
dtype: float64
Epoch =  110 Cost =  0    0.639647
dtype: float64
Epoch =  120 Cost =  0    0.639436
dtype: float64
Epoch =  130 Cost =  0    0.639291
dtype: float64
Epoch =  140 Cost =  0    0.639188
dtype: float64
Epoch =  150 Cost =  0    0.639113
dtype: float64
Epoch =  160 Cost =  0    0.639057
dtype: float64
Epoch =  170 Cost =  0    0.639012
dtype: float64
Epoch =  180 Cost =  0    0.638975
dtype: float64
Epoch =  190 Cost =  0    0.638944
dtype: float64
Epoch =  200 Cost =  0    0.638916
dtype: float64
Epoch =  2

### Test
Now test the model against both the train and test datasets.

In [65]:
from sklearn.metrics import accuracy_score 

def predict(X, parameters):
    _, _, Z = forward_propagation(X, parameters['w1'], parameters['w2'], parameters['w3'], parameters['b1'], parameters['b2'], parameters['b3'])
    Y_prediction = np.where(Z > 0.5, 1, 0)
    return Y_prediction

def validate(Y_actual, Y_predicted):
    accuracy = accuracy_score(Y_actual, Y_predicted)
    return accuracy*100
    

In [66]:
# Train Data
train_accuracy = validate(y_train, predict(x_train, parameters))
train_accuracy


66.32124352331607

In [67]:
# Test Data
test_accuracy = validate(y_test, predict(x_test, parameters))
test_accuracy

62.758620689655174

: 