In [None]:
## The following method is for uploading the dataset from a local drive. Change if you are uploading from GDrive
import numpy as np
import matplotlib.pyplot as plt
import scipy
from PIL import Image

from scipy import ndimage
import pandas as pd
# Upload helper from the Colab runtime.
# NOTE(review): google.colab is presumably only importable inside Google Colab - confirm before running elsewhere.
from google.colab import files
# Opens the upload dialog; the result is indexed by file name below to get that file's content.
data_load = files.upload()
import io
# Wrap the uploaded content in an in-memory binary buffer so pandas can parse it as a CSV file.
data=pd.read_csv(io.BytesIO(data_load['pima-indians-diabetes.csv']))

Saving pima-indians-diabetes.csv to pima-indians-diabetes.csv


Check the size of the dataset, i.e. the number of rows and columns

In [None]:
# Report the row and column counts of the raw dataset
print("Number of rows in the dataset = "+ str(len(data)))
print("Number of columns in the dataset = "+ str(len(data.columns)))

Number of rows in the dataset = 768
Number of columns in the dataset = 9


Next, modify the dataset by removing zero values for "BloodPressure", "BMI" and "Glucose"
Then define the independent and the dependent variables (x and y)
Finally, split the dataset into training and test subsets
Check the sizes of the train and the test datasets

In [None]:
# Preprocessing: keep only the rows where BloodPressure, BMI and Glucose are all non-zero
data_mod = data[(data[["BloodPressure", "BMI", "Glucose"]] != 0).all(axis=1)]

print("Number of rows in the processed dataset = "+ str(len(data_mod)))
print("Number of columns in the processed dataset = "+ str(len(data_mod.columns)))

Number of rows in the processed dataset = 724
Number of columns in the processed dataset = 9


In [None]:
# Every column except the last is a feature; the last column is the label
x, y = data_mod.iloc[:, :-1], data_mod.iloc[:, -1]

# Random 80/20 train-test split using sklearn (random_state fixed so the split is repeatable)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=64)
# Convert to numpy arrays; the labels are reshaped into column vectors of shape (n, 1)
x_train, x_test = x_train.to_numpy(), x_test.to_numpy()
y_train, y_test = y_train.to_numpy().reshape(-1,1), y_test.to_numpy().reshape(-1,1)
# One shape per line, in the order: x_train, y_train, x_test, y_test
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(579, 8)
(579, 1)
(145, 8)
(145, 1)


Next, we will define the structure of the network:
Create a function that takes x and y as the inputs
Use two hidden layers aside from the input and the output layer.
The function should return the number of units in the input layer, hidden layer 1, hidden layer 2 and the output layer.
Pass the training dataset to check the structure of the network.  

In [None]:
## Network layout: the width of the input layer, the two hidden layers and the output layer.
## The hidden-layer widths are free design choices
h_units1 = h_units2 = 50
## One input unit per feature column of x; a single output unit for the outcome column
ip_units, op_units = x.shape[1], 1

def nn_arch(x, y, h_units1, h_units2):
    """Return the number of units in each layer of the network.

    Args:
        x: 2-D feature container; its second dimension gives the input width.
        y: Labels. Accepted for interface symmetry but not inspected.
        h_units1: Number of units in the first hidden layer.
        h_units2: Number of units in the second hidden layer.

    Returns:
        Tuple ``(input units, h_units1, h_units2, output units)``; the output
        layer always has exactly one unit.
    """
    return x.shape[1], h_units1, h_units2, 1

## Print the number of units in each layer
ip_units, h_units1, h_units2, op_units = nn_arch(x, y, h_units1, h_units2)
# The third value is the second hidden layer, so it must print h_units2 (it used to repeat h_units1)
print(str(ip_units)+", "+str(h_units1)+", "+str(h_units2)+", "+str(op_units))

8, 50, 50, 1


Create a function for parameter initialization.
The function should take the units in each layer as inputs.
It should return the weights and biases for all the layers.
Use random initial weights and zero biases.


In [None]:
## We then initialize the parameters, i.e the weight and biases for each layer
def parameters_initialization(ip_units, h_units1, h_units2, op_units, scaling_factor = 0.04, seed = None):
    """Create the initial weights and biases of the three-layer network.

    Weights are drawn uniformly from ``[0, scaling_factor)``; biases start at zero.

    Args:
        ip_units: Number of input units.
        h_units1: Number of units in the first hidden layer.
        h_units2: Number of units in the second hidden layer.
        op_units: Number of output units.
        scaling_factor: Multiplier applied to the uniform [0, 1) samples.
        seed: Optional integer seed. When given, a private generator is used so
            the initialization is reproducible. When ``None`` (default) the
            global ``np.random`` state is used, exactly as before.

    Returns:
        ``(weights, biases)`` where ``weights`` is a list of three arrays with
        shapes ``(ip_units, h_units1)``, ``(h_units1, h_units2)``,
        ``(h_units2, op_units)`` and ``biases`` is a list of three zero arrays
        with shapes ``(1, h_units1)``, ``(1, h_units2)``, ``(1, op_units)``.
    """
    # Both the np.random module and a RandomState instance expose .rand(), so one code path serves both cases
    rng = np.random if seed is None else np.random.RandomState(seed)

    layer_sizes = [ip_units, h_units1, h_units2, op_units]
    # Weights are drawn in layer order (hidden 1, hidden 2, output), matching the original draw sequence
    weights = [
        rng.rand(n_in, n_out) * scaling_factor
        for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
    ]
    biases = [np.zeros((1, n_out)) for n_out in layer_sizes[1:]]

    return weights, biases

# w, b = parameters_initialization(ip_units, h_units1, h_units2, op_units)
# print(w[0].shape)
# print(w[1].shape)
# print(w[2].shape)
# print(b[0].shape)
# print(b[1].shape)
# print(b[2].shape)

Next, we define the activation functions

In [None]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Args:
        z: Scalar or numpy array of pre-activation values.

    Returns:
        Element-wise sigmoid of ``z``, with the same shape as ``z``.
    """
    # 1/(1+exp(-z)) == exp(-log(1+exp(-z))). np.logaddexp(0, -z) computes
    # log(exp(0) + exp(-z)) stably, so large negative z no longer triggers
    # "overflow encountered in exp" as the direct formula does.
    return np.exp(-np.logaddexp(0, -z))

def sigmoid_diff(z):
    """Derivative of the sigmoid, expressed through the sigmoid's output.

    Despite its name, ``z`` is used here as an already-activated value
    ``s = sigmoid(pre_activation)``; the derivative is then ``s * (1 - s)``.
    Callers in this file pass layer activations.

    Args:
        z: Scalar or numpy array of sigmoid outputs.

    Returns:
        ``z * (1 - z)`` element-wise.
    """
    complement = 1 - z
    return z*complement

Create a function to calculate the forward pass.
The function should take input x and the network parameters as inputs
The function returns the outputs (activations) of each layer as a list, which serves as the cache for backpropagation

In [None]:
## Next we define the forward pass
def forward_propagation(x, w, b):
    """Run the forward pass through the three sigmoid layers.

    Args:
        x: Input array; multiplied by ``w[0]`` so its last dimension must match
            the first dimension of ``w[0]``.
        w: Sequence holding the three weight matrices, in layer order.
        b: Sequence holding the three bias arrays, in layer order.

    Returns:
        List ``[a1, a2, a3]`` with the sigmoid activations of hidden layer 1,
        hidden layer 2 and the output layer.
    """
    activations = []
    layer_input = x
    # Exactly three layers are evaluated; each layer feeds its activation to the next
    for layer in range(3):
        pre_activation = np.matmul(layer_input, w[layer]) + b[layer]
        layer_input = sigmoid(pre_activation)
        activations.append(layer_input)
    return activations


#a = forward_propagation(x_train, w, b)
#print(a[2].shape)
#print(a[1].shape)
#print(a[0].shape)

Create a function to calculate the log-loss/cost.
The function takes the output of the final layer and y as inputs.
The function returns the calculated cost.
Remember that the cost should be calculated over all the training samples.

In [None]:
def cross_entropy_cost(a, y, eps = 1e-15):
    """Mean binary cross-entropy (log-loss) over all samples.

    Args:
        a: Predicted probabilities from the output layer, same shape as ``y``.
        y: True labels (0 or 1); ``y.shape[0]`` is the number of samples.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking logs.

    Returns:
        The summed per-sample loss divided by the number of samples.
    """
    # m = Total no. of samples
    m = y.shape[0]
    # A saturated sigmoid can output exactly 0 or 1; log(0) would then turn the
    # whole cost into inf/nan, so keep the probabilities strictly inside (0, 1).
    a = np.clip(a, eps, 1 - eps)
    loss = -(y*np.log(a) + (1-y)*np.log(1-a))
    return np.sum(loss)/m

Create a function to calculate the backpropagation.
The function takes network parameters, the cache from the function "forward_propagation", x and y as inputs.
The function should return the gradients, i.e. the "dw" and "db" values for each layer (the "dz" values are intermediate results).

In [None]:
def backward_propagation(x, y, w, a):
    """Compute the gradients of the mean cross-entropy cost for all three layers.

    Args:
        x: Input array that was fed to the forward pass.
        y: True labels; ``y.shape[0]`` is the number of samples ``m``.
        w: Sequence of the three weight matrices (only ``w[1]`` and ``w[2]`` are read).
        a: List ``[a1, a2, a3]`` of layer activations from ``forward_propagation``.

    Returns:
        ``(dw, db)``: two lists ``[layer1, layer2, layer3]`` holding the weight
        and bias gradients, each averaged over the ``m`` samples.
    """
    m = y.shape[0]

    # Output layer: for a sigmoid output with cross-entropy loss the error term is a3 - y
    dz3 = a[2] - y
    # Weight gradients are divided by m, like the bias gradients and the cost,
    # so the effective step size does not grow with the number of samples.
    dw3 = np.matmul(a[1].T, dz3)/m
    db3 = np.sum(dz3, axis=0)/m

    # Hidden layer 2: push the error back through w[2], scale by the sigmoid slope at a2
    dz2 = np.matmul(dz3, w[2].T)*sigmoid_diff(a[1])
    dw2 = np.matmul(a[0].T, dz2)/m
    db2 = np.sum(dz2, axis=0)/m

    # Hidden layer 1: push the error back through w[1], scale by the sigmoid slope at a1
    dz1 = np.matmul(dz2, w[1].T)*sigmoid_diff(a[0])
    dw1 = np.matmul(x.T, dz1)/m
    # The layer-1 bias gradient must come from dz1 (it was previously taken from dz2)
    db1 = np.sum(dz1, axis=0)/m

    dw = [dw1, dw2, dw3]
    db = [db1, db2, db3]
    return dw, db

# dw, db = backward_propagation(x_train, y_train, w, a)

Create a function to update the parameters.
The function takes the parameters, the gradients and the learning rate as inputs.
The function returns the parameters after updating their values.

In [None]:
def gradient_descent(lr, w, b, dw, db):
    """Apply one gradient-descent step to the three layers' parameters.

    The lists ``w`` and ``b`` are updated in place: each entry is replaced by a
    new array ``param - lr * grad``. The same list objects are returned.

    Args:
        lr: Learning rate.
        w: List of the three weight matrices.
        b: List of the three bias arrays.
        dw: Weight gradients, in the same layer order as ``w``.
        db: Bias gradients, in the same layer order as ``b``.

    Returns:
        ``(w, b)`` after the update.
    """
    for layer in range(3):
        w[layer] = w[layer] - lr*dw[layer]
        b[layer] = b[layer] - lr*db[layer]
    return w, b

Compile the model using a function.
It should take x, y, the learning rate, the hidden units and the number of iterations as inputs.
It should return the parameters from the gradient_descent function.
Print the cost as a function of the number of iterations.

In [None]:
def neural_network_model(x, y, lr, h_units1, h_units2, epoch_num):
    """Train the three-layer network with full-batch gradient descent.

    Args:
        x: Training inputs.
        y: Training labels.
        lr: Learning rate passed to ``gradient_descent``.
        h_units1: Number of units in the first hidden layer.
        h_units2: Number of units in the second hidden layer.
        epoch_num: Number of training iterations.

    Returns:
        List ``[weights, biases]`` holding the parameters after the last epoch.
    """
    layer_sizes = nn_arch(x, y, h_units1, h_units2)
    weights, biases = parameters_initialization(*layer_sizes)

    for epoch in range(epoch_num):
        activations = forward_propagation(x, weights, biases)
        # The cost is measured on the output-layer activations, before this epoch's update
        epoch_cost = cross_entropy_cost(activations[2], y)
        grad_w, grad_b = backward_propagation(x, y, weights, activations)
        weights, biases = gradient_descent(lr, weights, biases, grad_w, grad_b)

        print("Epoch no.: "+str(epoch)+", Cost = "+str(epoch_cost))

    return [weights, biases]

In [None]:
# Training hyper-parameters: learning rate and number of epochs
lr = 0.08
epoch_num = 100
# Train with 50 units in each hidden layer; the result is the list [weights, biases]
parameters = neural_network_model(x_train, y_train, lr, 50, 50, epoch_num)
learned_weights, learned_biases = parameters

In [None]:
def model_predict(x, w, b, threshold = 0.5):
    """Predict hard 0/1 labels from the network's output probabilities.

    Args:
        x: Inputs to classify.
        w: List of the three weight matrices.
        b: List of the three bias arrays.
        threshold: Outputs greater than or equal to this value are labelled 1.

    Returns:
        Array with the same shape and dtype as the output-layer activations,
        containing 1 where the activation is ``>= threshold`` and 0 elsewhere.
    """
    # Only the output-layer activations (last entry of the forward pass) are needed
    op = forward_propagation(x, w, b)[2]
    # Vectorised comparison replaces the per-row Python loop and also works
    # when the output layer has more than one column.
    return (op >= threshold).astype(op.dtype)

In [None]:
# Hard 0/1 predictions for the training and test splits, using the learned parameters
y_pred_train, y_pred_test = (
    model_predict(split, learned_weights, learned_biases) for split in (x_train, x_test)
)

  return 1/(1+np.exp(-z))


In [None]:
from sklearn.metrics import accuracy_score
# Fraction of correctly classified samples on each split
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Train Accuracy = "+str(train_accuracy))
print("Test Accuracy = "+str(test_accuracy))

Train Accuracy = 0.6511226252158895
Test Accuracy = 0.6758620689655173
