<a href="https://colab.research.google.com/github/NurFaizin/ann-backpropagation/blob/main/ann_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from csv import reader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
# Load a CSV file
def load_csv(filename, skip_header=True):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        if skip_header:
            next(csv_reader, None)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return np.array(dataset)

In [3]:
# ReLU activation function
def relu(z):
    a = np.maximum(0,z)
    return a

In [4]:
# Sigmoid activation function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [5]:
# Network initializations
def init_network(layer_sizes):
    network = {}
    for i in range(1, len(layer_sizes)):
        network['W' + str(i)] = np.random.randn(layer_sizes[i], layer_sizes[i-1])*0.01
        network['B' + str(i)] = np.random.randn(layer_sizes[i],1)*0.01
    return network

In [6]:
# Feed forward
def forward_propagation(X_train, params):
    layers = len(params)//2
    values = {}
    for i in range(1, layers+1):
        if i==1:
            values['Z' + str(i)] = np.dot(params['W' + str(i)], X_train) + params['B' + str(i)]
            values['A' + str(i)] = sigmoid(values['Z' + str(i)])
        else:
            values['Z' + str(i)] = np.dot(params['W' + str(i)], values['A' + str(i-1)]) + params['B' + str(i)]
            if i==layers:
                values['A' + str(i)] = values['Z' + str(i)]
            else:
                values['A' + str(i)] = sigmoid(values['Z' + str(i)])
    return values

In [7]:
# Compute the error
def compute_error(values, Y_train):
    layers = len(values)//2
    Y_pred = values['A' + str(layers)]
    error = 1/(2*len(Y_train)) * np.sum(np.square(Y_pred - Y_train))
    return error

In [8]:
# Back propagation
def backward_propagation(params, values, X_train, Y_train):
    layers = len(params)//2
    m = len(Y_train)
    grads = {}
    for i in range(layers,0,-1):
        if i==layers:
            dA = 1/m * (values['A' + str(i)] - Y_train)
            dZ = dA
        else:
            dA = np.dot(params['W' + str(i+1)].T, dZ)
            dZ = np.multiply(dA, np.where(values['A' + str(i)]>=0, 1, 0))
        if i==1:
            grads['W' + str(i)] = 1/m * np.dot(dZ, X_train.T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
        else:
            grads['W' + str(i)] = 1/m * np.dot(dZ,values['A' + str(i-1)].T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
    return grads


In [9]:
# Update parameters
def update_params(params, grads, learning_rate):
    layers = len(params)//2
    params_updated = {}
    for i in range(1,layers+1):
        params_updated['W' + str(i)] = params['W' + str(i)] - learning_rate * grads['W' + str(i)]
        params_updated['B' + str(i)] = params['B' + str(i)] - learning_rate * grads['B' + str(i)]
    return params_updated

In [10]:
# Train model
def train(X_train, Y_train, layer_sizes, num_iters, learning_rate):
    params = init_network(layer_sizes)
    for i in range(num_iters):
        values = forward_propagation(X_train.T, params)
        cost = compute_error(values, Y_train.T)
        grads = backward_propagation(params, values,X_train.T, Y_train.T)
        params = update_params(params, grads, learning_rate)
        print('Error at epoch ' + str(i+1) + ' = ' + str(cost) + '\n')
    return params

In [11]:
# Compute the acccuracy
def compute_accuracy(X_train, X_test, Y_train, Y_test, params):
    values_train = forward_propagation(X_train.T, params)
    values_test = forward_propagation(X_test.T, params)
    train_acc = mean_squared_error(Y_train, values_train['A' + str(len(layer_sizes)-1)].T)
    test_acc = mean_squared_error(Y_test, values_test['A' + str(len(layer_sizes)-1)].T)
    return train_acc, test_acc

In [12]:
# Predict
def predict(X, params):
    values = forward_propagation(X.T, params)
    predictions = values['A' + str(len(values)//2)].T
    return predictions

In [27]:
data = load_csv('/content/drive/MyDrive/Colab Notebooks/diabetes.csv')            #load dataset
X = data[:, 0:8].astype('float')                                                  #get input columns
Y = data[:, 8].astype('int')                                                      #get target column
X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size = 0.2)           #split data into train and test sets in 80-20 ratio

In [37]:
layer_sizes = [8, 5, 1]                                                       #set layer sizes, do not change the size of the first and last layer 
num_iters = 1000                                                                    #set number of iterations over the training set(also known as epochs in batch gradient descent context)
learning_rate = 0.05                                                              #set learning rate for gradient descent

In [35]:
model = train(X_train, Y_train, layer_sizes, num_iters, learning_rate)           #train the model
train_acc, test_acc = compute_accuracy(X_train, X_test, Y_train, Y_test, model)  #get training and test accuracy

Cost at iteration 1 = 0.17878508173539429

Cost at iteration 2 = 0.17876922027943637

Cost at iteration 3 = 0.17875336270974534

Cost at iteration 4 = 0.17873750902536042

Cost at iteration 5 = 0.17872165922532096

Cost at iteration 6 = 0.17870581330866664

Cost at iteration 7 = 0.17868997127443734

Cost at iteration 8 = 0.17867413312167324

Cost at iteration 9 = 0.1786582988494147

Cost at iteration 10 = 0.17864246845670237

Cost at iteration 11 = 0.17862664194257716

Cost at iteration 12 = 0.1786108193060801

Cost at iteration 13 = 0.17859500054625263

Cost at iteration 14 = 0.17857918566213637

Cost at iteration 15 = 0.1785633746527731

Cost at iteration 16 = 0.17854756751720502

Cost at iteration 17 = 0.1785317642544744

Cost at iteration 18 = 0.17851596486362384

Cost at iteration 19 = 0.17850016934369617

Cost at iteration 20 = 0.17848437769373446

Cost at iteration 21 = 0.17846858991278208

Cost at iteration 22 = 0.17845280599988247

Cost at iteration 23 = 0.1784370259540796

Co

In [36]:
print('Root Mean Squared Error on Training Data = ' + str(train_acc))
print('Root Mean Squared Error on Test Data = ' + str(test_acc))

Root Mean Squared Error on Training Data = 0.3294295300759103
Root Mean Squared Error on Test Data = 0.31624318385953
