Name: Swapnanil Halder, Roll : 18MA20046

# Implementing backpropagation from scratch with mini-batch gradient descent

In [1]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

class NeuralNet:
    
    def __init__(self, layers, batch_size, lr=0.1):
        self.W = []
        self.layers = layers
        self.batch_size = batch_size
        self.lr = lr
        
        for i in np.arange(0, len(layers) - 2):
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            self.W.append(w / np.sqrt(layers[i]))
            
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w)
        
    def sigmoid(self, x):
        return 1.0 / (1 + np.exp(-x))
    
    def sigmoid_derivative(self, x):
        return x * (1 - x)
    
    def train(self, x, y):
        A = [np.atleast_2d(x)]
        
        #forward pass
        for layer in np.arange(0, len(self.W)):
            net = A[layer].dot(self.W[layer])
            out = self.sigmoid(net)
            A.append(out)
            
        #calculating gradients in backward pass
        error = A[-1] - y
        D = [error * self.sigmoid_derivative(A[-1])]
        
        for layer in np.arange(len(A) - 2, 0, -1):
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_derivative(A[layer])
            D.append(delta)
            
        D = D[::-1]
        
        #updation of model parameters by gradient descent
        for layer in np.arange(0, len(self.W)):
            self.W[layer] += -self.lr * A[layer].T.dot(D[layer])
    
    def predict(self, X, addBias=True):
        p = np.atleast_2d(X)
        if addBias:
            p = np.c_[p, np.ones((p.shape[0]))]
        
        for layer in np.arange(0, len(self.W)):
            p = self.sigmoid(np.dot(p, self.W[layer]))

        return p
    
    def find_loss(self, X, targets):
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)
        return loss
    
    def fit(self, X, y, epochs=100):
        X = np.c_[X, np.ones((X.shape[0]))]
        for epoch in np.arange(0, epochs):
            
            for j in range(0,X.shape[0],self.batch_size):
                x = X[j:j+self.batch_size]
                target = y[j:j+self.batch_size]
                self.train(x, target)
                
            loss = self.find_loss(X, y)
            print("Epoch : {}, Loss : {:.7f}".format(epoch + 1, loss))

In [7]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype("float32") / 255.0
y_train = LabelBinarizer().fit_transform(y_train)
y_test = LabelBinarizer().fit_transform(y_test)
print(x_train.shape)
print(y_train.shape)
nn = NeuralNet(layers = [x_train.shape[1], 16, 10],batch_size = 16)
nn.fit(x_train, y_train, epochs=10)

(60000, 784)
(60000, 10)
Epoch : 1, Loss : 4564.4701711
Epoch : 2, Loss : 3918.8442147
Epoch : 3, Loss : 3645.0515401
Epoch : 4, Loss : 3405.6107364
Epoch : 5, Loss : 3204.1048947
Epoch : 6, Loss : 3109.4082497
Epoch : 7, Loss : 3020.8689717
Epoch : 8, Loss : 2969.3023032
Epoch : 9, Loss : 2901.0268678
Epoch : 10, Loss : 2852.3293993


In [8]:
predictions = nn.predict(x_test)
predictions = predictions.argmax(axis=1)
print("Accuracy on test set coding backpropagation from scratch : ",accuracy_score(y_test.argmax(axis=1), predictions))

Accuracy on test set coding backpropagation from scratch :  0.9359


# Training the same neural network using libraries 

In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense

def custom_loss(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    loss = 0.5 * tf.reduce_sum((y_pred - y_true) ** 2)
    return loss

i = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
model = tf.keras.Sequential([Dense(x_train.shape[1],activation = 'sigmoid',kernel_initializer=i,
    bias_initializer=i),
                         Dense(16,activation = 'sigmoid',kernel_initializer=i,
    bias_initializer=i),
                         Dense(10,activation = 'sigmoid',kernel_initializer=i, use_bias=False)])

model.compile(loss = custom_loss,optimizer = tf.keras.optimizers.SGD(learning_rate = 0.1),metrics = ['accuracy'])
model.fit(x_train, y_train, batch_size = 16,epochs = 10, verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1ceea1e83a0>

In [12]:
predictions = model.predict(x_test)
predictions = predictions.argmax(axis=1)
print("Accuracy on test set using standard libraries : ",accuracy_score(y_test.argmax(axis=1), predictions))

Accuracy on test set using standard libraries :  0.9398
