# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# In [2]:
class MLP():
    """Tiny multi-layer perceptron trained with mini-batch gradient descent.

    Architecture: inputs -> ``HIDDEN_UNITS`` sigmoid units -> sigmoid outputs.
    The loss is the sum of squared errors over the samples of a batch.

    Data layout: samples are stored column-wise, i.e. ``X`` has shape
    ``(n_features, m)`` and ``Y`` has shape ``(n_outputs, m)``. A 1-D ``Y``
    of length ``m`` is accepted and treated as a single output row.

    Typical use::

        net = MLP(X, Y, learning_rate=0.1, validation_split=0.2)
        net.split_samples()
        train_hist, valid_hist = net.train(epochs=100, mini_batch_size=8)
        Y_hat = net.predict(X_new)
    """

    # Width of the single hidden layer.
    HIDDEN_UNITS = 2

    def __init__(self, X, Y, learning_rate=0.05, validation_split=None):
        """Store the data set and hyper-parameters.

        Args:
            X: array-like of shape ``(n_features, m)``.
            Y: array-like of shape ``(n_outputs, m)`` or ``(m,)``.
            learning_rate: step size used by ``update_params``.
            validation_split: fraction of samples in ``[0, 1)`` held out for
                validation, or ``None`` to train on everything.

        Raises:
            ValueError: if ``validation_split`` is outside ``[0, 1)`` or the
                sample counts of ``X`` and ``Y`` disagree.
        """
        if validation_split is not None and not 0 <= validation_split < 1:
            raise ValueError("validation_split must be in [0, 1) or None")

        self.X = np.asarray(X, dtype=float)
        self.Y = np.atleast_2d(np.asarray(Y, dtype=float))
        if self.X.ndim != 2 or self.X.shape[1] != self.Y.shape[1]:
            raise ValueError(
                "X must be (n_features, m) and Y (n_outputs, m) with equal m"
            )

        self.lr = learning_rate
        self.validation_split = validation_split

        # Filled in by split_samples().
        self.X_train = self.X_valid = self.Y_train = self.Y_valid = None
        # Parameters (initialise_params) and cached activations (forward_prop).
        self.W1 = self.W2 = self.b1 = self.b2 = None
        self.z1 = self.z2 = self.A1 = self.A2 = None
        # Gradients (back_prop).
        self.dW1 = self.dW2 = self.db1 = self.db2 = None
        self.dz1 = self.dz2 = None

    def split_samples(self):
        """Shuffle the samples and split them into training/validation sets.

        One permutation is applied to the columns of both ``X`` and ``Y`` so
        every sample keeps its label. The stored ``self.X`` / ``self.Y`` are
        not modified. The last ``int(validation_split * m)`` shuffled samples
        become the validation set (empty when ``validation_split`` is None).
        """
        m = self.X.shape[1]
        order = np.random.permutation(m)
        x = self.X[:, order]
        y = self.Y[:, order]

        vs = 0
        if self.validation_split is not None:
            vs = int(self.validation_split * m)
        n_train = m - vs

        self.X_train = x[:, :n_train]
        self.Y_train = y[:, :n_train]
        self.X_valid = x[:, n_train:]
        self.Y_valid = y[:, n_train:]

    def initialise_params(self):
        """Draw all weights and biases from a standard normal distribution.

        Layer widths follow the data: ``W1`` is ``(HIDDEN_UNITS, n_features)``
        and ``W2`` is ``(n_outputs, HIDDEN_UNITS)``; biases are column vectors
        so they broadcast across the samples of a batch.
        """
        n_in = self.X.shape[0]
        n_out = self.Y.shape[0]

        self.W1 = np.random.randn(self.HIDDEN_UNITS, n_in)
        self.b1 = np.random.randn(self.HIDDEN_UNITS, 1)

        self.W2 = np.random.randn(n_out, self.HIDDEN_UNITS)
        self.b2 = np.random.randn(n_out, 1)

    @staticmethod
    def sigmoid(x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1/(1 + np.exp(-1*x))

    def _forward(self, X):
        """Run a forward pass on ``X`` and return ``(z1, A1, z2, A2)``."""
        z1 = self.W1 @ X + self.b1
        A1 = self.sigmoid(z1)

        z2 = self.W2 @ A1 + self.b2
        A2 = self.sigmoid(z2)
        return z1, A1, z2, A2

    def forward_prop(self, X):
        """Forward pass that caches pre-activations and activations on self.

        The cached ``A1`` / ``A2`` are consumed by ``train_loss`` and
        ``back_prop``.
        """
        self.z1, self.A1, self.z2, self.A2 = self._forward(X)

    def predict(self, X):
        """Return the network output for ``X`` without touching the cache."""
        return self._forward(X)[3]

    def train_loss(self, Y):
        """Sum of squared errors between ``Y`` and the cached output ``A2``.

        ``Y`` must be the targets of the batch last given to ``forward_prop``.
        """
        return np.sum(np.power(Y - self.A2, 2))

    def valid_loss(self, X, Y):
        """Sum of squared errors of ``predict(X)`` against ``Y``."""
        Y_hat = self.predict(X)
        return np.sum(np.power(Y - Y_hat, 2))

    def back_prop(self, X, Y):
        """Compute gradients of the batch loss w.r.t. every parameter.

        Requires ``forward_prop(X)`` to have been called with the same batch.
        Gradients are summed (not averaged) over the samples, matching the
        summed loss returned by ``train_loss``.
        """
        # d(loss)/d(z2) = d(loss)/d(A2) * sigmoid'(z2)
        self.dz2 = self.A2*(1 - self.A2) * 2*(self.A2 - Y)
        self.dW2 = self.dz2 @ self.A1.T
        self.db2 = np.sum(self.dz2, axis=1, keepdims=True)

        self.dz1 = (self.W2.T @ self.dz2) * (self.A1*(1 - self.A1))
        self.dW1 = self.dz1 @ X.T
        self.db1 = np.sum(self.dz1, axis=1, keepdims=True)

    def update_params(self):
        """Take one gradient-descent step using the stored gradients."""
        self.W1 -= self.lr * self.dW1
        self.W2 -= self.lr * self.dW2
        self.b1 -= self.lr * self.db1
        self.b2 -= self.lr * self.db2

    def train(self, epochs, mini_batch_size=1):
        """Train the network with mini-batch gradient descent.

        Parameters are re-initialised at the start of every call. If
        ``split_samples`` has not been called yet it is called here.

        Args:
            epochs: number of passes over the training set.
            mini_batch_size: samples per update; the final batch of an epoch
                may be smaller when the training set size is not a multiple.

        Returns:
            ``(training_loss, validation_loss)``, two ``(1, epochs)`` arrays.
            The training loss of an epoch is the sum of the batch losses, each
            measured just before that batch's update; the validation loss is
            measured once, at the end of the epoch.

        Raises:
            ValueError: if ``mini_batch_size`` is smaller than 1.
        """
        mini_batch_size = int(mini_batch_size)
        if mini_batch_size < 1:
            raise ValueError("mini_batch_size must be at least 1")
        if self.X_train is None:
            self.split_samples()

        training_loss = np.zeros((1, epochs))
        validation_loss = np.zeros((1, epochs))

        m_train = self.X_train.shape[1]

        self.initialise_params()

        for i in range(epochs):
            for start in range(0, m_train, mini_batch_size):
                stop = start + mini_batch_size
                X = self.X_train[:, start:stop]
                Y = self.Y_train[:, start:stop]

                self.forward_prop(X)
                training_loss[0, i] += self.train_loss(Y)

                self.back_prop(X, Y)
                self.update_params()

            # Evaluated once per epoch so the value does not scale with the
            # number of mini-batches.
            validation_loss[0, i] = self.valid_loss(self.X_valid, self.Y_valid)

            print("Epoch {} :- Training Loss is {}, Validation Loss is {}".format(i+1, training_loss[0, i], validation_loss[0, i]))

        return training_loss, validation_loss