In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [132]:
class MLP():
    def __init__(self, X, Y, learning_rate=0.05, validation_split=None):
        self.X = X
        self.Y = Y
        self.X_train = self.X_valid = self.Y_train = self.Y_valid = None
        self.lr = learning_rate
        self.validation_split = validation_split
        self.W1 = self.W2 = self.b1 = self.b2 = self.z1 = self.z2 = self.A1 = self.A2 = None
        self.dW1 = self.dW2 = self.db1 = self.db2 = self.dz1 = self.dz2 = None
        self.training_loss = self.validation_loss = None
    
    def split_samples(self):
        indices = np.arange(N)
        np.random.shuffle(indices)
        x = self.X[:, indices]
        y = self.Y[:, indices]
        vs = 0
        m = self.X.shape[1]
        if self.validation_split is not None:
            vs = int(self.validation_split*m)
        self.X_train = x[0:(m-vs)]
        self.Y_train = y[0:(m-vs)]
        self.X_valid = x[(m-vs):]
        self.Y_valid = y[(m-vs):]
        
        
    def initialise_params(self):
        self.W1 = np.random.randn(2, 2)
        self.b1 = np.random.randn(2, 1)
        
        self.W2 = np.random.randn(1, 2)
        self.b2 = np.random.randn(1, 1)

    def sigmoid(self, x):
        return 1/(1 + np.exp(-1*x))
        
    def forward_prop(self, X):
        print(self.W1.shape)
        print(X.shape)
        print(self.b1.shape)
        self.z1 = self.W1@X + self.b1
        self.A1 = self.sigmoid(self.z1)
        
        self.z2 = self.W2@self.A1 + self.b2
        self.A2 = self.sigmoid(self.z2)
    
    def predict(self, X):
        z1 = self.W1@X + self.b1
        A1 = selr.sigmoid(z1)
        
        z2 = self.W2@A1 + self.b2
        A2 = self.sigmoid(z2)
        return A2
    
    def train_loss(self, Y):
        return np.sum(np.power(Y - self.A2, 2))
    
    def valid_loss(self, X, Y):
        Y_hat = self.predict(X)
        return np.sum(np.power(Y - Y_hat, 2))
    
    def back_prop(self, X, Y):
        self.dz2 = self.A2*(1 - self.A2) * 2*(self.A2 - Y)
        self.dW2 = self.dz2 @ self.A1.T
        self.db2 = np.sum(self.dz2, axis=1, keep_dim=True)
        
        self.dz1 = (self.W2.T @ self.dz2) * (self.A1*(1 - self.A1))
        self.dW1 = self.dz1 @ X.T
        self.db1 = np.sum(self.dz1, axis=1, keep_dim=True)
    
    def update_params(self):
        self.W1 -= self.lr * self.dW1
        self.W2 -= self.lr * self.dW2
        self.b1 -= self.lr * self.db1
        self.b2 -= self.lr * self.db2
    
    def train(self, epochs=1, mini_batch_size=1):
        
        self.split_samples()
        
        self.training_loss = np.zeros((1, epochs))
        self.validation_loss = np.zeros((1, epochs))
        
        N_mini_batch_size = int(self.X_train.shape[1] / mini_batch_size)
        print()
        
        self.initialise_params()
        
        for i in range(epochs):
            for j in range(N_mini_batch_size-1):
                X = self.X_train[:, (j)*mini_batch_size : (j+1)*mini_batch_size]
                Y = self.Y_train[:, (j)*mini_batch_size : (j+1)*mini_batch_size]
                
                self.forward_prop(X)
                
                self.training_loss[0, i] += (self.train_loss(Y))
                self.validation_loss[0, i] += (self.valid_loss(self.X_valid, self.Y_valid))
                
                self.back_prop(X, Y)
                
                self.update_params()
                
            X = self.X_train[:, (-1*N_mini_batch_size) :]
            Y = self.Y_train[:, (-1*N_mini_batch_size) :]

            self.forward_prop(X)

            self.training_loss[0, i] += (self.train_loss(Y))
            self.validation_loss[0, i] += (self.valid_loss(self.X_valid, self.Y_valid))

            self.back_prop(X, Y)

            self.update_params()  
            
            print("Epoch {} :- Training Loss is {}, Validation Loss is {}".format(i+1, self.training_loss[0, i], self.validation_loss[0, i]))

In [133]:
def generate_samples(N, operation):
    var = 0.01
    X = np.zeros((2, N))
    X[:, 0:4] = np.matrix([[0, 0, 1, 1], [0, 1, 0, 1]])
    Y = np.zeros((1, N))
    
    if(operation == "XOR"):
        Y[0, 0:4] = np.array([0, 1, 1, 0])
    elif(operation == "AND"):
        Y[0, 0:4] = np.array([0, 0, 0, 1])
    elif(operation == "OR"):
        Y[0, 0:4] = np.array([0, 1, 1, 1])
    
    for i in range(4, N):
        X[0, i] = X[0, i%4] + np.sqrt(var)*np.random.randn()
        X[1, i] = X[1, i%4] + np.sqrt(var)*np.random.randn()
        Y[0, i] = Y[0, i%4]
    
    indices = np.arange(N)
    np.random.shuffle(indices)
    x = X[:, indices]
    y = Y[:, indices]
    return x, y

In [134]:
N = 1000
X_xor, Y_xor = generate_samples(N, "XOR")
XOR = MLP(X_xor, Y_xor, validation_split = 0.1)

In [135]:
XOR.train(epochs=1, mini_batch_size=2)


(2, 2)
(2, 2)
(2, 1)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 0 is different from 2)