# Multilayer Neural Net implementation using numpy
#### Using sigmoid activation and gradient descent
<hr>

### Imports
Only need numpy and pickle

In [1]:
import numpy as np
import pickle as pkl

We need to one-hot encode the output labels to be able to train the neural net.

In [2]:
def onehot(T,nclasses):
    """One-hot encode the label vector T (written for MNIST labels).

    Args:
        T: Array of integer class labels, one per sample, iterated along its
            first axis (a 1-D vector or an (n, 1) column vector).
        nclasses: Number of columns (classes) in the encoded output.

    Returns:
        A float array of shape (T.shape[0], nclasses) that is all zeros
        except for a 1 at column ``label - 1`` of each row.
    """
    y = np.zeros((T.shape[0], nclasses))
    for row, label in enumerate(T):
        # Labels are shifted down by one, so label 1 maps to column 0; a
        # label of 0 wraps around to the last column via negative indexing.
        y[row, label - 1] = 1
    return y

### An initialization for the Multilayer Neural Net in the system.
Takes parameters for just the layer size.

In [3]:
class MultilayerNN:
    """Multilayer neural net stored as a list of per-layer weight matrices.

    After construction the instance has:
        w: list of weight matrices, one per layer, ordered input -> output.
            Each has one extra leading row for the bias term.
        nlayers: number of weight matrices in ``w``.
    """
    def __init__(self, input_size, output_size, hidden_layers = None):
        """Initialize random weights for the requested layer sizes.

        Args:
            input_size: Number of input features.
            output_size: Number of output units.
            hidden_layers: Optional sequence of hidden layer widths, in
                order. ``None`` or an empty sequence builds a single
                input -> output layer.
        """
        hidden = [] if hidden_layers is None else list(hidden_layers)
        sizes = [input_size] + hidden + [output_size]
        # One matrix per consecutive pair of layer sizes; the +1 row holds
        # the bias weights for that layer.
        self.w = [
            self._gen_layer_weights(fan_in + 1, fan_out)
            for fan_in, fan_out in zip(sizes, sizes[1:])
        ]
        self.nlayers = len(self.w)

### Generates random weights for the layer
Weights are simply drawn from a standard normal distribution, $\mathcal{N}(0,1)$.

In [4]:
def _gen_layer_weights(m, n):
    """Draw an (m, n) weight matrix from a normal distribution.

    Every entry is sampled with mean 0 and standard deviation 1 using
    numpy's global random state.
    """
    shape = (m, n)
    return np.random.normal(loc=0, scale=1, size=shape)

# Bind as a staticmethod: the function takes only (m, n), so attaching it as a
# plain function would pass the instance as ``m`` when it is called through
# ``self._gen_layer_weights(...)`` and fail with a TypeError.
MultilayerNN._gen_layer_weights = staticmethod(_gen_layer_weights)

### Sigmoid activation function with a derivative
Sigmoid = $\frac{1}{1+e^{-z}}$

In [5]:
def _sigmoid(self,X, deriv=False):
    """Apply the sigmoid, or its derivative written in terms of the output.

    Args:
        X: Array or scalar. With ``deriv=False`` it is the pre-activation
            input z. With ``deriv=True`` it is expected to already be a
            sigmoid output a = sigmoid(z), which is how ``_train_epoch``
            calls it.
        deriv: When True, return the derivative instead of the activation.

    Returns:
        ``1 / (1 + exp(-X))`` when ``deriv`` is False, otherwise
        ``X * (1 - X)``.
    """
    if deriv:
        # d/dz sigmoid(z) = sigmoid(z) * (1 - sigmoid(z)) = a * (1 - a),
        # so no further sigmoid is applied to an activation.
        return X*(1-X)
    return 1/(1+np.exp(-X))

# Attach the module-level function to the class so it is callable as a method.
MultilayerNN._sigmoid = _sigmoid

### Fitting the Neural Net to the dataset
Using gradient descent

In [6]:
def fit(self,X,Y,LR,epochs):
    """Train on (X, Y) for a number of epochs, printing progress each epoch.

    Args:
        X: Input data passed unchanged to ``_train_epoch``.
        Y: Target array; its first axis is the sample count and accuracy is
            measured by comparing ``argmax`` along axis 1 with the output.
        LR: Learning rate passed to ``_train_epoch``.
        epochs: Number of training epochs to run.

    Returns:
        Array of shape (epochs, 1) holding the cost recorded for each epoch.
    """
    j = np.zeros((epochs,1))
    m = Y.shape[0]
    for i in range(epochs):
        out = self._train_epoch(X,Y,LR)
        j[i] = self._cost(out,Y)
        acc = np.sum(np.argmax(out,axis=1) == np.argmax(Y,axis=1))/m
        # Format the scalar j[i, 0] rather than the 1-element slice j[i]:
        # implicit array-to-scalar conversion is deprecated in recent NumPy.
        print("%d - Acc: %f Cost: %f"%(i,acc,j[i, 0]))
    return j

# Attach the module-level function to the class so it is callable as a method.
MultilayerNN.fit = fit

In [7]:
def _train_epoch(self,X,Y,LR):
    """Run one full-batch forward and backward pass, updating weights in place.

    Args:
        X: Input array with one row per sample.
        Y: Target array with the same shape as the network output.
        LR: Step size applied to each layer's gradient.

    Returns:
        The output-layer activations from the forward pass, computed before
        the weights are updated.
    """
    n_samples = Y.shape[0]
    depth = self.nlayers

    # Forward pass: keep each layer's bias-augmented input and its activation.
    biased_inputs = [None]*depth
    activations = [None]*depth
    signal = X
    for idx, weights in enumerate(self.w):
        bias_col = np.ones((signal.shape[0],1))
        augmented = np.column_stack((bias_col,signal))
        out = self._sigmoid(augmented@weights)
        biased_inputs[idx] = augmented
        activations[idx] = out
        signal = out

    grads = [None]*depth

    # Output layer: error times the sigmoid derivative term of the output.
    delta = (Y - activations[-1])*self._sigmoid(activations[-1],deriv=True)
    grads[-1] = (1/n_samples)*(biased_inputs[-1].T@delta)

    # Hidden layers, walking backwards from the layer before the output.
    for back in range(2,depth+1):
        propagated = delta@self.w[1-back].T
        # Column 0 belongs to the bias unit, which has no incoming weights.
        delta = propagated[:,1:]*self._sigmoid(activations[-back],deriv=True)
        grads[-back] = (1/n_samples)*(biased_inputs[-back].T@delta)

    # delta is built from (Y - out), so adding the gradient reduces the error.
    for idx, step in enumerate(grads):
        self.w[idx] += LR*step

    return out
        
# Attach the module-level function to the class so it is callable as a method.
MultilayerNN._train_epoch = _train_epoch


### Cost function
The cost function is cross-entropy.

In [8]:
def _cost(self, out, Y):
    """Return the summed binary cross-entropy between predictions and targets.

    Args:
        out: Predicted values, expected to lie in [0, 1].
        Y: Target array broadcastable against ``out``.

    Returns:
        Scalar sum of ``-Y*log(out) - (1-Y)*log(1-out)`` over all entries.
        The sum is not divided by the number of samples.
    """
    # Keep predictions strictly inside (0, 1): a saturated value of exactly
    # 0 or 1 would otherwise produce log(0) and a NaN/inf cost.
    eps = np.finfo(float).eps
    clipped = np.clip(out, eps, 1 - eps)
    return np.sum(-Y*np.log(clipped) - (1-Y)*np.log(1-clipped))

# Attach the module-level function to the class so it is callable as a method.
MultilayerNN._cost = _cost

In [9]:
def predict(self,X):
    """Run a forward pass on the input X with the current weights.

    Each layer prepends a column of ones (the bias input) to its input,
    multiplies by that layer's weight matrix and applies the sigmoid.
    Returns the activations of the last layer.
    """
    activation = X
    for weights in self.w:
        bias_col = np.ones((activation.shape[0],1))
        with_bias = np.column_stack((bias_col,activation))
        activation = self._sigmoid(with_bias@weights)
    return activation

# Attach the module-level function to the class so it is callable as a method.
MultilayerNN.predict = predict

### Saving and Loading weights
Using pickle to load and save weights

In [10]:
def save(self,filename):
    """Write the weight list ``self.w`` to ``filename`` as a binary pickle."""
    with open(filename,'wb') as handle:
        pkl.Pickler(handle).dump(self.w)
    
def load(self,filename):
    """Replace the weights with those stored in the pickle file ``filename``.

    Both ``self.w`` and ``self.nlayers`` are updated, so a file holding a
    different number of layers than the current network can be loaded.
    """
    # NOTE(security): unpickling can execute arbitrary code, so only load
    # weight files from a trusted source.
    with open(filename,'rb') as f:
        self.w = pkl.load(f)
    # Keep the layer count in sync with the loaded weights; training sizes
    # its per-layer buffers from nlayers.
    self.nlayers = len(self.w)
    
# Attach the module-level functions to the class so they are callable as methods.
MultilayerNN.save = save
MultilayerNN.load = load