In [1]:
import pickle
import numpy as np
import plotly.express as ex
from sklearn.model_selection import train_test_split
import json

__author__="Nuo Chen"

# MLP Model 
---
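A basic multilayer perceptron (MLP): a fully connected feed-forward network.

**Input**: a feature matrix with one column per sample, i.e. shape `(n_features, n_samples)`.

**Learning rule**:
- Delta rule
- Backpropagation (generalised delta rule)

**Output**: class probabilities from the softmax output layer, one column per sample.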


    

In [3]:
class Model:
    """
    A fully-connected multilayer perceptron trained with mini-batch gradient descent.

    Conventions used throughout the class:
        * Samples are stored column-wise: features are (n_features, n_samples) and
          one-hot targets are (n_classes, n_samples).
        * W[i] has shape (NODES[i], NODES[i+1]) and B[i] has shape (NODES[i+1], 1),
          so layer i computes ``W[i].T @ a + B[i]``.
        * Back-propagation assumes ReLU hidden layers and a softmax output trained
          with cross-entropy plus an L2 penalty ``LAMBDA * sum(W**2)``.
    """

    def __init__(self, nodes):
        """
        Arg:
            nodes - [in, n1, n2, ..., nj, out] an array of numbers of nodes in each layer.
        Returns:
            a fully-connected network with default weights and biases sampled from N(0, 1)
        """
        self.n_layers = len(nodes)
        self.NODES = nodes
        self.init_param()

    def init_param(self):
        """
        Initialize the weights and biases with samples from normal distributions
        W = [shape(n1,n2), shape(n2,n3), ..., shape(nj,nj+1)], where nj = number of nodes in the layer
        B = [shape(n2,1), shape(n3,1), ..., shape(nj+1,1)] (one bias column per non-input layer)
        """
        self.W = [np.random.normal(0, 1, (n_in, n_out))
                  for n_in, n_out in zip(self.NODES[:-1], self.NODES[1:])]
        self.B = [np.random.normal(0, 1, (n_out, 1)) for n_out in self.NODES[1:]]

    def init_training_param(self, n, batch_size, epochs, eta, lmbda):
        """
        Initialize the hyper-parameters

        Args:
            n          - number of samples
            batch_size - number of samples per mini-batch
            epochs     - number of passes over the training data
            eta        - learning rate
            lmbda      - L2 regularisation strength
        """
        self.n_samples = n
        self.BATCH_SIZE = batch_size
        # SGD reads EPOCHS; the misspelled EPOCS is kept as an alias for older callers.
        self.EPOCHS = epochs
        self.EPOCS = epochs
        self.BATCHES = int(n // batch_size)
        self.LAMBDA = lmbda
        self.ETA = eta

    def normalize(self, x):
        """
        Normalizes the inputs: each row (axis=1) is shifted to zero mean and scaled
        to unit standard deviation. Rows with zero standard deviation are only
        centred (they become all zeros) instead of producing NaN/inf.
        """
        mean = np.mean(x, axis=1, keepdims=True)
        std = np.std(x, axis=1, keepdims=True)
        std = np.where(std == 0, 1.0, std)  # avoid division by zero on constant rows
        return (x - mean) / std

    @staticmethod
    def softmax():
        """Returns a column-wise softmax function (max-shifted for numerical stability)."""
        def _softmax(x):
            exps = np.exp(x - np.max(x, axis=0))
            return exps / np.sum(exps, axis=0)
        return _softmax

    @staticmethod
    def relu():
        """Returns the element-wise ReLU function max(0, x)."""
        return lambda x: np.maximum(0, x)

    @staticmethod
    def delta_rule():
        """Returns the delta-rule weight update: -eta * (w @ x - t) @ x.T"""
        return lambda x, w, t, eta: -eta * (w @ x - t) @ x.T

    @staticmethod
    def sigmoid():
        """Returns the bipolar sigmoid 2 / (1 + exp(-x)) - 1, with values in (-1, 1)."""
        return lambda x: 2 / (1 + np.exp(-x)) - 1

    def cross_entropy(self, p, y):
        """
        Returns the cross-entropy cost of the prediction, averaged over the
        samples (columns of y), plus the L2 penalty LAMBDA * sum(W**2).
        The caller's `p` is not modified.
        """
        # Replace exact zeros so that log() stays finite.
        safe_p = np.where(p == 0, 1e-7, p)
        cost = 1 / y.shape[1] * -np.sum(y * np.log(safe_p))
        penalty = sum(np.sum(w ** 2) for w in self.W)
        return cost + self.LAMBDA * penalty

    def feedforward(self, activations, act_fn=None, out_fn=None):
        """
        s1 = w1.T @ x + b1
        h1 = act_fn(s1)
        ....
        sn = wn.T @ h(n-1) + bn
        return out_fn(sn)

        Args:
            activations - a list whose first element is the input matrix; the
                          output of every layer is appended to it in order, so
                          afterwards it is [x, h1, ..., h(n-1), prediction].
            act_fn      - hidden-layer activation (defaults to ReLU)
            out_fn      - output-layer activation (defaults to softmax)
        """
        if act_fn is None:
            act_fn = self.relu()
        if out_fn is None:
            out_fn = self.softmax()

        a = activations[0]
        if not self.W:
            # Degenerate network without any weight layer.
            return out_fn(a)

        last = len(self.W) - 1
        for i, (w, b) in enumerate(zip(self.W, self.B)):
            s = w.T @ a + b
            a = out_fn(s) if i == last else act_fn(s)
            activations.append(a)

        return a

    def backPropagation(self, y, p, activations):
        """
        Back propagate the network and calculate the gradients

        Args:
            y           - one-hot targets, one column per sample
            p           - predictions returned by feedforward
            activations - the list filled in by feedforward ([x, h1, ...])
        Returns:
            (dw, db) where dw[i] has the shape of W[i].T (callers apply dw[i].T
            to W[i]) and db[i] has the shape of B[i].
        """
        n_batch = y.shape[1]  # average over the actual batch, not the configured size
        dw = [None] * len(self.W)
        db = [None] * len(self.B)

        g = p - y  # gradient of softmax + cross-entropy w.r.t. the output pre-activation
        for i in range(len(self.W) - 1, -1, -1):
            dw[i] = g @ activations[i].T / n_batch + 2 * self.LAMBDA * self.W[i].T
            db[i] = (np.sum(g, axis=1) / n_batch).reshape(self.B[i].shape)
            if i > 0:
                # Propagate to the previous hidden layer and apply the ReLU derivative.
                g = self.W[i] @ g
                g[activations[i] <= 0] = 0

        return (dw, db)

    def backPass(self, labels, predictions, activations):
        """
        Alias of backPropagation using descriptive argument names.
        Returns the same (dw, db) tuple.
        """
        return self.backPropagation(labels, predictions, activations)

    def accuracy(self, p, y):
        """
        Compute the accuracy of the predictions: the fraction of columns where
        the arg-max of p matches the arg-max of y.
        """
        predicted = np.argmax(p, axis=0)
        expected = np.argmax(y, axis=0)
        return float(np.mean(predicted == expected))

    def update_batch(self, x, y):
        """
        For each batch:
            Pass the input into the network and compute the predictions.
            Back propagate through the network to compute the gradients using the stored activations.
            Update the weights and biases using the gradients
        """
        activations = [x]
        p = self.feedforward(activations, self.relu(), self.softmax())
        dw, db = self.backPropagation(y, p, activations)

        for i in range(len(self.W)):
            # dw[i] is stored with the shape of W[i].T, hence the transpose.
            self.W[i] = self.W[i] - self.ETA * dw[i].T
            self.B[i] = self.B[i] - self.ETA * db[i]

    def _train_epochs(self, train_features, train_labels, test_features, test_labels, verbose=False):
        """
        Run EPOCHS passes of mini-batch gradient descent over column-wise data
        and record cost/accuracy on both sets after every epoch.
        """
        training_cost = []
        validation_cost = []
        training_accuracy = []
        validation_accuracy = []

        n_train = train_features.shape[1]
        for epoch in range(self.EPOCHS):
            # Shuffles the order of samples
            order = np.random.permutation(n_train)
            # Step through every sample; the last batch may be smaller.
            for start in range(0, n_train, self.BATCH_SIZE):
                cols = order[start:start + self.BATCH_SIZE]
                self.update_batch(train_features[:, cols], train_labels[:, cols])

            # Check cost and accuracy once per epoch
            p_t = self.feedforward([train_features])
            p_v = self.feedforward([test_features])
            training_cost.append(self.cross_entropy(p_t, train_labels))
            validation_cost.append(self.cross_entropy(p_v, test_labels))
            training_accuracy.append(self.accuracy(p_t, train_labels))
            validation_accuracy.append(self.accuracy(p_v, test_labels))

            if verbose:
                print("Epoch #{}--------------------------------------".format(epoch))
                print("Training Cost: {:.6f}".format(training_cost[-1]))
                print("Validation Cost: {:.6f}".format(validation_cost[-1]))
                print("Training Accuracy = {:.3f}".format(training_accuracy[-1]))
                print("Validation Accuracy = {:.3f}".format(validation_accuracy[-1]))
                print("-" * 50)

        return (training_cost, validation_cost, training_accuracy, validation_accuracy)

    def SGD(self, data, features, targets, test_size, verbose=False):
        """
        Stochastic gradient descend method
        Trains the network a given number of epochs or cycles

        Args:
            data      - unused; kept so existing callers keep working
            features  - (n_features, n_samples) input matrix
            targets   - (n_classes, n_samples) one-hot target matrix
            test_size - forwarded to train_test_split
            verbose   - print the metrics after every epoch
        Return:
            Training cost and validation cost
            Training accuracy and validation accuracy
        """
        features = np.asarray(features)
        targets = np.asarray(targets)

        # train_test_split splits along the first axis, while this class keeps
        # samples in columns: split the transposed arrays and transpose back.
        parts = train_test_split(features.T, targets.T, test_size=test_size, random_state=2020)
        train_features, test_features, train_labels, test_labels = (part.T for part in parts)

        return self._train_epochs(train_features, train_labels, test_features, test_labels, verbose)

    def save(self, filename):
        """
        Save the model (layer sizes, weights and biases) to the file `filename` as JSON.
        """
        data = {"Nodes": self.NODES,
                "W": [w.tolist() for w in self.W],
                "B": [b.tolist() for b in self.B]}
        with open(filename, "w") as f:
            json.dump(data, f)

    def load(self, filename):
        """
        Load the model from a JSON file written by `save`.
        """
        with open(filename, "r") as f:
            data = json.load(f)

        self.NODES = data["Nodes"]
        # Keep the layer count in sync with the loaded architecture.
        self.n_layers = len(self.NODES)
        self.W = [np.array(w) for w in data["W"]]
        self.B = [np.array(b) for b in data["B"]]

