# Demo using the MNIST Dataset

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from NeuralNet.nnet import NeuralNet

# Raise pandas' display limits so wide/long DataFrames are shown without
# truncation (up to 500 rows and 2000 columns).
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 2000)

In [None]:
# Load the two CSV files shipped under ./resources into DataFrames.
# NOTE(review): test_df is not referenced again in the cells visible here.
train_df = pd.read_csv('./resources/train.csv')
test_df = pd.read_csv('./resources/test.csv')

In [None]:
# sample(frac=1) returns every row in random order, i.e. it shuffles the frame.
# No random_state is passed, so the order (and the later split) differs per run.
train_df = train_df.sample(frac=1)
# Preview the first rows of the shuffled frame.
train_df.head()

In [None]:
# Flag, per column, whether it contains any null value, then summarise those flags.
train_df.isnull().any().describe()

In [None]:
# Number of rows in the shuffled training frame.
len(train_df)

In [None]:
# Split the shuffled frame: the first rows train the network, rows taken from
# what remains validate it.
train_rows = 30000
validation_rows = 12000
training = train_df.head(train_rows)
# Take the validation rows only from the part that follows the training slice,
# so the two sets can never overlap when the frame has fewer than
# train_rows + validation_rows rows (same result as tail(12000) otherwise).
validation = train_df.iloc[train_rows:].tail(validation_rows)
del train_df # free some memory

In [None]:
def one_hot_encode(labels, label_list):
    """
    Turn class labels into one-hot vectors.

    Parameters
    ----------
    labels : iterable
        Labels to encode; each one must be present in `label_list`.
    label_list : list
        All known classes. The position of a class in this list is the index
        that is set to 1 in its vector.

    Returns
    -------
    list of numpy arrays
        One vector of length ``len(label_list)`` per input label.

    Raises
    ------
    ValueError
        If a label is not found in `label_list` (raised by ``list.index``).
    """
    def encode(label):
        # All zeros except for the slot that belongs to this label's class.
        vector = np.zeros(len(label_list))
        vector[label_list.index(label)] = 1
        return vector

    return [encode(label) for label in labels]
def normalize_data(data):
    """
    Split a DataFrame into scaled features and one-hot encoded labels.

    Parameters
    ----------
    data : pandas DataFrame
        Feature columns, optionally accompanied by a 'label' column.
        Feature values are presumably 0-255 pixel intensities (they are
        divided by 255) -- confirm against the CSV files.

    Returns
    -------
    features : pandas DataFrame
        Every column except 'label', divided by 255.
    labels : list
        One-hot vectors for the digits 0-9, or an empty list when `data`
        has no 'label' column.
    """
    if 'label' not in data.columns:
        # Unlabelled data: every column is a feature, so nothing may be
        # dropped (slicing off the first column would lose a pixel).
        return (data / 255, [])

    labels = data['label']
    # Drop the label by name instead of by position so the result does not
    # depend on where the column sits in the frame.
    features = data.drop(columns='label') / 255
    print('label len =', len(labels))
    # Pass the series as `x` explicitly: recent seaborn versions treat the
    # first positional argument as `data`.
    sns.countplot(x=labels)
    labels = one_hot_encode(labels, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    return (features, labels)

In [None]:
# Scale the training split and one-hot encode its labels
# (normalize_data also prints the label count and draws a count plot).
t_features, t_labels = normalize_data(training)
del training # free some memory

In [None]:
# Summary statistics of the scaled training features.
t_features.describe()

In [None]:
# Same preprocessing for the validation split.
v_features, v_labels = normalize_data(validation)
# The raw validation frame is no longer needed once it has been processed.
del validation

In [None]:
# Summary statistics of the scaled validation features.
v_features.describe()

In [None]:
# Take the training row at position 870, reshape its flat feature vector
# into a 28x28 grid and render it as an image.
example = t_features.iloc[870].values
example = example.reshape((28, 28))
plt.imshow(example)

## Coding the Neural Network

In [None]:
class NeuralNetwork:
    """
    Fully connected feed-forward classifier trained with mini-batch gradient descent.

    Every layer except the last applies ReLU; the last layer applies softmax.
    The loss of one example is ``-log(prediction . label)``, i.e. the cross
    entropy for a one-hot label.
    """

    def __init__(self, input_size, topology):
        """
        Specify the Network's architecture.

        Initializes weights (He initialization) and biases (zeros) of the Network.

        Parameters
        ----------
        input_size : int
            Size of the input layer of the Network
        topology : tuple
            Tuple that contains the number of nodes per layer
            (e.g. (256, 128) --> 2 layers of 256 and 128 nodes respectively)
        """
        self.input_size = input_size
        self.topology = topology
        self.weights = []
        self.biases = []
        prev_layer_size = input_size
        for layer_size in topology:
            # He initialization: standard normal scaled by sqrt(2 / fan_in).
            scale = np.sqrt(2 / prev_layer_size)
            self.weights.append(np.random.randn(layer_size, prev_layer_size) * scale)
            self.biases.append(np.zeros(layer_size))
            prev_layer_size = layer_size

    def softmax(self, vec):
        """
        Calculate the softmax of a vector.

        The maximum is subtracted before exponentiating. This leaves the
        result unchanged mathematically but keeps ``exp`` from overflowing
        (and the result from turning into NaN) for large inputs.
        """
        vec = np.asarray(vec)
        exps = np.exp(vec - np.max(vec))
        return exps / np.sum(exps)

    def forwardprop(self, x, y):
        """
        Forward propagation of the Network.

        Parameters
        ----------
        x : numpy array
            Current training example
        y : numpy array
            Current label example

        Returns
        -------
        ops : list
            Contains every vector calculated during the forward propagation
            from input to output (both included), laid out as
            ``[x, z_1, a_1, ..., z_L, a_L]``
        loss : float
            Loss of the given training example
        """
        ops = [x]
        last = len(self.weights) - 1
        for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            z = np.matmul(weight, ops[-1]) + bias
            # Softmax on the output layer, ReLU everywhere else.
            a = self.softmax(z) if i == last else np.maximum(z, 0)
            ops.append(z)
            ops.append(a)
        loss = -np.log(np.dot(ops[-1], y))
        return (ops, loss)

    def _gradients(self, ops, y):
        """
        Compute the loss gradients of one example without changing any parameter.

        Parameters
        ----------
        ops : list
            Vectors returned by `forwardprop` for the example (not modified)
        y : numpy array
            Label of the example

        Returns
        -------
        list of tuples
            One ``(weight_gradient, bias_gradient)`` pair per layer, ordered
            from the first layer to the output layer
        """
        n_layers = len(self.topology)
        per_layer = [None] * n_layers
        # For softmax + cross entropy with a one-hot label the gradient with
        # respect to the last pre-activation is (prediction - label).
        delta = ops[-1] - y
        for layer in range(n_layers - 1, -1, -1):
            # ops[2 * layer] is the vector that was fed into this layer.
            per_layer[layer] = (np.outer(delta, ops[2 * layer]), delta)
            if layer > 0:
                upstream = np.dot(self.weights[layer].T, delta)
                # ReLU derivative, evaluated on the previous layer's pre-activation.
                delta = np.where(ops[2 * layer - 1] > 0, 1, 0) * upstream
        return per_layer

    def _apply_gradients(self, l_rate, per_layer):
        """Take one gradient descent step using per-layer (weight, bias) gradients."""
        for i, (grad_w, grad_b) in enumerate(per_layer):
            self.weights[i] -= l_rate * grad_w
            self.biases[i] -= l_rate * grad_b

    def backprop(self, l_rate, ops, y):
        """
        Backpropagation of the Network for a single example.

        Computes the gradients and immediately updates weights and biases.
        `ops` is left untouched.

        Parameters
        ----------
        l_rate : float
            Learning rate
        ops : list
            Contains every vector calculated during the forward propagation
        y : numpy array
            Current label example

        Returns
        -------
        list
            Contains every gradient calculated during the backpropagation,
            from the output layer back to the first layer:
            ``[dW_L, db_L, ..., dW_1, db_1]``
        """
        per_layer = self._gradients(ops, y)
        self._apply_gradients(l_rate, per_layer)
        flat = []
        for grad_w, grad_b in reversed(per_layer):
            flat.append(grad_w)
            flat.append(grad_b)
        return flat

    def _plot_curves(self, frame, y_label, title):
        """Draw the columns of `frame` as a Training/Validation line chart."""
        f, ax = plt.subplots(figsize=(20, 15))
        plt.plot(frame, linewidth=4)
        f.legend(("Training", "Validation"), fontsize=25)
        plt.xticks(fontsize=25)
        plt.yticks(fontsize=25)
        plt.xlabel('Steps', fontsize=25)
        plt.ylabel(y_label, fontsize=25)
        plt.title(title, fontsize=40)

    def plot_stats(self, stats):
        """
        Plots training and validation's loss and accuracy.

        Parameters
        ----------
        stats : list of tuples
            ``(training loss, validation loss, training accuracy,
            validation accuracy)`` records as returned by `training`
        """
        df = pd.DataFrame(stats)
        # Columns 0-1 hold the losses, columns 2-3 the accuracies.
        self._plot_curves(df.iloc[:, :2], "Loss", "Cross Entropy over whole dataset")
        self._plot_curves(df.iloc[:, 2:4], "Accuracy", "Accuracy over whole dataset")

    def _evaluate(self, features, labels):
        """
        Return (mean loss, accuracy) of the current parameters on a dataset.

        An empty dataset yields (nan, nan) instead of a division by zero.
        """
        count = len(features)
        if count == 0:
            return (float('nan'), float('nan'))
        total_loss = 0
        hits = 0
        for x, y in zip(features, labels):
            ops, loss = self.forwardprop(x, y)
            total_loss += loss
            # With a one-hot label, y[argmax] is 1 exactly when the prediction is right.
            hits += y[np.argmax(ops[-1])]
        return (total_loss / count, hits / count)

    def update_stats(self, t_features, t_labels, v_features, v_labels):
        """
        Computes and prints new stats using the current weights and biases.

        Returns
        -------
        tuple
            ``(training loss, validation loss, training accuracy, validation accuracy)``
        """
        t_loss, t_acc = self._evaluate(t_features, t_labels)
        v_loss, v_acc = self._evaluate(v_features, v_labels)
        print('training:\n loss:', t_loss, 'accuracy:', t_acc)
        # Report the validation accuracy here, not the training one.
        print('validation:\n loss:', v_loss, 'accuracy', v_acc)
        print()
        return ((t_loss, v_loss, t_acc, v_acc))

    def next_batch(self, features, labels, batch_size):
        """Yield consecutive (features, labels) slices of at most `batch_size` examples."""
        for start in range(0, len(features), batch_size):
            stop = start + batch_size
            yield features[start:stop], labels[start:stop]

    def training(self, t_features, t_labels, v_features, v_labels, l_rate, epochs, batch_size, plot_steps):
        """
        Function to train the Network with mini-batch gradient descent.

        For every batch the gradients of all its examples are averaged and
        applied in a single update, so each example contributes to training.

        Parameters
        ----------
        t_features : numpy array
            Training features
        t_labels : numpy array
            Training labels
        v_features : numpy array
            Validation features
        v_labels : numpy array
            Validation labels
        l_rate : float
            Learning rate
        epochs : int
            Epochs of training
        batch_size : int
            Batch size for training
        plot_steps : int
            Number of processed examples between two stats records

        Returns
        -------
        List of tuples containing training and validation's loss and accuracy
        """
        steps = 0
        stats = []
        for epoch_nb in range(epochs):
            for batch_x, batch_y in self.next_batch(t_features, t_labels, batch_size):
                grad_sums = None
                count = 0
                for x, y in zip(batch_x, batch_y):
                    ops, _ = self.forwardprop(x, y)
                    if steps % plot_steps == 0:
                        stats.append(self.update_stats(t_features, t_labels, v_features, v_labels))
                    steps += 1
                    grads = self._gradients(ops, y)
                    if grad_sums is None:
                        grad_sums = grads
                    else:
                        grad_sums = [
                            (sum_w + grad_w, sum_b + grad_b)
                            for (sum_w, sum_b), (grad_w, grad_b) in zip(grad_sums, grads)
                        ]
                    count += 1
                if count:
                    # Divide by the real batch length: the last batch may be shorter.
                    mean_grads = [(sum_w / count, sum_b / count) for sum_w, sum_b in grad_sums]
                    self._apply_gradients(l_rate, mean_grads)
        return (stats)

In [None]:
# 784 inputs (28 * 28 values per image), one hidden layer of 512 nodes and a
# 10-node output layer (one node per digit class).
nn = NeuralNetwork(input_size=784, topology=(512, 10))

In [None]:
# Convert the feature DataFrames to NumPy arrays; the label lists are passed on as they are.
# NOTE(review): despite the test_* names, these two variables hold the
# training split (t_features / t_labels), not data from test_df.
test_features = t_features.values
test_labels = t_labels
val_features = v_features.values
val_labels = v_labels

In [None]:
# Train for 5 epochs with batches of 75 examples; stats are recorded every
# 2000 processed examples. The returned list of
# (training loss, validation loss, training accuracy, validation accuracy)
# tuples is kept for plotting.
loss_plot = nn.training(test_features, test_labels, val_features, val_labels, l_rate=0.01, epochs=5, batch_size=75, plot_steps=2000)

In [None]:
# Draw the loss and accuracy curves collected during training.
nn.plot_stats(loss_plot)