In [30]:
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split
import time

# Download MNIST (70,000 flattened 28x28 images) from OpenML.
x, y = fetch_openml('mnist_784', version=1, return_X_y=True)

# Recent scikit-learn releases return a pandas DataFrame/Series here. The
# training loop iterates samples with zip(x, y), which on a DataFrame would
# yield column labels instead of rows, so coerce to plain NumPy arrays first.
x = (np.asarray(x) / 255).astype('float32')  # scale pixel values to [0, 1]

# OpenML delivers the digit labels as strings; cast to int before one-hot encoding.
y = to_categorical(np.asarray(y).astype('int'))

# Hold out 15% of the samples for validation; fixed seed for a reproducible split.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)

In [28]:
class NeuralNetwork():
    """Fully connected feed-forward network trained one sample at a time.

    Layer ``l`` (1-based) owns a weight matrix ``W{l}`` of shape
    ``(sizes[l], sizes[l - 1])`` and uses ``activation_functions[l - 1]``.
    There are no bias terms. Weights and the values cached by the most recent
    forward pass (``A0``, ``Z{l}``, ``A{l}``) all live in ``self.params``.

    Args:
        sizes: Number of units per layer, input layer first. Needs at least
            two entries.
        activation_functions: One name per non-input layer, each one of
            ``'sigmoid'``, ``'relu'`` or ``'softmax'``.
        epochs: Number of passes over the training data in ``train``.
        l_rate: Step size used by ``update_network_parameters``.

    Raises:
        ValueError: If ``sizes`` has fewer than two entries, if the number of
            activation names does not equal ``len(sizes) - 1``, or if a name
            is not supported.
    """

    SUPPORTED_ACTIVATIONS = ('sigmoid', 'relu', 'softmax')

    def __init__(self, sizes, activation_functions=['sigmoid', 'sigmoid', 'softmax'], epochs=10, l_rate=0.01):
        if len(sizes) < 2:
            raise ValueError('sizes must contain at least an input and an output layer')
        if len(activation_functions) != len(sizes) - 1:
            raise ValueError(
                'Expected {0} activation functions for {1} layers, got {2}'.format(
                    len(sizes) - 1, len(sizes), len(activation_functions)
                )
            )
        unsupported = [name for name in activation_functions if name not in self.SUPPORTED_ACTIVATIONS]
        if unsupported:
            raise ValueError('Activation function not supported: {0}'.format(unsupported))

        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate
        # Copy so the shared default list (or a caller's list) is never aliased.
        self.activation_functions = list(activation_functions)
        self.num_layers = len(sizes) - 1

        # we save all parameters in the neural network in this dictionary
        self.params = self.initialization()

    def sigmoid(self, x, derivative=False):
        """Element-wise logistic function, or its derivative when requested."""
        # exp(-log(1 + exp(-x))) == 1 / (1 + exp(-x)), but logaddexp does not
        # overflow for large |x|, so the derivative below never becomes inf/inf.
        s = np.exp(-np.logaddexp(0, -np.asarray(x)))
        if derivative:
            return s * (1 - s)
        return s

    def softmax(self, x, derivative=False):
        """Softmax along axis 0.

        With ``derivative=True`` this returns ``s * (1 - s)``, i.e. only the
        diagonal of the softmax Jacobian, which is the element-wise
        approximation ``backward_pass`` relies on.
        """
        x = np.asarray(x)
        # Numerically stable with large exponentials
        exps = np.exp(x - x.max())
        probs = exps / np.sum(exps, axis=0)
        if derivative:
            return probs * (1 - probs)
        return probs

    def relu(self, x, derivative=False):
        """Element-wise rectifier; the derivative is 1 where x > 0, else 0."""
        x = np.asarray(x)
        if derivative:
            return (x > 0).astype(x.dtype)
        return np.maximum(0, x)

    def _activation(self, layer):
        """Return the bound activation method for the 1-based ``layer``."""
        dispatch = {
            'sigmoid': self.sigmoid,
            'relu': self.relu,
            'softmax': self.softmax,
        }
        name = self.activation_functions[layer - 1]
        if name not in dispatch:
            raise ValueError('Activation function not supported: {0}'.format(name))
        return dispatch[name]

    def initialization(self):
        """Create the weight matrices ``W1`` .. ``W{num_layers}``.

        Each matrix is drawn from a standard normal distribution and scaled
        by ``sqrt(1 / fan_out)`` where ``fan_out`` is the size of the layer
        the matrix feeds into.

        Returns:
            dict mapping ``'W{l}'`` to an array of shape
            ``(sizes[l], sizes[l - 1])``.
        """
        params = {}
        for layer in range(1, len(self.sizes)):
            fan_in = self.sizes[layer - 1]
            fan_out = self.sizes[layer]
            params['W' + str(layer)] = np.random.randn(fan_out, fan_in) * np.sqrt(1. / fan_out)
        return params

    def forward_pass(self, x_train):
        """Propagate a single sample through the network.

        Args:
            x_train: 1-D input vector of length ``sizes[0]``.

        Returns:
            The activation vector of the last layer. Intermediate ``Z``/``A``
            values are cached in ``self.params`` for ``backward_pass``.
        """
        params = self.params

        # input layer activations becomes sample
        params['A0'] = x_train

        for layer in range(1, self.num_layers + 1):
            activation = self._activation(layer)
            params['Z' + str(layer)] = np.dot(params['W' + str(layer)], params['A' + str(layer - 1)])
            params['A' + str(layer)] = activation(params['Z' + str(layer)])

        return params['A' + str(self.num_layers)]

    def backward_pass(self, y_train, output):
        """Back-propagate the error of the most recent forward pass.

        The output error is the gradient of the mean squared error,
        ``2 * (output - y_train) / len(output)``, multiplied by the
        element-wise derivative of the output activation.

        Args:
            y_train: Target vector with the same shape as ``output``.
            output: Value returned by the preceding ``forward_pass`` call.

        Returns:
            dict mapping ``'W{l}'`` to the gradient for that weight matrix.
        """
        params = self.params
        change_w = {}
        error = None

        # Walk from the output layer back to the first hidden layer.
        for layer in range(self.num_layers, 0, -1):
            slope = self._activation(layer)(params['Z' + str(layer)], derivative=True)
            if layer == self.num_layers:
                error = 2 * (output - y_train) / output.shape[0] * slope
            else:
                # Pull the next layer's error back through its weights.
                error = np.dot(params['W' + str(layer + 1)].T, error) * slope
            change_w['W' + str(layer)] = np.outer(error, params['A' + str(layer - 1)])

        return change_w

    def update_network_parameters(self, changes_to_w):
        """Apply one gradient-descent step: ``W -= l_rate * gradient``."""
        for key, value in changes_to_w.items():
            self.params[key] -= self.l_rate * value

    def compute_accuracy(self, x_val, y_val):
        """Return the fraction of samples whose arg-max prediction matches the arg-max of the target."""
        predictions = []

        for x, y in zip(x_val, y_val):
            output = self.forward_pass(x)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(y))

        return np.mean(predictions)

    def train(self, x_train, y_train, x_val, y_val):
        """Run ``self.epochs`` epochs of per-sample SGD, printing validation accuracy after each."""
        start_time = time.time()
        for iteration in range(self.epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_to_w = self.backward_pass(y, output)
                self.update_network_parameters(changes_to_w)

            accuracy = self.compute_accuracy(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration+1, time.time() - start_time, accuracy * 100
            ))

In [29]:
# Build a 784-128-64-10 network with the default activations and train it.
# The class defined in this notebook is `NeuralNetwork`; the previous name
# `DeepNeuralNetwork` is not defined anywhere and raised a NameError.
dnn = NeuralNetwork(sizes=[784, 128, 64, 10])
dnn.train(x_train, y_train, x_val, y_val)

Epoch: 1, Time Spent: 25.23s, Accuracy: 15.60%
Epoch: 2, Time Spent: 52.15s, Accuracy: 21.43%


KeyboardInterrupt: 