In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

## Activation function

In [3]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``z``, with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp is only ever taken of a non-positive number, so it cannot overflow.
    # (The naive form evaluates exp(-z), which overflows to inf and raises a
    # RuntimeWarning when z is a large negative number.)
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar; arrays pass through.
    return result[()]

def sigmoid_d(z):
    """Derivative of the sigmoid at ``z``: ``sigmoid(z) * (1 - sigmoid(z))``.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    Same kind of value as ``sigmoid(z)``, evaluated element-wise.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice;
    # this function is called for every sample during training.
    activation = sigmoid(z)
    return activation * (1.0 - activation)

### Cost function

In [4]:
def cost(activation, target):
    """Quadratic cost: half the squared difference between target and activation.

    Works element-wise on arrays and returns a value of the same shape.
    """
    residual = target - activation
    return 0.5 * residual ** 2

def cost_d(activation, target):
    """Return ``target - activation`` for the quadratic cost.

    Note the sign: this is the *negative* of d(cost)/d(activation), i.e. the
    descent direction. The training loop in this notebook adds (``+=``) the
    resulting changes to the parameters, so the two conventions cancel out.
    """
    descent_direction = target - activation
    return descent_direction

### Simple ANN with 1 hidden layer

Here we implement backpropagation by hand to learn the weights of a simple neural network with a single hidden layer of 30 units, which will attempt to distinguish the digits 0 and 1 in the MNIST dataset.

In [32]:
# Layer sizes: flattened 28x28 image -> 30 hidden units -> 2 outputs.
input_dim = 28 * 28
hidden_dim = 30
out_dim = 2

# Seed NumPy's global RNG so the initial weights (and any later draws from
# np.random's global state) are identical on every Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Weights are drawn from a standard normal distribution; each matrix has one
# row per unit of its layer and one column per input to that layer.
hidden_weights = np.random.normal(0, 1, (hidden_dim, input_dim))
out_weights = np.random.normal(0, 1, (out_dim, hidden_dim))

# Biases start at zero and are stored as column vectors.
hidden_biases = np.zeros((hidden_dim, 1))
out_biases = np.zeros((out_dim, 1))

In [27]:
def forward(x):
    """Run one input through the network and return the output activations.

    Reads the module-level ``hidden_weights`` / ``hidden_biases`` and
    ``out_weights`` / ``out_biases`` and applies ``sigmoid`` after each
    affine layer.
    """
    hidden_layer = sigmoid(hidden_weights.dot(x) + hidden_biases)
    return sigmoid(out_weights.dot(hidden_layer) + out_biases)

In [33]:
def backpropagate(x, y):
    """Compute the parameter changes contributed by a single sample.

    Uses the module-level weights and biases. Because ``cost_d`` returns
    ``target - activation``, the changes returned here are meant to be *added*
    to the parameters (as the training loop does).

    Parameters
    ----------
    x : ndarray
        Input column vector.
    y : ndarray
        Target column vector.

    Returns
    -------
    tuple
        ``(correct, change_out_weights, change_out_biases,
        change_hidden_weights, change_hidden_biases)``, where ``correct`` is
        whether the arg-max of the output equals the arg-max of ``y``.
    """
    # Forward pass, keeping both the pre-activations (z) and activations (a).
    z_hidden = hidden_weights.dot(x) + hidden_biases
    a_hidden = sigmoid(z_hidden)
    z_out = out_weights.dot(a_hidden) + out_biases
    a_out = sigmoid(z_out)
    is_correct = np.argmax(a_out) == np.argmax(y)

    # Backward pass.
    # Error at the output layer.
    delta_out = cost_d(a_out, y) * sigmoid_d(z_out)
    # Error pushed back through the output weights to the hidden layer.
    delta_hidden = out_weights.T.dot(delta_out) * sigmoid_d(z_hidden)

    # Bias changes are the errors themselves; weight changes are the outer
    # product of a layer's error with the activations feeding that layer.
    return (
        is_correct,
        delta_out.dot(a_hidden.T),
        delta_out,
        delta_hidden.dot(x.T),
        delta_hidden,
    )

### Get the MNIST dataset
And just grab the 0 and 1 images

In [40]:
from data_loaders import MNISTLoader
# MNISTLoader comes from the project-local data_loaders module.
# get_training_set() is unpacked into an (images, labels) pair.
# NOTE(review): the cells below presume 28x28 images and integer digit labels
# held in NumPy arrays -- confirm against data_loaders.
mloader = MNISTLoader()
train_data, train_labels = mloader.get_training_set()

In [41]:
# Boolean mask selecting only the samples labelled 0 or 1.
keep = (train_labels == 0) | (train_labels == 1)
train_data = train_data[keep]
train_labels = train_labels[keep]
num_samples = len(train_labels)

# Flatten every image into a column vector: shape (num_samples, pixels, 1).
x = train_data.reshape(num_samples, -1, 1)

# One-hot targets, also as column vectors: shape (num_samples, 2, 1).
# Row i gets a 1 at position train_labels[i].
y = np.zeros((num_samples, 2, 1))
y[np.arange(num_samples), train_labels, 0] = 1

### Stochastic gradient descent

Use SGD to train this small network for a few epochs and watch our training accuracy

In [42]:
# Hyperparameters for mini-batch SGD.
num_epochs = 10
batch_size = 30
learning_rate = 1e-3

# Sample indices (reshuffled every epoch) and the cut points that divide them
# into mini-batches; the last batch holds whatever is left over.
idxes = np.arange(num_samples)
batch_starts = np.arange(batch_size, num_samples, batch_size)

for epoch in range(num_epochs):
    np.random.shuffle(idxes)
    batches = np.split(idxes, batch_starts)
    # Counts correct predictions made while the weights are still changing.
    total_correct = 0
    for batch in batches:
        # Fresh accumulators for this batch, one per parameter array.
        delta_out_weights, delta_out_biases, delta_hidden_weights, delta_hidden_biases = (
            np.zeros(param.shape)
            for param in (out_weights, out_biases, hidden_weights, hidden_biases)
        )

        for idx in batch:
            correct, d_ow, d_ob, d_hw, d_hb = backpropagate(x[idx], y[idx])
            total_correct += correct
            delta_out_weights += d_ow
            delta_out_biases += d_ob
            delta_hidden_weights += d_hw
            delta_hidden_biases += d_hb

        # The per-sample changes are summed (not averaged) over the batch.
        # They are added because cost_d returns target - activation, so they
        # already point in the descent direction.
        out_weights += learning_rate * delta_out_weights
        out_biases += learning_rate * delta_out_biases
        hidden_weights += learning_rate * delta_hidden_weights
        hidden_biases += learning_rate * delta_hidden_biases

    print(f'{total_correct} / {num_samples}')

7806 / 12665
10909 / 12665
12115 / 12665
12330 / 12665
12412 / 12665
12450 / 12665
12468 / 12665
12482 / 12665
12499 / 12665
12511 / 12665
