In [5]:
# -*- coding: utf-8 -*-
"""Copy of mnist-with-only-numpy.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mn3cSV3wqzGJCeZ6_vznjdnkQn3DtJNH
"""

import numpy as np
import tensorflow as tf # only for data
import matplotlib.pyplot as plt



"""Before loading data, we will just make some quick utility functions"""

def to_categorical(x, n_col=None):
  """ One hot encoding function"""
  if not n_col:
    n_col = np.amax(x) + 1
  
  one_hot = np.zeros((x.shape[0], n_col))
  one_hot[np.arange(x.shape[0]), x] = 1
  return one_hot

def accuracy(y_true, y_pred):
  ### Returns accuracy ###
  return np.sum(y_true == y_pred, axis = 0) / len(y_true)

def batch_loader(X, y = None, batch_size=64):
  """ Generates batches for training"""
  n_samples = X.shape[0]
  for i in np.arange(0, n_samples, batch_size):
    begin, end = i, min(i + batch_size, n_samples)
    if y is not None:
      yield X[begin:end], y[begin: end]
    else:
      yield X[begin:end]

"""## Data

We will load and process the data here
"""

## Now, lets load mnist
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# This is the only time we use tensorflow

y_train, y_test = to_categorical(y_train.astype("int")), to_categorical(y_test.astype("int"))
X_train, X_test = X_train / 255.0, X_test / 255.0

X_train, X_test = X_train.reshape(-1, 28*28), X_test.reshape(-1, 28*28)
X_train.shape, X_test.shape

n_input_dim = 28*28 
n_out = 10 

"""## Loss and activation

We implement the loss and activation functions here
"""

# Now lets implement the loss and activation functions
class CrossEntropy():
  def __init__(self): pass

  def loss(self, y, p):
    p = np.clip(p, 1e-15, 1- 1e-15)
    return -y*np.log(p) - (1 - y) * np.log(1- p)
  
  def gradient(self, y, p):
    p = np.clip(p, 1e-15, 1- 1e-15)
    return -(y/p) + (1 - y) / (1 - p)

# Now for our 2 activation functions, leakyReLU and Softmax

class LeakyReLU():
  def __init__(self, alpha = 0.2):
    self.alpha = alpha
  
  def __call__(self, x):
    return self.activation(x)
  
  def activation(self, x):
    return np.where(x >= 0, x, self.alpha * x)
  
  def gradient(self, x):
    return np.where(x >= 0, 1, self.alpha)

class Softmax():
  def __init__(self): pass
  
  def __call__(self, x):
    return self.activation(x)
  
  def activation(self, x):
    e_x = np.exp(x - np.max(x, axis = -1, keepdims=True))
    return e_x / np.sum(e_x, axis=-1, keepdims = True)
  
  def gradient(self, x):
    # Error was in our softmax
    p = self.activation(x)
    return p * (1 - p)



class Activation():
  def __init__(self, activation, name="activation"):
    self.activation = activation
    self.gradient = activation.gradient
    self.input = None
    self.output = None
    self.name = name
  
  def forward(self, x):
    self.input = x
    self.output = self.activation(x)
    return self.output
  
  def backward(self, output_error, lr = 0.01):
    return self.gradient(self.input) * output_error
  
  def __call__(self, x):
    return self.forward(x)

class Linear():
  def __init__(self, n_in, n_out, name="linear"):
    limit = 1 / np.sqrt(n_in)
    self.W = np.random.uniform(-limit, limit, (n_in, n_out))
    self.b = np.zeros((1, n_out)) # Biases
    self.input = None
    self.output = None
    self.name = name
  
  def forward(self, x):
    self.input = x
    self.output = np.dot(self.input, self.W) + self.b # Wx + b
    return self.output
  
  def backward(self, output_error, lr = 0.01):
    input_error = np.dot(output_error, self.W.T)
    delta = np.dot(self.input.T, output_error) 
    self.W -= lr * delta
    self.b -= lr * np.mean(output_error)
    
    return input_error
  
  def __call__(self, x):
    return self.forward(x)

"""## Network

Let's create the network class
"""

class Network():
  def __init__(self, input_dim, output_dim, lr=0.01):
    # input_dim = 784, output_dim = 10 for mnist
    self.layers = [
                   Linear(input_dim, 256, name="input"),
                   Activation(LeakyReLU(), name="relu1"),
                   Linear(256, 128, name="input"),
                   Activation(LeakyReLU(), name="relu2"),
                   Linear(128, output_dim, name="output"),
                   Activation(Softmax(), name="softmax")
    ]
    self.lr = lr
  
  def forward(self, x):
    for layer in self.layers:
      x = layer(x)
    return x
  
  def backward(self, loss_grad):
    for layer in reversed(self.layers):
      loss_grad = layer.backward(loss_grad, self.lr)
    # Iterating backwards through the layers
  
  def __call__(self, x):
    return self.forward(x)

"""## Training

Our training loop is similar to a pytorch training loop

1. We do the forward pass
2. Calculate the loss/gradient
3. Send it backwards through the model

Note we dont have to anything like `optimizer.step()` or `optimizer.zero_grad()` like in pytorch, and that's because we didnt implement any optimizer like Adam, we just use SGD
"""

criterion = CrossEntropy()
model = Network(n_input_dim, n_out, lr=1e-3)

EPOCHS = 5

for epoch in range(EPOCHS):
  loss = []
  acc = []
  for x_batch, y_batch in batch_loader(X_train, y_train):
    out = model(x_batch) # Forward pass
    loss.append(np.mean(criterion.loss(y_batch, out))) # Loss - for display
    # We just passed the inputs incorrectly.
    acc.append(accuracy(np.argmax(y_batch, axis=1), np.argmax(out, axis=1))) # Accuracy - FOr display
    error = criterion.gradient(y_batch, out) # Calculate gradient of loss
    model.backward(error) # Backpropagation
  
  print(f"Epoch {epoch + 1}, Loss: {np.mean(loss)}, Acc: {np.mean(acc)}")

out = model(X_test) # Now we run the model on the test set
accuracy(np.argmax(y_test, axis=1), np.argmax(out, axis=1)) # We get an accuracy of 96%

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1, Loss: 0.09289325562683205, Acc: 0.8510794243070362
Epoch 2, Loss: 0.04380013596318813, Acc: 0.9249566897654584
Epoch 3, Loss: 0.03335114586317122, Acc: 0.9437300106609808
Epoch 4, Loss: 0.026771583532097413, Acc: 0.9553238272921108
Epoch 5, Loss: 0.0222014654561561, Acc: 0.9635361140724946


0.9602