Before you turn this project in, make sure everything runs as expected. First, **restart the kernel** (in the menubar, select Kernel$\rightarrow$Restart) and then **run all cells** (in the menubar, select Cell$\rightarrow$Run All).

Make sure you fill in any place that says `YOUR CODE HERE` or "YOUR ANSWER HERE", as well as your name and collaborators below:

In [None]:
# Fill in both strings before submitting (see the instructions above).
NAME = ""  # your name
COLLABORATORS = ""  # names of anyone you worked with

# Load Data

The following code loads the MNIST dataset, displays a few images, flattens the images, and sets some autograder variables.

In [None]:
from IPython.display import display
import numpy as np
from keras.preprocessing.image import array_to_img, img_to_array
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
%load_ext autoreload
%autoreload

# Keep only the first (images, labels) pair returned by load_data(); the
# second pair is discarded.
[X, y], _ = mnist.load_data()

# Preview the first five digits. A trailing axis is appended before the
# conversion (presumably the channel axis array_to_img wants -- confirm).
for x in X[:5]:
    x = np.expand_dims(x, axis=-1)
    img = array_to_img(x)
    display(img)

# Flatten every image into a single row and divide by 255 (presumably mapping
# 0-255 pixel intensities onto [0, 1]). The row count is taken from the data
# itself rather than hard-coded, so the cell keeps working if the number of
# loaded images ever differs from 60,000.
X = X.reshape(X.shape[0], -1) / 255.
Y = to_categorical(y)

# The tests below only use the first 50 examples.
X, Y = X[:50], Y[:50]

# Variables referenced by the test cells.
M, N = X.shape              # number of examples, number of flattened features
C = np.unique(y).shape[0]   # number of distinct labels in the full label vector
H = 16                      # passed to the models as nb_hidden

def passed():
    """Print a check mark; the test cells call this after their assertions."""
    print('✅')

# Task

- Implement `Dense`, `Sigmoid`, and `SoftmaxCE` layers as classes and stick them in a file `layers.py`

In [None]:
# YOUR CODE HERE
# Stub for the task above (`Dense`, `Sigmoid`, `SoftmaxCE` in `layers.py`).
# This cell raises every time it is executed until the line below is replaced.
raise NotImplementedError()

# Task

- Define a one-hidden-layer perceptron class called `LayeredMLP` which uses `Dense`, `Sigmoid`, and `SoftmaxCE` layers as in the computational graph

![](images/mlp_predict.svg)

where

- $\mathbf{X} \in \mathbb{R}^{M \times N}$
- $\mathbf{W}^{(1)} \in \mathbb{R}^{N \times H}$ and $\mathbf{b}^{(1)} \in \mathbb{R}^{H}$
- $\mathbf{Z} \in \mathbb{R}^{M \times H}$
- $\mathbf{H} \in \mathbb{R}^{M \times H}$
- $\mathbf{W}^{(2)} \in \mathbb{R}^{H \times C}$ and $\mathbf{b}^{(2)} \in \mathbb{R}^{C}$
- $\mathbf{S} \in \mathbb{R}^{M \times C}$
- $\mathbf{Y} \in \mathbb{R}^{M \times C}$
- $\mathbf{L} \in \mathbb{R}^{M}$ and $\overline{\ell} \in \mathbb{R}$

In [None]:
# YOUR CODE HERE
# Stub for the `LayeredMLP` task above; the test cells import the class from
# `classifiers`. This cell raises every time it is executed until the line
# below is replaced.
raise NotImplementedError()

# Constructor Tests

In [None]:
%autoreload
from classifiers import LayeredMLP
import layers

mlp = LayeredMLP(nb_feature=N, nb_hidden=H, nb_class=C)

# (attribute on the model, class name in `layers`, attributes the layer must
# expose). Checked in this order: the layer's exact type first, then its
# parameters.
expected_layers = [
    ('dense1', 'Dense', ('W', 'b')),
    ('sigmoid', 'Sigmoid', ()),
    ('dense2', 'Dense', ('W', 'b')),
    ('softmaxce', 'SoftmaxCE', ()),
]
for attr_name, class_name, param_names in expected_layers:
    layer = getattr(mlp, attr_name)
    # Exact type match on purpose: a subclass does not satisfy the check.
    assert type(layer) == getattr(layers, class_name)
    for param_name in param_names:
        assert hasattr(layer, param_name)

passed()

# Prediction Tests

In [None]:
from classifiers import LayeredMLP

mlp = LayeredMLP(nb_feature=N, nb_hidden=H, nb_class=C)

# predict() must return an M x C array: one row per example in X and one
# column per class.
S = mlp.predict(X)
assert S.shape == (M, C)

passed()

# Evaluation Tests

In [None]:
from classifiers import LayeredMLP

# Score a newly constructed model (fit() is never called here) on the
# 50-example subset.
mlp = LayeredMLP(nb_class=C, nb_hidden=H, nb_feature=N)
acc = mlp.evaluate(X, Y)

# evaluate() must return exactly a NumPy float64 ...
assert np.float64 == type(acc)
# ... whose value lies in the closed interval [0, 1].
assert acc >= 0 and acc <= 1

passed()

# Task

- Implement a `LayeredMLPWithGDOptimizer` class which performs optimization via gradient descent and extends your `LayeredMLP` class

# Requirement

- You must use backpropagation to compute gradients. To demonstrate this, your `_get_gradients()` function must return the gradient of every intermediate value in the computational graph, as in

![](images/mlp_full.svg)

including `dX` (not pictured). You don't have to return `dloss`. Check the tests below to clear up any confusion.

In [None]:
# YOUR CODE HERE
# Stub for the `LayeredMLPWithGDOptimizer` task above; the test cells import
# the class from `classifiers`. This cell raises every time it is executed
# until the line below is replaced.
raise NotImplementedError()

# Gradient Type Tests

In [None]:
%autoreload
from classifiers import LayeredMLP, LayeredMLPWithGDOptimizer

mlp = LayeredMLPWithGDOptimizer(nb_feature=N, nb_hidden=H, nb_class=C)

# The optimizer class has to extend the plain MLP.
assert issubclass(LayeredMLPWithGDOptimizer, LayeredMLP)

# Every gradient returned by _get_gradients() must be exactly a NumPy array.
gradients = mlp._get_gradients(X, Y)
for gradient in gradients:
    assert type(gradient) is np.ndarray

# Report success the same way the other test cells do.
passed()

# Gradient Checking Tests

In [None]:
import logging

from classifiers import LayeredMLPWithGDOptimizer
from checking import estimate_gradients

mlp = LayeredMLPWithGDOptimizer(nb_feature=N, nb_hidden=H, nb_class=C)

# Compare the estimates from estimate_gradients() with the gradients returned
# by _get_gradients(). Only the four parameter gradients are compared; the
# gradients of the intermediate values are unpacked but not checked here.
estimated_gradients = estimate_gradients(mlp, X, Y)
dX, dW1, db1, dZ, dH, dW2, db2, dS = mlp._get_gradients(X, Y)
analytical_gradients, params = [dW1, db1, dW2, db2], ['dW1', 'db1', 'dW2', 'db2']
grad_pairs = zip(estimated_gradients, analytical_gradients, params)

all_close = True
for estimated_gradient, analytic_gradient, param in grad_pairs:
    if np.allclose(estimated_gradient, analytic_gradient):
        continue
    # A mismatch is reported as a warning rather than aborting, so every
    # parameter gets checked in a single run.
    all_close = False
    norm = np.square(estimated_gradient - analytic_gradient).mean()
    logging.warning(f'{param} check failed with a difference of {norm}!')

# Only signal success when no parameter produced a warning.
if all_close:
    passed()

# Gradient Descent Optimizer Tests

In [None]:
from classifiers import LayeredMLPWithGDOptimizer

mlp = LayeredMLPWithGDOptimizer(nb_class=C, nb_hidden=H, nb_feature=N)

X_sample = X[:50]
Y_sample = Y[:50]

# Baseline accuracy and loss, recorded before any call to fit().
acc = mlp.evaluate(X_sample, Y_sample)
loss = mlp.forward(X_sample, Y_sample)

# Ten rounds of ten epochs each; the loss has to drop strictly after every
# round.
for _ in range(10):
    mlp.fit(X_sample, Y_sample, nb_epoch=10)
    assert loss > mlp.forward(X_sample, Y_sample)
    loss = mlp.forward(X_sample, Y_sample)

# Accuracy after all rounds must be strictly better than the baseline.
assert acc < mlp.evaluate(X_sample, Y_sample)

passed()

# Ignore Cell Below

In [None]:
# YOUR CODE HERE
# NOTE(review): the heading above says to ignore this cell, yet it still
# raises whenever it is executed -- confirm whether it should be left as is.
raise NotImplementedError()