In [None]:
# import stuff
import tensorflow as tf
import numpy as np
tf.enable_eager_execution()
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow.contrib.eager as tfe
import matplotlib.gridspec as gridspec

In [None]:
# A simple plotting function which plots 10 examples along with their predictions
def plot(samples, labels):
    """Draw up to 10 images on a 2x5 grid, titling each with its label.

    Args:
        samples: iterable of 2-D images (arrays or eager tensors) that
            ``imshow`` can render.
        labels: indexable sequence parallel to ``samples``. Items may be
            eager tensors (converted with ``.numpy()``) or plain
            Python / NumPy values.

    Returns:
        None. The figure is created as a side effect.

    Samples beyond the grid capacity are ignored instead of raising an
    IndexError when the grid is indexed out of range.
    """
    n_rows, n_cols = 2, 5
    capacity = n_rows * n_cols

    fig = plt.figure(figsize=(10, 4))
    gs = gridspec.GridSpec(n_rows, n_cols)
    gs.update(wspace=None, hspace=None)

    for i, sample in enumerate(samples):
        if i >= capacity:
            # The grid only has `capacity` cells; stop before gs[i] overflows.
            break
        ax = plt.subplot(gs[i])
        ax.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        ax.imshow(sample, cmap='Greys_r')
        label = labels[i]
        # Eager tensors expose .numpy(); plain ints / NumPy scalars do not.
        number = label.numpy() if hasattr(label, 'numpy') else label
        ax.set_title(str(number))
    return

In [None]:
# Load the MNIST data through Keras. load_data() returns the training and
# test splits, each as an (images, labels) pair.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# define a function that scales 0-255 int images to 0-1 float pixel values
def scale(x):
    """Convert 0-255 integer pixel values to float32 values in [0, 1].

    Args:
        x: tensor (or array-like) of pixel intensities.

    Returns:
        A float32 tensor equal to ``x / 255``.
    """
    # tf.cast replaces the deprecated tf.to_float and yields the same float32 tensor.
    return tf.cast(x, tf.float32) / 255.0

In [None]:
# Build tf.data pipelines for the training and test splits so both can be
# iterated batch by batch. Each pipeline rescales the images with `scale`,
# one-hot encodes the labels into 10 classes, shuffles with a 10000-element
# buffer and yields batches of 30.
def _prepare_example(x, y):
    """Map one (image, label) pair to (scaled image, one-hot label)."""
    return (scale(x), tf.one_hot(y, 10))

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.map(_prepare_example)
train_ds = train_ds.shuffle(10000)
train_ds = train_ds.batch(30)

test_dsD = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_dsD.map(_prepare_example)
test_ds = test_ds.shuffle(10000)
test_ds = test_ds.batch(30)

In [None]:
# Construct the model. In the initialization section we define our layers, and in `call` we write the forward pass of the model.
class MNISTModel(tf.keras.Model):
    """Convolutional MNIST classifier skeleton (exercise template).

    Layers are declared in ``__init__`` and applied in ``call``. Every
    placeholder below is a literal Python ``Ellipsis`` that still has to be
    replaced with real code; until then the model cannot be called, because
    ``call`` invokes ``self.conv1`` as a function.
    """

    def __init__(self):
        """Declare the layers that ``call`` applies during the forward pass."""
        super(MNISTModel, self).__init__()
        # Target shape handed to tf.reshape in `call`; the leading -1 lets the
        # batch dimension be inferred from the input.
        self._input_shape = [-1, 28, 28, 1]
        # Declare your layers here. Assign each layer to a variable for this class, which we will use later in "call" to process the data.

        # use https://www.tensorflow.org/api_docs/python/tf/layers/conv2d with relu (tf.nn.relu) activation
        # use filter size of 5 and 32 output channels (number of features) and same padding
        # NOTE: `call` invokes self.conv1(x), so this must be a callable layer
        # object (presumably tf.layers.Conv2D, as for conv2 below).
        self.conv1 = ...

        # use tf.layers.MaxPooling2D
        self.max_pool2d = ...

        # use tf.layers.Conv2D, with filter size of 5 and 64 output channels and relu activation and same padding
        self.conv2 = ...

        # use tf.layers.Dense with 750 neurons and a relu activation to add a fully connected layer (https://www.tensorflow.org/api_docs/python/tf/layers/dense)
        self.fc1 = ...

        # add a dropout layer with 0.5 rate: https://www.tensorflow.org/api_docs/python/tf/layers/dropout
        # NOTE(review): `call` takes no `training` argument, so the caller cannot
        # switch dropout between training and evaluation -- confirm this is intended.
        self.dropout = ...

        # use a tf.layers.Dense layer with 10 neurons (10 digits)
        self.fc2 = ...

    def call(self, x):
        """Run the forward pass on a batch of images and return the model output.

        Args:
            x: batch of images; it is reshaped to ``self._input_shape`` first.

        Returns:
            The value assigned to ``output`` (presumably the 10 class logits
            produced by ``self.fc2`` once the exercise is completed).
        """
        # First we reshape the input to [-1, 28, 28, 1]
        # (presumably batch, height, width, channels -- TODO confirm).
        x = tf.reshape(x, self._input_shape)
        # Now the forward pass:
        x = self.conv1(x)
        # complete the rest of the forward pass below:
        # (the seven placeholders presumably correspond to: pool, conv2, pool,
        # flatten, fc1, dropout, and fc2 for `output` -- verify against the course material)
        x = ...
        x = ...
        x = ...
        x = ...
        x = ...
        x = ...
        output = ...
        return output

In [None]:
# Define our loss function
def loss_fn(model, x, y):
    """Compute the training loss of ``model`` on one batch (exercise template).

    Args:
        model: callable that maps the inputs ``x`` to logits.
        x: batch of inputs passed to ``model``.
        y: labels for the batch (one-hot encoded by the dataset pipeline in
            this notebook).

    Returns:
        The value assigned to ``loss``. This is still the ``Ellipsis``
        placeholder until the exercise below is completed.
    """
    # predict the logits for your model:
    model_logits = model(x)

    # Now use tf.nn.softmax_cross_entropy_with_logits_v2 (https://www.tensorflow.org/api_docs/python/tf/nn/softmax_cross_entropy_with_logits_v2)
    # to write a loss given these predictions and the labels.
    # Use tf.reduce_mean to average the resulting losses over the batch.
    loss = ...
    return loss

In [None]:
# An estimate of the accuracy, for the validation and test sets.
def get_accuracy(model, x, y_true):
    """Return the fraction of the batch that ``model`` classifies correctly.

    Args:
        model: callable mapping ``x`` to per-class scores (logits).
        x: batch of inputs.
        y_true: per-class label scores for the batch; the true class is taken
            as the arg-max along axis 1.

    Returns:
        A scalar float32 tensor: the mean of the per-example matches.
    """
    predicted_class = tf.argmax(model(x), axis=1)
    true_class = tf.argmax(y_true, axis=1)
    # 1.0 where the predicted class equals the true class, 0.0 elsewhere.
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    return tf.reduce_mean(hits)

In [None]:
# Create the network. This single instance is shared by the training and
# evaluation cells below.
model = MNISTModel()

In [None]:
# Create the Adam optimizer with its default hyper-parameters (no arguments
# are passed); the training loop uses it to apply the gradients.
optimizer = tf.train.AdamOptimizer()

In [None]:
# Train: go through the data, calculate the gradients and apply them to the network weights. As we do this
# we also look at the loss and the accuracy of the network on the current training batch every 10 steps.
# NOTE: despite its name, `epochs` is the number of training iterations (mini-batches),
# not the number of passes over the whole training set.
epochs = 100
global_step = tf.train.get_or_create_global_step()
# take(epochs) ends the loop after exactly `epochs` batches.
for (batch, (images, labels)) in enumerate(train_ds.take(epochs)):
    with tfe.GradientTape() as tape:
        loss = loss_fn(model, images, labels)
    # Differentiate with respect to the trainable variables only, so that any
    # non-trainable variable can never reach the optimizer with a `None` gradient.
    trainable_vars = model.trainable_variables
    grads = tape.gradient(loss, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars), global_step=global_step)
    if batch % 10 == 0:
        # Both numbers are evaluated after the update, on the batch just trained on.
        acc = get_accuracy(model, images, labels).numpy()
        print("Iteration {}, loss: {:.3f}, train accuracy: {:.2f}%".format(batch, loss_fn(model, images, labels).numpy(), acc*100))

In [None]:
# Test the performance of the network on the test set and plot a few examples.
# Accuracy is computed as (correct predictions) / (examples seen), so the smaller
# final batch is weighted correctly and the result can never exceed 100%.
avg_acc = 0
num_correct = 0
num_seen = 0
for (batch, (images, labels)) in enumerate(test_ds):
    # A single forward pass per batch feeds both the plot and the accuracy.
    logits = model(images)
    prediction = tf.argmax(logits, 1)
    hits = tf.equal(prediction, tf.argmax(labels, 1)).numpy()
    num_correct += int(hits.sum())
    num_seen += hits.size
    # float() keeps this a true division on Python 2 as well.
    avg_acc = float(num_correct) / num_seen
    if batch % 100 == 0 and batch != 0:
        plot(images[0:10],prediction[0:10])
        print("Iteration:{}, Average test accuracy: {:.2f}%".format(batch, avg_acc*100))
# Skip the final report if the test set was empty: `images` and `prediction`
# would be undefined in that case.
if num_seen > 0:
    plot(images[0:10],prediction[0:10])
    print("Final test accuracy: {:.2f}%".format(avg_acc * 100))
