In [None]:
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential,Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, Input
from keras.datasets import mnist
from time import time
%matplotlib inline

'%matplotlib inline' is a 'Magic' command to get the plots inline with the notebook.

### Loading the Dataset from Keras

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
print(type(x_train),' ',shape(x_train.shape))

### Visualising the Data 

In [None]:
np.random.seed(3)
n_rows = 2
n_cols = 3
fig, ax = plt.subplots(nrows=n_rows,ncols=n_cols)
fig.suptitle("Dataset", size=18)
for i in range(n_rows):
    for j in range(n_cols):
        ax[i][j].imshow(x_train[np.random.randint(0,6000)])

### Preprocessing the Data:

Preprocessing includes scaling the data to [-1,1] and getting zero mean. Also, as the Conv2D requires number of channels as a dimension, we reshaped the dataset to include the one and only grayscal channel.

In [None]:
def preprocess(dataset):
    dataset = (dataset - np.mean(dataset))/255
    dataset = dataset.reshape(-1,28,28,1)
    return dataset
x_train = preprocess(x_train)
x_test = preprocess(x_test)

### 'One-hot'-ing the Labels

One-hot vectors are used for multiclass classification. In one-hot vectors, only one bit would be set which corresponds to the correct category it belongs to. Rest are zero. Although here, an inbuilt function to_categorical from keras.utils can be used.

In [None]:
def onehot(labels):
    return np.array([[float(i==data) for i in range(10)] for data in labels])
y_train = onehot(y_train)
y_test = onehot(y_test)

### Defining the Model and its Architecture:

The Model defined is a very simple one with two Convolutional Layers and two Dense layers. Dropout layers were used for regularization. Since there are 10 labels (0 to 9), the last layer has 10 output nodes. Softmax activation gives us the probabilities directly.

In [None]:
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=5, padding='SAME', activation='relu', input_shape=(28,28,1)))
model.add(MaxPool2D(padding = 'SAME'))
model.add(Conv2D(filters=32, kernel_size=5, padding='SAME', activation='relu'))
model.add(MaxPool2D())
model.add(Dropout(0.25))
model.add(Flatten())

In [None]:
model.add(Dense(256, activation='relu'))
model.add(Dense(10, activation='sigmoid'))

Let us now see what our model looks like. Keras has a very simple method called summary() that outputs the layers, their shapes and the number of parameters in it.

In [None]:
model.summary()

### Compiling the Model and Training:

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=10, verbose=1, validation_data=(x_test, y_test))

### Using the Model on a test dataset and Calculating the Accuracy:

In [None]:
model.evaluate(x_test, y_test, verbose=1)

### Visualising Predictions

In [None]:
np.random.seed(3)
n_rows = 2
n_cols = 3
fig, ax = plt.subplots(nrows=n_rows,ncols=n_cols)
fig.suptitle("Predictions", size=18)
ec = (0, 0, 0)
fc = (1, 1, 1)
for i in range(n_rows):
    for j in range(n_cols):
        k = np.random.randint(0,6000)
        ax[i][j].imshow(x_test[k].reshape((28,28)))
        temp = np.argmax(model.predict(np.expand_dims(x_test[k], axis=0)))
        ax[i][j].text(s="Predicted Value: {}".format(temp), x=0.75, y=0,
                      bbox=dict(boxstyle="round", ec=ec, fc=fc))
plt.setp(ax, xticks=[], yticks=[])

## Model's Representations
From what I have learned from Style Transfer, I'm finding out what model thinks of individual digits

In [24]:
import keras.backend as K

In [25]:
x_representation = K.variable(np.random.randn(10,28,28,1))
y_representation = [[int(i==j) for i in range(10)] for j in range(10)]
y_representation = K.constant(y_representation)

In [26]:
from keras import Model
from keras.layers import Input
new_input = Input(tensor=x_representation)
new_output = model(new_input)
new_model = Model(new_input, new_output)

In [27]:
y_representation

<tf.Tensor 'Const_5:0' shape=(10, 10) dtype=float32>

In [36]:
loss = 1e6*K.mean((K.categorical_crossentropy(K.round(new_model.output), y_representation)))

In [31]:
from keras.optimizers import Adam

In [37]:
opt = Adam(1)
updates = opt.get_updates(params=[x_representation],loss=loss)

ValueError: None values not supported.

In [33]:
train = K.function([],[loss], updates)

NameError: name 'updates' is not defined

In [None]:
from time import time

In [None]:
start = time()
for epoch in range(50000):
    e_start = time()
    out = train([])[0]
    e_end = time()
    if(epoch%100==0):
        print("Epoch {}, Loss {:.4f}, Time Taken per Step: {:.2f}ms".format(epoch, out, (e_end-e_start)*1000))
    if(epoch%500==0):
        fig, ax = plt.subplots(2,5)
        for i in range(2):
            for j in range(5):
                temp = np.clip(K.get_value(x_representation),0,255).astype('uint8')
                ax[i][j].imshow(temp[5*i+j].reshape(28,28))
        plt.show()
print("Total Time taken: {:.2f}".format(time()-start))

## Trail Cells

Key finding: model has no understanding of the digits.  
Partly because, the model thinks input image is one among the 10 classes and it doesn't know that they don't belong to any.