# Cambridge Spark Deep Learning Tutorial Series
## MNIST and CIFAR-10
1. https://cambridgespark.com/content/tutorials/deep-learning-for-complete-beginners-recognising-handwritten-digits/index.html
2. https://cambridgespark.com/content/tutorials/convolutional-neural-networks-with-keras/index.html
3. https://cambridgespark.com/content/tutorials/neural-networks-tuning-techniques/index.html

### Applying a deep MLP to MNIST

In [1]:
from keras.datasets import mnist 
# subroutines for fetching the MNIST dataset
from keras.models import Model 
# basic class for specifying and training a neural network
from keras.layers import Input, Dense 
# the two types of neural network layer we will be using
from keras.utils import np_utils 
# utilities for one-hot encoding of ground truth values

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Hyperparameters for the MNIST multilayer perceptron.

# Number of training examples processed together in each iteration.
batch_size = 128

# Number of complete passes over the entire training set.
num_epochs = 20

# Number of neurons in each hidden Dense layer.
hidden_size = 512

In [3]:
# Fetch the MNIST data as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Read the dataset sizes and image dimensions from the loaded arrays instead
# of hard-coding them, so they always agree with the data actually loaded.
# MNIST images are 28x28, giving an image array of shape (num_images, 28, 28).
num_train, height, width = x_train.shape
num_test = x_test.shape[0]

# The images are greyscale, so there is a single channel (the loaded arrays
# carry no explicit channel axis).
depth = 1

In [4]:
# There are 10 target classes, one per digit
num_classes = 10

# Flatten every image to a 1D vector, cast to float32, and rescale the pixel
# values to the 0-1 range by dividing by 255
x_train = x_train.reshape(num_train, height * width).astype('float32') / 255
x_test = x_test.reshape(num_test, height * width).astype('float32') / 255

# Convert the integer class labels into one-hot encoded vectors
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

In [5]:
# The network consumes flattened images: one vector of height * width values
inp = Input(shape=(height * width,))

# Three stacked fully connected hidden layers, each with hidden_size neurons
# and a ReLU activation
hidden_1 = Dense(units=hidden_size, activation='relu')(inp)
hidden_2 = Dense(units=hidden_size, activation='relu')(hidden_1)
hidden_3 = Dense(units=hidden_size, activation='relu')(hidden_2)

# Softmax output layer with one unit per class
out = Dense(units=num_classes, activation='softmax')(hidden_3)

In [6]:
# Build the model from its input tensor (`inp`) and output tensor (`out`)
model = Model(inputs=inp, outputs=out)

In [7]:
# Configure training:
#   - optimiser: Adam
#   - loss: categorical cross-entropy
#   - reported metric: accuracy
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Fit the network, holding out 10% of the training data for validation
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=num_epochs,
    validation_split=0.1,
    verbose=1,
)

# Score the trained network on the test set; as the last expression in the
# cell, the returned value is displayed as the cell output
model.evaluate(x_test, y_test, verbose=1)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[0.10409595887126574, 0.9813]

### Applying a deep CNN to CIFAR-10

In [9]:
from keras.datasets import cifar10 
# fetching the CIFAR-10 dataset
from keras.models import Model 
# basic class for specifying and training a neural network
from keras.layers import Input, Convolution2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.utils import np_utils 
# utilities for one-hot encoding of ground truth values
import numpy as np

In [10]:
# Hyperparameters for the CIFAR-10 convolutional network.

# Training schedule
# in each iteration, we consider 32 training examples at once
batch_size = 32
# we iterate 200 times over the entire training set
num_epochs = 200

# Convolution and pooling geometry
# we will use 3x3 kernels throughout
kernel_size = 3
# we will use 2x2 pooling throughout
pool_size = 2

# Number of kernels per convolutional layer
# we will initially have 32 kernels per conv. layer...
conv_depth_1 = 32
# ...switching to 64 after the first pooling layer
conv_depth_2 = 64

# Regularisation
# dropout after pooling with probability 0.25
drop_prob_1 = 0.25
# dropout in the fully connected layer with probability 0.5
drop_prob_2 = 0.5

# Classifier head
# the fully connected (FC) layer will have 512 neurons
hidden_size = 512

In [11]:
# Fetch the CIFAR-10 data as (images, labels) pairs for the train and test splits
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# there are 50000 training examples in CIFAR-10
num_train, height, width, depth = X_train.shape
# there are 10000 test examples in CIFAR-10
num_test = X_test.shape[0]
# there are 10 image classes
num_classes = np.unique(y_train).shape[0]

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalise both splits to the [0, 1] range using ONE scale factor taken from
# the training set. The factor is captured before the in-place division
# (afterwards the training maximum would be 1), and the test set deliberately
# reuses it so that preprocessing never depends on test-set statistics.
pixel_max = np.max(X_train)
X_train /= pixel_max
X_test /= pixel_max

# One-hot encode the labels
Y_train = np_utils.to_categorical(y_train, num_classes)
Y_test = np_utils.to_categorical(y_test, num_classes)

In [12]:
# Input tensor; depth goes last in the TensorFlow back-end (first in Theano)
inp = Input(shape=(height, width, depth))

# Block 1: Conv [32] -> Conv [32] -> Pool, followed by dropout
conv_1 = Convolution2D(filters=conv_depth_1,
                       kernel_size=(kernel_size, kernel_size),
                       padding='same',
                       activation='relu')(inp)
conv_2 = Convolution2D(filters=conv_depth_1,
                       kernel_size=(kernel_size, kernel_size),
                       padding='same',
                       activation='relu')(conv_1)
pool_1 = MaxPooling2D(pool_size=(pool_size, pool_size))(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1)

# Block 2: Conv [64] -> Conv [64] -> Pool, followed by dropout
conv_3 = Convolution2D(filters=conv_depth_2,
                       kernel_size=(kernel_size, kernel_size),
                       padding='same',
                       activation='relu')(drop_1)
conv_4 = Convolution2D(filters=conv_depth_2,
                       kernel_size=(kernel_size, kernel_size),
                       padding='same',
                       activation='relu')(conv_3)
pool_2 = MaxPooling2D(pool_size=(pool_size, pool_size))(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)

# Classifier head: flatten to 1D -> fully connected ReLU layer -> dropout
# -> softmax over the classes
flat = Flatten()(drop_2)
hidden = Dense(units=hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(units=num_classes, activation='softmax')(drop_3)

In [13]:
# A model is defined simply by naming its input and output layers
model = Model(inputs=inp, outputs=out)

# Configure training:
#   - optimiser: Adam
#   - loss: categorical cross-entropy
#   - reported metric: accuracy
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Training the CNN for num_epochs epochs is expensive, so it is switched off
# by default. Set this flag to True to train and evaluate the model.
# (An explicit flag replaces the earlier approach of wrapping the calls in
# string literals, which only made the cell echo a string.)
run_cifar_training = False

if run_cifar_training:
    # Train the model using the training set...
    # ...holding out 10% of the data for validation
    model.fit(X_train, Y_train,
              batch_size=batch_size, epochs=num_epochs,
              verbose=1, validation_split=0.1)

    # Evaluate the trained model on the test set. The model is compiled with
    # a single metric (accuracy), so evaluate() returns [loss, accuracy].
    # The result is printed because an expression inside an `if` block is
    # not displayed automatically by the notebook.
    test_loss, test_accuracy = model.evaluate(X_test, Y_test, verbose=1)
    print([test_loss, test_accuracy])

### Training and evaluation are left disabled because they require a lot of computing power and time

'model.evaluate(X_test, Y_test, verbose=1)'