## Transfer learning of CNN on MNIST dataset using Keras
Build transfer learning by splitting the MNIST dataset into two halves by digit class (digits 0-4 and digits 5-9).   
Train the entire CNN on the first half (digits 0-4), then freeze the feature layers and fine-tune the classification layers on the second half (digits 5-9).
* Author: Gao Yang
* Accuracy = 0.9938 on the second half of the test dataset after the transfer learning

#### Set up the Keras environment

In [10]:
from __future__ import division, print_function
import datetime

import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Flatten, Dropout, Activation

from keras import backend as K
import numpy as np

#### Prepare MNIST dataset

In [79]:
# ------ import MNIST from local mnist.npz -------
import os
def load_mnistdata(path_npz):
    """Load the MNIST train/test arrays from a local ``.npz`` archive.

    Args:
        path_npz: Path to an ``.npz`` archive containing the arrays
            ``x_train``, ``y_train``, ``x_test`` and ``y_test``.

    Returns:
        The nested tuple ``((x_train, y_train), (x_test, y_test))``.

    Raises:
        AssertionError: If ``path_npz`` does not point to an existing file.
        KeyError: If one of the four expected arrays is missing from the
            archive.
    """
    assert os.path.isfile(path_npz), \
        'MNIST archive not found: {}'.format(path_npz)

    # np.load on an .npz returns a lazy archive object that keeps the file
    # open; the context manager closes it once the arrays have been read.
    with np.load(path_npz) as data:
        x_train, y_train = data['x_train'], data['y_train']
        x_test, y_test = data['x_test'], data['y_test']
    return (x_train, y_train), (x_test, y_test)

# Location of the locally stored MNIST archive.
path_npz = '/Users/Yang/Projects/keras-examples/mnist.npz'
(x_train, y_train), (x_test, y_test) = load_mnistdata(path_npz)

# Split by digit value into two 5-class problems.
# Digits 0-4 keep their original labels.
x_train_lt5, y_train_lt5 = x_train[y_train < 5], y_train[y_train < 5]
x_test_lt5, y_test_lt5 = x_test[y_test < 5], y_test[y_test < 5]

# Digits 5-9 are shifted down by 5 so their labels are also 0-4.
x_train_gt5, y_train_gt5 = x_train[y_train >= 5], y_train[y_train >= 5] - 5
x_test_gt5, y_test_gt5 = x_test[y_test >= 5], y_test[y_test >= 5] - 5

# Image height and width are taken from the loaded training images
# (axis 0 is the sample axis, axes 1 and 2 are rows and columns).
img_size_rows, img_size_cols = x_train.shape[1], x_train.shape[2]

# Keras reports the layout as 'channels_first' or 'channels_last';
# MNIST is grayscale, so there is a single channel.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_size_rows, img_size_cols)
else:
    input_shape = (img_size_rows, img_size_cols, 1)

# reshape_data_for_cnn
def reshape_x_cnn(dataset):
    """Reshape a stack of images for the CNN and rescale the pixel values.

    The per-sample shape comes from the module-level ``input_shape``, so
    every sample in ``dataset`` must hold exactly that many values.

    Args:
        dataset: NumPy array whose first axis indexes the samples.

    Returns:
        A new ``float32`` array of shape ``(n_samples,) + input_shape``
        with every value divided by 255 (presumably mapping 8-bit pixel
        intensities into [0, 1] -- confirm against the data source).
    """
    # tuple + tuple = tuple: prepend the sample count to the per-sample shape.
    dataset = dataset.reshape((dataset.shape[0],) + input_shape)
    # astype returns a copy, so the in-place division below does not touch
    # the caller's array.
    dataset = dataset.astype('float32')
    dataset /= 255
    return dataset


def train_model(model, train_tuple, test_tuple, num_classes=5,
                epochs=5, batch_size=128):
    """Compile, train and evaluate ``model`` on one split of the dataset.

    The model is compiled here on every call, with categorical
    cross-entropy loss, the 'adadelta' optimizer and the accuracy metric.

    Args:
        model: Keras model to train; it is compiled and fitted in place.
        train_tuple: ``(images, labels)`` used for training.
        test_tuple: ``(images, labels)`` used both as validation data
            during fitting and for the final evaluation.
        num_classes: Number of label classes for the one-hot encoding.
            Defaults to 5 because the dataset is split in half.
        epochs: Number of training epochs.
        batch_size: Number of samples per gradient update.

    Returns:
        None. Sample counts, training time, test loss and test accuracy
        are printed.
    """
    # images
    x_train = reshape_x_cnn(train_tuple[0])
    x_test = reshape_x_cnn(test_tuple[0])

    print('x_train\'s shape:', x_train.shape)
    print('Train sample number: {}'.format(x_train.shape[0]))
    print('Test sample number: {}'.format(x_test.shape[0]))

    # labels: integer class ids -> one-hot vectors
    y_train = keras.utils.to_categorical(train_tuple[1], num_classes)
    y_test = keras.utils.to_categorical(test_tuple[1], num_classes)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    now = datetime.datetime.now
    t = now()
    # train
    model.fit(x_train, y_train,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1,
              validation_data=(x_test, y_test))

    print('Training time: {}'.format(now() - t))

    # evaluate: score[0] is the loss, score[1] the accuracy metric
    score = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss: {:.06f}'.format(score[0]))
    print('Test accuracy: {:.06f}'.format(score[1]))


#### Setup two sets of layers

In [80]:
# Convolutional feature extractor. Kept as its own list so these layers
# can be addressed as a group (they are frozen later for fine-tuning).
feature_layers = [
    Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu',
           input_shape=input_shape),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),
    Flatten(),
]

# Dense classifier head ending in a 5-way softmax.
classification_layers = [
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(5, activation='softmax'),
]

# Stack the feature layers first, then the classifier head.
model = Sequential()
for layer in feature_layers + classification_layers:
    model.add(layer)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
dropout_24 (Dropout)         (None, 12, 12, 32)        0         
_________________________________________________________________
flatten_13 (Flatten)         (None, 4608)              0         
_________________________________________________________________
dense_23 (Dense)             (None, 128)               589952    
_________________________________________________________________
dropout_25 (Dropout)         (None, 128)               0         
__________

#### Train both the feature layers and classification layers for digits < 5

In [81]:
# Stage 1: train every layer on the digits 0-4 split.
train_model(
    model,
    train_tuple=(x_train_lt5, y_train_lt5),
    test_tuple=(x_test_lt5, y_test_lt5),
    num_classes=5,
)

x_train's shape: (30596, 28, 28, 1)
Train sample number: 30596
Test sample number: 5139
Train on 30596 samples, validate on 5139 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:04:59.885455
Test loss: 0.005113
Test accuracy: 0.998249


#### Freeze the feature layers, then train the classification layers for digits >= 5

In [83]:
# Mark every feature-extraction layer as non-trainable; train_model()
# compiles the model again before fitting.
for frozen_layer in feature_layers:
    frozen_layer.trainable = False

# The summary footer lists "trainable" and "non-trainable" parameter counts.
# The two conv layers give 320 + 9248 = 9568 non-trainable parameters.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
dropout_24 (Dropout)         (None, 12, 12, 32)        0         
_________________________________________________________________
flatten_13 (Flatten)         (None, 4608)              0         
_________________________________________________________________
dense_23 (Dense)             (None, 128)               589952    
_________________________________________________________________
dropout_25 (Dropout)         (None, 128)               0         
__________

In [82]:
# Stage 2: train the same model on the digits 5-9 split (labels shifted to 0-4).
train_model(
    model,
    train_tuple=(x_train_gt5, y_train_gt5),
    test_tuple=(x_test_gt5, y_test_gt5),
    num_classes=5,
)

x_train's shape: (29404, 28, 28, 1)
Train sample number: 29404
Test sample number: 4861
Train on 29404 samples, validate on 4861 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:01:52.538439
Test loss: 0.020362
Test accuracy: 0.993828
