# Convolutional neural network with Keras

---



## **Data import**

Data can be downloaded from the following link:
[fashion-mnist](https://github.com/zalandoresearch/fashion-mnist)

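Upload the four downloaded `.gz` files (train/t10k images and labels) to Google Drive and keep them in the folder `data/fashion/` under `My Drive`; the code below reads them from that location.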

In [0]:
# Install the packages this notebook needs on top of the hosted runtime.
# PyDrive is upgraded quietly (-U -q); keras is installed at whatever version pip resolves.
# NOTE(review): neither package is version-pinned, so the Keras release may differ
# between runs — confirm the version this notebook was written against.
!pip install -U -q PyDrive
!pip install keras

In [0]:
# Import packages
import os
import keras
import numpy as np
from google.colab import auth
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.client import GoogleCredentials
from keras.applications.resnet50 import preprocess_input, decode_predictions

# Mount data from Google drive at /content/gdrive (force_remount=True is passed,
# so an existing mount is replaced rather than reused).
# NOTE(review): the loading cells read 'gdrive/My Drive/data/fashion' as a relative
# path, which presumably relies on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

In [0]:
# Authenticate and create the PyDrive client.
# The Colab user is authenticated first; the application-default credentials are
# then attached to a GoogleAuth object, which backs the GoogleDrive client.
# NOTE(review): this rebinds the name `drive`, which the import cell bound to the
# google.colab `drive` module; no later visible cell uses the PyDrive client.
auth.authenticate_user()
google_auth = GoogleAuth()
google_auth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(google_auth)

In [0]:
# Mnist reader function sourced from https://github.com/zalandoresearch/fashion-mnist/blob/master/utils/mnist_reader.py
def load_mnist(path, kind='train'):
    """Load gzip-compressed MNIST-format images and labels from `path`.

    Reads `<kind>-labels-idx1-ubyte.gz` and `<kind>-images-idx3-ubyte.gz`
    from the directory `path`.

    Args:
        path: Directory containing the two `.gz` files.
        kind: File-name prefix selecting the split, e.g. 'train' or 't10k'.

    Returns:
        Tuple `(images, labels)`: `images` is a uint8 array of shape
        `(len(labels), 784)` and `labels` is a 1-D uint8 array.
    """
    # Local imports keep the reader self-contained (gzip is not imported elsewhere).
    import os
    import gzip
    import numpy as np

    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    # The first 8 bytes of the label file are header, not labels.
    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    # The first 16 bytes of the image file are header; each image is 784 bytes.
    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels

In [0]:
# Read both Fashion-MNIST splits from the Drive folder:
# 'train' -> (X_train, y_train), 't10k' -> (X_test, y_test)
(X_train, y_train), (X_test, y_test) = (
    load_mnist('gdrive/My Drive/data/fashion', kind=split)
    for split in ('train', 't10k')
)

In [0]:
# Report the array shape of every split
for caption, array in (
    ("Shape of X_train is :", X_train),
    ("Shape of Y_train is :", y_train),
    ("Shape of X_test is :", X_test),
    ("Shape of Y_test is :", y_test),
):
    print(caption, array.shape)

In [0]:
# Human-readable names of the ten Fashion-MNIST classes, indexed by label id
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Report how many training images carry each label
for class_id, class_name in enumerate(class_names):
    image_count = np.count_nonzero(y_train == class_id)
    print("Number of %s images: %i" % (class_name, image_count))

In [0]:
# Show the first 100 training images as a 10*10 gray-scale grid,
# each captioned with its class name
from matplotlib import pyplot as plt

fig, axes = plt.subplots(10, 10, figsize=(10, 10))
for ax, pixels, label in zip(axes.flat, X_train, y_train):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(pixels.reshape(28, 28), cmap='gray')
    ax.set_xlabel(class_names[label])
plt.show()

In [0]:
# Give every flat 784-pixel row an explicit (height, width, channel) layout
im_rows, im_cols = 28, 28 # image height and width
batch_size = 512 # size of batch
im_shape = (im_rows, im_cols, 1) # single-channel 28*28 image

X_train = X_train.reshape((X_train.shape[0],) + im_shape)
X_test = X_test.reshape((X_test.shape[0],) + im_shape)

for template, split in (('X_train shape:{}', X_train), ('X_test shape:{}', X_test)):
    print(template.format(split.shape))

num_classes = 10
# One-hot encode the integer class labels.
# Note: this rebinds y_train/y_test, so the cell is not safe to run twice in a row.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

## Transfer Learning

In [0]:
# ResNet50 is fed 32*32*3 inputs: zero-pad 2 pixels on every spatial side and
# one all-zero channel on each side of the single gray channel.
# Note: this rebinds X_train/X_test, so re-running the cell pads them again.
pad_width = ((0, 0), (2, 2), (2, 2), (1, 1))
X_train, X_test = (
    np.pad(images, pad_width, mode='constant', constant_values=0)
    for images in (X_train, X_test)
)

for template, split in (('X_train shape:{}', X_train), ('X_test shape:{}', X_test)):
    print(template.format(split.shape))

### Add two dense layers:

In [0]:
import tensorflow as tf
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

# Load the ResNet50 model with pre-trained ImageNet weights.
# Do not include the fully connected top: it classifies the 1000 ImageNet
# classes, while the Mnist fashion dataset has 10.
ResNet50_base = ResNet50(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

# Add a global spatial average pooling layer
x = ResNet50_base.output
x = GlobalAveragePooling2D()(x)

# Fully-connected layer
x = Dense(512, activation='relu')(x)

# Fully connected output/classification layer
predictions = Dense(10, activation='softmax')(x)

# Create a full network with the new layers
# (Keras 2 keyword names are `inputs`/`outputs`; `input`/`output` are the deprecated Keras 1 spellings)
ResNet50_transfer = Model(inputs=ResNet50_base.input, outputs=predictions)

# Freeze only the layers of the pre-trained ResNet50 base. Iterating over
# ResNet50_transfer.layers would also freeze the new Dense head, leaving
# the model with nothing to train.
for layer in ResNet50_base.layers:
    layer.trainable = False

# Compile the model (must happen after changing `trainable`)
ResNet50_transfer.compile(loss='categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

#### Train model

In [0]:
# Fit the transfer model; the last 20% of the training data is held out for validation
fit_options = dict(
    batch_size=256,
    epochs=10,
    validation_split=0.2,
    verbose=1,
    shuffle=True,
)
history = ResNet50_transfer.fit(X_train, y_train, **fit_options)

#### Learning curves

In [0]:
# Training and validation accuracy.
# Keras < 2.3 records accuracy under 'acc'/'val_acc'; Keras >= 2.3 uses
# 'accuracy'/'val_accuracy'. Use whichever key this History actually holds.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]

# Training and validation loss
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(18, 16))

# Top panel: accuracy per epoch
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

# Bottom panel: cross-entropy loss per epoch
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,8])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

#### Evaluation

In [0]:
# Score the model on the held-out test split; index 1 is the accuracy metric
score = ResNet50_transfer.evaluate(X_test, y_test, verbose=0)
metric_name = ResNet50_transfer.metrics_names[1]
metric_value = score[1]
print("%s: %.2f%%" % (metric_name, metric_value*100))

###  Unfreeze last two layers:

In [0]:
# Set last two layers for training.
# A negative slice selects them regardless of how many layers the installed
# Keras version builds for ResNet50 (the hard-coded index 176 assumed one
# specific layer count).
for layer in ResNet50_transfer.layers[-2:]:
    layer.trainable = True

# Compile the model again so the changed `trainable` flags take effect
ResNet50_transfer.compile(loss='categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

#### Train model

In [0]:
# Fit again with the unfrozen layers; the last 20% of the training data is held out for validation
fit_options = dict(
    batch_size=256,
    epochs=10,
    validation_split=0.2,
    verbose=1,
    shuffle=True,
)
history = ResNet50_transfer.fit(X_train, y_train, **fit_options)

#### Learning curves

In [0]:
# Training and validation accuracy.
# Keras < 2.3 records accuracy under 'acc'/'val_acc'; Keras >= 2.3 uses
# 'accuracy'/'val_accuracy'. Use whichever key this History actually holds.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]

# Training and validation loss
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(18, 16))

# Top panel: accuracy per epoch
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='upper left')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

# Bottom panel: cross-entropy loss per epoch
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper left')
plt.ylabel('Cross Entropy')
plt.ylim([0,8])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

#### Evaluation

In [0]:
# Score the fine-tuned model on the test split; index 1 is the accuracy metric
score = ResNet50_transfer.evaluate(X_test, y_test, verbose=0)
metric_name = ResNet50_transfer.metrics_names[1]
metric_value = score[1]
print("%s: %.2f%%" % (metric_name, metric_value*100))

## CNN with Keras

In [0]:
# Start from freshly loaded data (the transfer-learning section padded X_train/X_test)
(X_train, y_train), (X_test, y_test) = (
    load_mnist('gdrive/My Drive/data/fashion', kind=split)
    for split in ('train', 't10k')
)

# Give every flat 784-pixel row an explicit (height, width, channel) layout
im_rows, im_cols = 28, 28 # image height and width
batch_size = 512 # size of batch
im_shape = (im_rows, im_cols, 1) # single-channel 28*28 image

X_train = X_train.reshape((X_train.shape[0],) + im_shape)
X_test = X_test.reshape((X_test.shape[0],) + im_shape)

num_classes = 10
# One-hot encode the integer class labels
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

for template, array in (
    ('X_train shape:{}', X_train),
    ('X_test shape:{}', X_test),
):
    print(template.format(array.shape))

for template, array in (
    ('Y_train shape:{}', y_train),
    ('y_test shape:{}', y_test),
):
    print(template.format(array.shape))

### Model architecture

In [0]:
from keras.models import Sequential
# Dropout and Flatten are used below; import them here so the cell runs on a fresh kernel
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D

name = 'cnn_base_model'
cnn_base_model = Sequential([ # Stack layers in a sequence
    Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=im_shape, name='Conv2D-1'), # 64 filters, 2*2 kernel, 'same' zero padding
    MaxPooling2D(pool_size=2, name='MaxPool-1'), # 2*2 max-pool window
    Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', name='Conv2D-2'), # 32 filters, 3*3 kernel, 'same' zero padding
    MaxPooling2D(pool_size=2, name='MaxPool-2'), # 2*2 max-pool window
    Flatten(name='flatten'), # Flatten array to single dimension
    Dense(128, activation='relu', name='Dense-1'), # Fully connected layer with relu activation
    Dropout(0.3, name='Dropout-1'), # Drop 0.3 of the units during training
    Dense(10, activation='softmax', name='Output') # Fully connected layer with softmax activation
], name = name)

# One-hot labels are used in this section, hence categorical cross-entropy
cnn_base_model.compile(
    loss='categorical_crossentropy',
    optimizer= 'adam',
    metrics=['accuracy']
  )

cnn_base_model.summary()

#### Train model

In [0]:
# Fit the baseline CNN for 10 epochs; the test split doubles as validation data
cnn_base_model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test),
)

#### Evaluation

In [0]:
# Score the baseline CNN on the test split; index 1 is the accuracy metric
score = cnn_base_model.evaluate(X_test, y_test, verbose=0)
metric_name = cnn_base_model.metrics_names[1]
metric_value = score[1]
print("5 layer CNN %s: %.2f%%" % (metric_name, metric_value*100))

#### Grid search

In [0]:
# Reload the raw splits; labels stay as integers here (no one-hot encoding)
(X_train, y_train), (X_test, y_test) = (
    load_mnist('gdrive/My Drive/data/fashion', kind=split)
    for split in ('train', 't10k')
)

# Give every flat 784-pixel row an explicit (height, width, channel) layout
im_rows, im_cols = 28, 28 # image height and width
im_shape = (im_rows, im_cols, 1) # single-channel 28*28 image

X_train = X_train.reshape((X_train.shape[0],) + im_shape)
X_test = X_test.reshape((X_test.shape[0],) + im_shape)

In [0]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import Adam


# Candidate values for each hyperparameter explored by the grid search
batch_size = [500, 750, 1000]
epochs = [40, 60]
learn_rate = [0.001, 0.01, 0.1]
dropout_rate = [0.2, 0.3, 0.4]

# Every combination of the lists above is evaluated
param_grid = {
    'batch_size': batch_size,
    'epochs': epochs,
    'learn_rate': learn_rate,
    'dropout_rate': dropout_rate,
}

def create_model(learn_rate = 0.1, dropout_rate = 0.01):
  """Build and compile the small CNN used as the grid-search estimator.

  Args:
    learn_rate: Learning rate handed to the Adam optimizer.
    dropout_rate: Rate of the single Dropout layer before the output layer.

  Returns:
    A compiled Sequential model named 'cnn_model'. It is compiled with
    sparse categorical cross-entropy, i.e. for integer class labels.
  """
  cnn_model = Sequential(name='cnn_model')

  # Convolutional feature extractor; input shape comes from the global `im_shape`
  cnn_model.add(Conv2D(filters=64, kernel_size=2, padding='same', activation='relu',
                       input_shape=im_shape, name='Conv2D-1'))  # 64 filters, 2*2 kernel
  cnn_model.add(MaxPooling2D(pool_size=2, name='MaxPool-1'))  # 2*2 max-pool window
  cnn_model.add(Conv2D(filters=32, kernel_size=3, padding='same', activation='relu',
                       name='Conv2D-2'))  # 32 filters, 3*3 kernel
  cnn_model.add(MaxPooling2D(pool_size=2, name='MaxPool-2'))  # 2*2 max-pool window

  # Classifier head
  cnn_model.add(Flatten(name='flatten'))  # collapse feature maps to one dimension
  cnn_model.add(Dense(128, activation='relu', name='Dense-1'))
  cnn_model.add(Dropout(dropout_rate, name='Dropout-1'))
  cnn_model.add(Dense(10, activation='softmax', name='Output'))  # one unit per class

  cnn_model.compile(
    loss='sparse_categorical_crossentropy', # Cost function
    optimizer= Adam(lr=learn_rate), # Optimization algorithm
    metrics=['accuracy'] # Evaluation metric
  )
  return cnn_model

# Wrap the model factory so scikit-learn can drive it.
# `epochs` is the Keras 2 argument name (the Keras 1 spelling `nb_epoch` is
# tolerated by the wrapper but never passed to fit). It is only a default here:
# GridSearchCV overrides epochs and batch_size with the values from param_grid.
cnn_model = KerasClassifier(build_fn=create_model, epochs=10, verbose=0)

# 5-fold cross-validated search over every combination in param_grid
grid = GridSearchCV(estimator=cnn_model, param_grid=param_grid, scoring="accuracy", cv=5, n_jobs=1) # Set the model and hyperparamter grid
grid_result = grid.fit(X_train, y_train) # Train the model

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score'] # Mean cross-validated accuracy
stds = grid_result.cv_results_['std_test_score'] # Spread of accuracy across folds
params = grid_result.cv_results_['params']
# Author's recorded result: the best hyperparameter combination is
# 'batch_size': 1000, 'dropout_rate': 0.4, 'epochs': 40, 'learn_rate': 0.001
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

## Best model

In [0]:
# Reload the raw splits for the final model
(X_train, y_train), (X_test, y_test) = (
    load_mnist('gdrive/My Drive/data/fashion', kind=split)
    for split in ('train', 't10k')
)

# Image layout and training settings
im_rows, im_cols = 28, 28 # image height and width
batch_size = 1024 # size of training batches
epochs = 40 # number of full passes over the training data
im_shape = (im_rows, im_cols, 1) # single-channel 28*28 image

# Give every flat 784-pixel row an explicit (height, width, channel) layout
X_train = X_train.reshape((X_train.shape[0],) + im_shape)
X_test = X_test.reshape((X_test.shape[0],) + im_shape)

num_classes = 10
# One-hot encode the integer class labels
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

for template, array in (
    ('X_train shape:{}', X_train),
    ('X_test shape:{}', X_test),
):
    print(template.format(array.shape))

for template, array in (
    ('Y_train shape:{}', y_train),
    ('y_test shape:{}', y_test),
):
    print(template.format(array.shape))

#### Create model

In [0]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten
from keras.optimizers import Adam, Nadam, Adadelta, SGD

def create_model(optimizer = 'adam'):
  """Build and compile the 14-layer CNN ('cnn_model_14L').

  Note: this redefines the `create_model` used by the grid-search cell.

  Args:
    optimizer: Optimizer name or instance passed straight to `compile`.

  Returns:
    A compiled Sequential model using categorical cross-entropy
    (one-hot labels) and the accuracy metric.
  """
  cnn_model_14L = Sequential(name='cnn_model_14L')

  # Convolutional stack; input shape comes from the global `im_shape`
  cnn_model_14L.add(Conv2D(filters=32, kernel_size=3, activation='relu',
                           input_shape=im_shape, name='Conv2D-1'))  # 32 filters, 3*3 kernel
  cnn_model_14L.add(Conv2D(filters=64, kernel_size=3, activation='relu', name='Conv2D-2'))  # 64 filters, 3*3 kernel
  cnn_model_14L.add(Conv2D(filters=128, kernel_size=3, activation='relu', name='Conv2D-3'))  # 128 filters, 3*3 kernel
  cnn_model_14L.add(MaxPooling2D(pool_size=2, name='MaxPool'))  # 2*2 max-pool window
  cnn_model_14L.add(Conv2D(filters=128, kernel_size=3, activation='relu', name='Conv2D-4'))  # 128 filters, 3*3 kernel
  cnn_model_14L.add(Dropout(0.25, name='Dropout-1'))  # drop 0.25 of the units
  cnn_model_14L.add(Conv2D(filters=256, kernel_size=3, activation='relu', name='Conv2D-5'))  # 256 filters, 3*3 kernel
  cnn_model_14L.add(Dropout(0.4, name='Dropout-2'))  # drop 0.4 of the units

  # Classifier head
  cnn_model_14L.add(Flatten(name='flatten'))  # flatten to 1D array
  cnn_model_14L.add(Dense(256, activation='relu', name='Dense-1'))  # fully connected, relu
  cnn_model_14L.add(Dense(256, activation='relu', name='Dense-2'))  # fully connected, relu
  cnn_model_14L.add(Dropout(0.4, name='Dropout-3'))  # drop 0.4 of the units
  cnn_model_14L.add(Dropout(0.4, name='Dropout-4'))  # second consecutive dropout, also 0.4
  cnn_model_14L.add(Dense(10, activation='softmax', name='Output'))  # softmax over the 10 classes

  cnn_model_14L.compile( # Compile the model
    loss='categorical_crossentropy',
    optimizer= optimizer,
    metrics=['accuracy']
  )
  return cnn_model_14L

#### Optimizers

In [0]:
optimizers = ['adam', 'sgd', 'adadelta', 'nadam'] # Define various optimizers

# Report line per optimizer; the two %-slots receive the metric name and its value
report_formats = {
    'adam': "Adam optimizer %s: %.2f%%",
    'sgd': "Sgd optimizer %s: %.2f%%",
    'adadelta': "AdaDelta optimizer %s: %.2f%%",
    'nadam': "Nadam optimizer %s: %.2f%%",
}

trained_models = {}
fit_histories = {}
for optimizer in optimizers:
    # Create model
    candidate = create_model(optimizer = optimizer)
    # Train model (the test split doubles as validation data)
    fit_histories[optimizer] = candidate.fit(
        X_train, y_train, batch_size=batch_size,
        epochs=epochs, verbose=1,
        validation_data=(X_test, y_test)
    )
    # Model evaluation
    score = candidate.evaluate(X_test, y_test, verbose=0)
    print(report_formats[optimizer] % (candidate.metrics_names[1], score[1]*100))
    trained_models[optimizer] = candidate

# Expose each model and history under the names the following cells use
cnn_model_14L_adam = trained_models['adam']
cnn_model_14L_sgd = trained_models['sgd']
cnn_model_14L_adadelta = trained_models['adadelta']
cnn_model_14L_nadam = trained_models['nadam']

history_adam = fit_histories['adam']
history_sgd = fit_histories['sgd']
history_adadelta = fit_histories['adadelta']
history_nadam = fit_histories['nadam']



In [0]:
import matplotlib.pyplot as plt

# Validation accuracy per optimizer.
# Keras < 2.3 records it under 'val_acc'; Keras >= 2.3 uses 'val_accuracy'.
val_acc_key = 'val_acc' if 'val_acc' in history_adam.history else 'val_accuracy'
adam_acc = history_adam.history[val_acc_key]
sgd_acc = history_sgd.history[val_acc_key]
adadelta_acc = history_adadelta.history[val_acc_key]
nadam_acc = history_nadam.history[val_acc_key]

# Validation loss per optimizer
adam_val_loss = history_adam.history['val_loss']
sgd_val_loss = history_sgd.history['val_loss']
adadelta_val_loss = history_adadelta.history['val_loss']
nadam_val_loss = history_nadam.history['val_loss']

plt.figure(figsize=(18, 16))

# Top panel: validation accuracy per epoch
plt.subplot(2, 1, 1)
plt.plot(adam_acc, label='Adam')
plt.plot(sgd_acc, label='Sgd')
plt.plot(adadelta_acc, label='AdaDelta')
plt.plot(nadam_acc, label='Nadam')
plt.legend(loc='upper left')
plt.ylabel('Validation accuracy')
plt.title('Validation Accuracy')  # title added so the panel stands alone, like the loss panel

# Bottom panel: validation loss per epoch
plt.subplot(2, 1, 2)

plt.plot(adam_val_loss, label='Adam')
plt.plot(sgd_val_loss, label='Sgd')
plt.plot(adadelta_val_loss, label='AdaDelta')
plt.plot(nadam_val_loss, label='Nadam')

plt.legend(loc='upper left')
plt.ylabel('Validation loss')
plt.ylim([0,1])
plt.title('Validation Loss')
plt.xlabel('epoch')
plt.show()

In [0]:
# Rebuild the 14-layer CNN with the AdaDelta optimizer
cnn_model_14L_adadelta = create_model(optimizer = 'adadelta')

# Fit it, using the test split as validation data
history_adadelta = cnn_model_14L_adadelta.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, y_test),
)

# Report the test-split accuracy (index 1 of the evaluation result)
score = cnn_model_14L_adadelta.evaluate(X_test, y_test, verbose=0)
metric_name = cnn_model_14L_adadelta.metrics_names[1]
print("AdaDelta optimizer %s: %.2f%%" % (metric_name, score[1]*100))

In [0]:
# Model evaluation: test-split accuracy of the AdaDelta-trained model.
# (Statements must start at column 0; the stray indentation raised IndentationError.)
score = cnn_model_14L_adadelta.evaluate(X_test, y_test, verbose=0)
print("AdaDelta optimizer %s: %.2f%%" % (cnn_model_14L_adadelta.metrics_names[1], score[1]*100))

#### Save and load model


In [0]:
from keras.models import model_from_json

# Save model to disk so the loading cell can restore it on a fresh runtime.
# Serialize the architecture to JSON
with open("cnn_model_14L_adadelta.json", "w") as json_file:
    json_file.write(cnn_model_14L_adadelta.to_json())
# Serialize the weights to HDF5
cnn_model_14L_adadelta.save_weights("cnn_model_14L_adadelta.h5")
print("Saved model to disk")

In [0]:
# Load the JSON architecture and rebuild the model; `with` closes the file
# even if reading fails
with open('cnn_model_14L_adadelta.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("cnn_model_14L_adadelta.h5")
print("Loaded model from disk")

# A model rebuilt from JSON is uncompiled: compile before evaluating on test data
loaded_model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
score = loaded_model.evaluate(X_test, y_test, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

#### Misclassified data

In [0]:
# y_test was one-hot encoded by the data-preparation cell, so recover integer
# labels before comparing; a (N, 10) array cannot be compared with (N,) predictions.
true_class_labels = np.argmax(y_test, axis=1) if y_test.ndim > 1 else y_test

# The most probable class per test image (equivalent to predict_classes for a softmax output)
predicted_class_labels = np.argmax(cnn_model_14L_adadelta.predict(X_test), axis=1)

# True labels of every test image the model got wrong
misclassified_labels = true_class_labels[predicted_class_labels != true_class_labels]

# Class names in Mnist-fashion dataset
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
                'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Misclassification count per true class, starting at class 0 ('T-shirt/top')
for i, class_name in enumerate(class_names):
  print("{}:{}".format(class_name, np.count_nonzero(misclassified_labels == i)))