**Convolutional Neural Network:**

Björn Leon Neumann (17-619-586)

Alexander Martin Mattes (17-619-529)


**Accuracy rate (evaluation data)**: 0.9501000046730042

**Accuracy rate (training data)**: ~0.992

The accuracy rate on the training data differs slightly each time the code is run, because it is measured while training for one more epoch, starting from the weights that yield an evaluation accuracy of 95.01%.



**Instructions (IMPORTANT):**

If you want to load the weights that result in a test accuracy of 95.01%, set the variable **load_existing_weights** to **True**. 

If you set **load_weights_from_remote** to **False**, keep in mind that the cp-0200.ckpt file must be present in a folder called **weights** for this to work. That is, you have to create a folder called **weights** and put the following file into it: https://drive.google.com/open?id=19p_AsT2WVSdpvtmm-Uv3q51SD9lSqxXb

However, if you leave **load_weights_from_remote** set to **True**, the weights will be downloaded in raw form from https://drive.google.com/uc?id=19p_AsT2WVSdpvtmm-Uv3q51SD9lSqxXb

If you want to train the model from scratch, set the variable **load_existing_weights** to **False**

If you want to use data augmentation, set the variable **data_augmentation** to **True**. Otherwise set it to **False**.

# 0. Set your initial hyperparameters

In [0]:
# --- Weight handling ---------------------------------------------------------
# True  -> skip training and load previously stored weights.
# False -> train the model from scratch.
load_existing_weights = False

# Only consulted when load_existing_weights is True:
# True  -> fetch the weights file from weights_file_url.
# False -> read the local file weights/cp-0200.ckpt.
load_weights_from_remote = True
weights_file_url = 'https://drive.google.com/uc?id=19p_AsT2WVSdpvtmm-Uv3q51SD9lSqxXb'

# --- Training ----------------------------------------------------------------
num_classes = 10
batch_size = 256
epochs = 200
learning_rate = 0.001

# --- Data augmentation -------------------------------------------------------
# True -> train on randomly transformed images; False -> train on the raw images.
data_augmentation = True

data_augmentation_params = dict(
    rotation_range=10,       # random rotation, in degrees
    width_shift_range=0.1,   # random horizontal shift (fraction of total width)
    height_shift_range=0.1,  # random vertical shift (fraction of total height)
    shear_range=0.1,         # range for random shear
    horizontal_flip=True,    # randomly flip images horizontally
)

# 1. Imports and Setup

In [2]:
!pip install pyyaml h5py  # Required to save models in HDF5 format
import os

from __future__ import print_function
import keras
from keras.datasets import fashion_mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

import numpy as np
np.random.seed(1337) # To have consistency

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.utils.data_utils import get_file
from keras.utils.vis_utils import plot_model


# Height and width of every input image.
img_rows, img_cols = 28, 28

# Load the dataset, already split into a training and a test portion.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Place the single channel axis where the active backend expects it.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Cast to float32 and divide every pixel value by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Turn the integer class vectors into binary (one-hot) class matrices.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)



Using TensorFlow backend.


Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz


# 2. Setting up Model

In [3]:
model = Sequential()

## Progress
# Went from 2 layers to 3
# Added BatchNormalization
# Went from 3 to 4 convolutional layers
# Removed the MaxPooling from the 1st and 3rd layer
# Went from 32-64-128-256 to 64-128-256-512 -> worse results
# Went back to 32-64-128-256 and increased the batch size from 256 to 512 -> converged slower (eventually slightly worse results)
# Set learning rate to 0.0001 (converged slower) -> worse results

# Training_1: rotation 90 degrees; last fully connected layer 256
# Training_2: rotation 90 degrees; last fully connected layer 512
# Training_3: rotation 10 degrees; last fully connected layer 512

### Changed hyperparameters summary:
## Initial model:
# Batch size
# Number of epochs
# Number of layers
# Neuron output size
# Different activation functions
# Different optimizers
# Kernel size
# Pool size
# Learning rate
## Additional hyperparameters (data augmentation):
# Rotation range
# Width shift range
# Height shift range
# Shear range
# Horizontal flip


# Convolutional part: four stages, each made of two 3x3 "same"-padded
# convolutions with batch normalization after each one and dropout at the end.
# Every entry is (number of filters, whether the stage ends with 2x2 max pooling).
conv_stages = [(32, False), (64, True), (128, False), (256, True)]

for stage_index, (num_filters, use_pooling) in enumerate(conv_stages):
    # Only the very first layer of the model is told the input shape.
    first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}

    model.add(Conv2D(num_filters, (3, 3), activation='relu', padding='same',
                     **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(Conv2D(num_filters, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    if use_pooling:
        model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Classifier head: two fully connected layers of 512 units (the second one was
# 256 before), each followed by batch normalization and dropout, then softmax.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would append a stray "None" line to the output).
model.summary()
#plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
              metrics=['accuracy'])


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 28, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
batch_normalization_2 (Batch (None, 28, 28, 32)        128       
_________________________________________________________________
dropout_1 (Dropout)          (None, 28, 28, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
batch_normalization_3 (Batch (None, 28, 28, 64)       

# 3. Model Training

In [4]:
# Either train the model from scratch or restore previously stored weights,
# depending on load_existing_weights.
if not load_existing_weights:

    # The following saving weights functionality has been commented out for submission purposes

    # Save weights
    ###checkpoint_path = "training_1/cp-{epoch:04d}.ckpt"
    ###checkpoint_dir = os.path.dirname(checkpoint_path)
    # Create a callback that saves the model's weights
    ###cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
    ###                                                save_weights_only=True,
    ###                                                verbose=1)

    if not data_augmentation:

        print('Not using data augmentation.')

        model.fit(x_train, y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(x_test, y_test))
                  ####callbacks=[cp_callback]) # Not used as part of testing

    else:

        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation. The keys of
        # data_augmentation_params are exactly the ImageDataGenerator keyword
        # arguments, so the dict is passed through unchanged.
        datagen = ImageDataGenerator(**data_augmentation_params)

        # fit() only computes statistics needed for feature-wise normalization
        # or ZCA whitening. None of those options is enabled above; the call is
        # kept so that enabling one of them in data_augmentation_params keeps working.
        datagen.fit(x_train)

        # Fit the model on the batches generated by datagen.flow().
        model.fit_generator(datagen.flow(x_train, y_train,
                                         batch_size=batch_size),
                            epochs=epochs,
                            validation_data=(x_test, y_test),
                            workers=4)
                            ####callbacks=[cp_callback]) # Not used as part of testing

    # Listing the checkpoint directory is disabled together with the checkpoint
    # callback above: checkpoint_dir is not defined while that code is commented
    # out, and no checkpoint files are written.
    ###!ls {checkpoint_dir}

else:

    # Pick the weights file: a local checkpoint or a download of weights_file_url.
    if not load_weights_from_remote:
        checkpoint_path = "weights/cp-0200.ckpt"
    else:
        checkpoint_path = get_file('our_weights', weights_file_url)

    # Loads the weights
    model.load_weights(checkpoint_path)


Downloading data from https://drive.google.com/uc?id=19p_AsT2WVSdpvtmm-Uv3q51SD9lSqxXb
31563776/Unknown - 4s 0us/step

# 4. Results

In [5]:
# Evaluate on the held-out test set; the model was compiled with a single
# metric, so score holds the loss followed by the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

Test loss: 0.21510290865816642
Test accuracy: 0.9501000046730042


Showing additional information, including **accuracy rate on the training data** by going through one more epoch starting from the weights that achieve 95.01% evaluation accuracy

Even though this is the results section, we **train** our model for one more epoch here in order to obtain the accuracy on the training data.

In [0]:
# Showing additional information, including accuracy rate on the training data 
# by going through one more epoch based on the weights that achieve 95.01% evaluation accuracy

# Train for one additional epoch on the un-augmented training data so that the
# returned history contains the accuracy on the training set.
# NOTE: model.fit() updates the model's weights, so every cell executed after
# this one works with the post-epoch weights, not the ones evaluated before.
# NOTE(review): the epoch only runs when data_augmentation is True; it looks
# like the intended condition may be a different flag - confirm.
if data_augmentation:
  print('# Fit model on training data')
  history = model.fit(x_train, y_train,
                      batch_size=batch_size,  # use the notebook-wide setting instead of a hard-coded 256
                      epochs=1,
                      validation_data=(x_test, y_test))
  print('\nHistory dictionary showing additional information, including the accuracy rate of the training data. \nHere we are going through one more epoch based on the weights that achieve 95.01% evaluation accuracy.\n\n', history.history)

# Fit model on training data
Train on 60000 samples, validate on 10000 samples
Epoch 1/1

# 5. Confusion Matrix:

In [0]:
# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns
import missingno as msno

# Handle table-like data and matrices :
import itertools

# Modelling Helpers :
from sklearn.metrics import confusion_matrix, classification_report

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D NumPy array. Rows are labelled "True label" and columns
            "Predicted label" on the plot.
        classes: Sequence of tick labels, one per row/column of ``cm``.
        normalize: If True, every row is divided by its sum before plotting,
            so both the colours and the cell texts show per-row fractions.
            Rows whose sum is zero are shown as zeros.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the heat map.
    """
    # Normalize BEFORE drawing, so that the colours and the cell annotations
    # are based on the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)  # avoid NaN for empty rows

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text to stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Fractions are rounded to two decimals; raw values are shown as they are.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so that the axis labels are taken into account.
    plt.tight_layout()


# Model output for every image of the test set (one score per class).
Y_pred = model.predict(x_test)
# Reduce each output vector to the index of its highest-scoring class.
Y_pred_classes = Y_pred.argmax(axis=1)
# Recover the integer class labels from the one-hot encoded test labels.
Y_true = y_test.argmax(axis=1)

# Count, for every true class, how often each class was predicted.
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)

# Tick labels in class-index order.
class_names = ['T-shirt/Top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot']
plot_confusion_matrix(confusion_mtx, classes=class_names)