In [None]:
from tensorflow.keras import layers
from tensorflow import keras 
import tensorflow as tf

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno

# Show the TensorFlow version this notebook is running against, so results
# can be tied to a specific release.
print(tf.__version__)

In [None]:
# Handle to the Keras MNIST dataset module (nothing is fetched yet)
mnist = tf.keras.datasets.mnist

# load_data() returns a (train, test) pair, each an (images, labels) tuple
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Report the container type of the training images and labels
for label, array in (("X_train", X_train), ("y_train", y_train)):
    print(label, type(array))

In [None]:
# Image tensor shape, label vector shape, and the first raw label, one per line
print(X_train.shape, y_train.shape, y_train[0], sep="\n")

In [None]:
# One-hot encode the integer class labels so they match the 10-way softmax
# output and the categorical cross-entropy loss used by the model.
NUM_CLASSES = 10

# Only encode while the labels are still a 1-D vector of class indices.
# Without this guard, re-running the cell would one-hot encode the already
# encoded matrix and silently change its shape.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
if y_test.ndim == 1:
    # Fixing num_classes keeps train and test label widths identical even if
    # a split happened to miss the highest class.
    y_test = keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

In [None]:
# Inspect the first training label after one-hot encoding
y_train[0]

In [None]:
# Conv2D layers expect 4-D input: (samples, height, width, channels).
# The images are single-channel, so append a channel axis of size 1.
# Background on Keras input shapes:
# https://stackoverflow.com/questions/44747343/keras-input-explanation-input-shape-units-batch-size-dim-etc

X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

In [None]:
# Cast both image tensors to float32 (the original author notes this trains
# faster); the pixel rescaling in the next cell then operates on floats.
X_train, X_test = (images.astype('float32') for images in (X_train, X_test))

In [None]:
# Rescale the pixel intensities by the 8-bit maximum.
# NOTE(review): this cell is not idempotent - running it twice divides twice.
PIXEL_MAX = 255.0
X_train, X_test = X_train / PIXEL_MAX, X_test / PIXEL_MAX

In [None]:
def _vgg_block(filters, dropout_rate, input_shape=None):
  """Return the layers of one VGG-style block as a list.

  Layer order: Conv2D -> BatchNormalization -> Conv2D -> BatchNormalization
  -> MaxPooling2D(2x2) -> Dropout. Both convolutions use 3x3 kernels, 'same'
  padding, He-uniform initialisation and ReLU activation.

  Args:
    filters: Number of filters for both convolutions in the block.
    dropout_rate: Dropout rate applied after pooling.
    input_shape: If given, passed to the first convolution so the block can
      be the first one in a Sequential model.

  Returns:
    A list of freshly created Keras layers, in application order.
  """
  conv_kwargs = dict(padding='same', kernel_initializer='he_uniform', activation='relu')

  # Only forward input_shape when it was actually supplied; inner blocks
  # infer their input shape from the preceding layer.
  first_conv_kwargs = dict(conv_kwargs)
  if input_shape is not None:
    first_conv_kwargs['input_shape'] = input_shape

  return [
    layers.Conv2D(filters, (3, 3), **first_conv_kwargs),
    layers.BatchNormalization(),
    layers.Conv2D(filters, (3, 3), **conv_kwargs),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(dropout_rate),
  ]


def create_model(input_shape=(28, 28, 1), num_classes=10):
  """Build and compile the VGG-style CNN classifier.

  The network has a feature detector made of three VGG blocks (32, 64 and
  128 filters, with dropout 0.2, 0.3 and 0.4), followed by a classifier:
  Flatten -> Dense(128, relu) -> BatchNormalization -> Dropout(0.5)
  -> Dense(num_classes, softmax).

  Args:
    input_shape: Shape of one input sample, (height, width, channels).
      Defaults to (28, 28, 1).
    num_classes: Number of units in the softmax output layer. Defaults to 10.

  Returns:
    A keras.Sequential model compiled with categorical cross-entropy loss,
    the Adam optimizer and the accuracy metric.
  """
  # Part 1: feature detector (three VGG blocks, filter count doubling each time)
  feature_layers = (
    _vgg_block(32, 0.2, input_shape=input_shape)
    + _vgg_block(64, 0.3)
    + _vgg_block(128, 0.4)
  )

  # Part 2: classifier (a small dense network on the flattened feature maps)
  classifier_layers = [
    layers.Flatten(),
    layers.Dense(128, kernel_initializer='he_uniform', activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    # Output layer: one probability per class
    layers.Dense(num_classes, activation='softmax'),
  ]

  model = keras.Sequential(feature_layers + classifier_layers)

  # Categorical cross-entropy expects one-hot encoded targets
  model.compile(loss=keras.losses.categorical_crossentropy,
                optimizer="adam",
                metrics=['accuracy'])

  return model

In [None]:
# Build a fresh, compiled (untrained) instance of the CNN
model = create_model()

In [None]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

In [None]:
# Data augmentation: increase the diversity of the training data with random
# shifts and small rotations, without collecting new samples.

BATCH_SIZE = 64

datagen = keras.preprocessing.image.ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    # Keep rotations small. Arbitrary rotations (the previous value was 360)
    # make digits such as 6 and 9 indistinguishable while their labels stay
    # unchanged, which corrupts the training signal.
    rotation_range=10)

# datagen.fit(X_train) is intentionally omitted: it only computes statistics
# for featurewise centering/normalization and ZCA whitening, none of which
# are enabled on this generator.
augmented_train = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)

# Number of full batches per epoch
steps = X_train.shape[0] // BATCH_SIZE

In [None]:
# Train on the augmented batches. Model.fit accepts the iterator directly;
# fit_generator is deprecated and no longer available in recent Keras releases.
# shuffle is left at fit's default (True), matching the previous explicit value.
# Note: the test split is used as validation data here, so the reported
# validation metrics are not an independent hold-out estimate.
history = model.fit(augmented_train, steps_per_epoch=steps, epochs=20,
                    validation_data=(X_test, y_test))