# MNIST CLASSIFICATION

### The dataset is called MNIST and refers to handwritten digit recognition.
### You can find more about it on Yann LeCun's website (Director of AI Research, Facebook).
### The goal is to write an algorithm that detects which digit is written. Since there are only 10 digits (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), this is a classification problem with 10 classes.

### Our goal would be to build a neural network with 2 hidden layers.

In [14]:
# importing the packages

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import numpy as np
import seaborn as sns
import collections

# visualization tools
%matplotlib inline
import matplotlib.pyplot as plt

In [15]:
# loading dataset
mnist_dataset, mnist_info = tfds.load(name='mnist' ,with_info=True,as_supervised=True)

In [16]:
# extracting training and test dataset
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

# validation dataset setup
num_validation_samples = 0.1*mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

print(mnist_train[0])

plt.imshow(mnist_train[0, :, :, 0])
plt.colorbar()

TypeError: 'PrefetchDataset' object is not subscriptable

In [None]:
#scaling data by function
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.
    return image, label

scaled_train_and_validation_data = mnist_train.map(scale)
scaled_test_and_validation_data = mnist_test.map(scale)

# suffling dataset to properly apply sgd
Buffer_size = 10000

shuffled_train_validation_data = scaled_train_and_validation_data.shuffle(Buffer_size)

# preparing data in validation and train format
validation_data = shuffled_train_validation_data.take(num_validation_samples)

trained_data = shuffled_train_validation_data.skip(num_validation_samples)

batch_size = 100

trained_data = trained_data.batch(batch_size)
validation_data = validation_data.batch(num_validation_samples)
test_data = scaled_test_and_validation_data.batch(num_test_samples)

validation_inputs, validation_targets = next(iter(validation_data))

In [6]:
input_size = 784
output_size = 10
# Use same hidden layer size for both hidden layers. Not a necessity.
hidden_layer_size = 100
    
# define how the model will look like
model = tf.keras.Sequential([
    
    # the first layer (the input layer)
    # each observation is 28x28x1 pixels, therefore it is a tensor of rank 3
    # since we don't know CNNs yet, we don't know how to feed such input into our net, so we must flatten the images
    # there is a convenient method 'Flatten' that simply takes our 28x28x1 tensor and orders it into a (None,) 
    # or (28x28x1,) = (784,) vector
    # this allows us to actually create a feed forward neural network
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)), # input layer
    
    # tf.keras.layers.Dense is basically implementing: output = activation(dot(input, weight) + bias)
    # it takes several arguments, but the most important ones for us are the hidden_layer_size and the activation function
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 2nd hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 3rd hidden layer    
    # the final layer is no different, we just make sure to activate it with softmax
    tf.keras.layers.Dense(output_size, activation='softmax') # output layer
])

In [7]:
#custom_optimizer = tf.keras.optimizers.SGD(learning_rate=0.02)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#training the model 
num_epochs = 10
model.fit(trained_data, epochs = num_epochs, validation_data=(validation_inputs, validation_targets), verbose = 2)
          

Epoch 1/10
540/540 - 9s - loss: 0.3319 - accuracy: 0.9038 - val_loss: 0.1487 - val_accuracy: 0.9575
Epoch 2/10
540/540 - 7s - loss: 0.1327 - accuracy: 0.9605 - val_loss: 0.1168 - val_accuracy: 0.9657
Epoch 3/10
540/540 - 8s - loss: 0.0937 - accuracy: 0.9716 - val_loss: 0.0901 - val_accuracy: 0.9747
Epoch 4/10
540/540 - 8s - loss: 0.0731 - accuracy: 0.9774 - val_loss: 0.0682 - val_accuracy: 0.9803
Epoch 5/10
540/540 - 8s - loss: 0.0583 - accuracy: 0.9819 - val_loss: 0.0613 - val_accuracy: 0.9798
Epoch 6/10
540/540 - 8s - loss: 0.0458 - accuracy: 0.9862 - val_loss: 0.0549 - val_accuracy: 0.9820
Epoch 7/10
540/540 - 8s - loss: 0.0399 - accuracy: 0.9874 - val_loss: 0.0409 - val_accuracy: 0.9875
Epoch 8/10
540/540 - 8s - loss: 0.0329 - accuracy: 0.9894 - val_loss: 0.0467 - val_accuracy: 0.9853
Epoch 9/10
540/540 - 7s - loss: 0.0341 - accuracy: 0.9890 - val_loss: 0.0387 - val_accuracy: 0.9878
Epoch 10/10
540/540 - 7s - loss: 0.0249 - accuracy: 0.9922 - val_loss: 0.0414 - val_accuracy: 0.9882

<tensorflow.python.keras.callbacks.History at 0x14d2345e408>