# Digit Classification using Dense Neural Network (DNN) - Version 2 
 - Using validation and test data 

## ==> Verify on Runtime tab if GPU is active on Runtime type

## Import Libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

## Upload and Explore Dataset
[MNIST](http://yann.lecun.com/exdb/mnist/) handwritten digits dataset

The MNIST database of handwritten digits, also available from this [TF page](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/mnist/load_data), has a training set of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images. It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image.


In [7]:
# Keep a handle on the Keras MNIST dataset module, then unpack the
# (images, labels) pairs returned by load_data() for the two splits.
data = tf.keras.datasets.mnist

tt_split, test_split = data.load_data()
tt_images, tt_labels = tt_split
test_images, test_labels = test_split

In [None]:
# Shapes of the full training images and labels, one per line.
print(tt_images.shape, tt_labels.shape, sep="\n")

In [None]:
# Shapes of the test images and labels, one per line.
print(test_images.shape, test_labels.shape, sep="\n")

In [None]:
# Peek at the first ten labels of the full training set.
tt_labels[:10]

In [None]:
# Histograms of the label values in the full training set and in the test
# set, drawn one after the other on the same axes.
# The trailing semicolons are kept as written; in a notebook they are
# presumably there to suppress the echoed return value of plt.hist.
plt.hist(tt_labels);
plt.hist(test_labels);

In [17]:
# Hold out the first 10,000 samples of the original training data
# (images and their matching labels) as the validation set.
val_images, val_labels = tt_images[:10000], tt_labels[:10000]

In [18]:
# Everything after the first 10,000 samples stays in the training set,
# so it does not overlap with the validation slice taken from the front.
train_images, train_labels = tt_images[10000:], tt_labels[10000:]

In [None]:
# Shapes of the reduced training images and labels, one per line.
print(train_images.shape, train_labels.shape, sep="\n")

In [None]:
# Shapes of the validation images and labels, one per line.
print(val_images.shape, val_labels.shape, sep="\n")

In [None]:
# Compare the label distribution of the three splits on one set of axes.
# Each legend label now names the array actually being plotted (the
# 'Val' and 'Test' labels were previously swapped), and the legend is
# drawn so the labels are visible. alpha=0.5 keeps overlapping bars readable.
plt.hist(train_labels, alpha=0.5, label='Train')
plt.hist(test_labels, alpha=0.5, label='Test')
plt.hist(val_labels, alpha=0.5, label='Val')
plt.legend();

In [None]:
# Index of the test sample to inspect.
img = 1000

# Report its label, then render the image itself in grayscale.
label_line = "     Label of image {} is: {}".format(img, test_labels[img])
print(label_line)
plt.imshow(test_images[img], cmap='gray');

### Preprocessing Data 

**Normalizing Data**: 
We notice that all of the pixel values in the images are between 0 and 255. When training a neural network, for various reasons it's easier if we treat all values as between 0 and 1, a process called 'normalizing'.

In [25]:
# Rescale all three image sets by the same factor (255.0).
# NOTE: this rebinds the names, so running the cell a second time would
# divide the already-scaled arrays again.
train_images, val_images, test_images = (
    train_images / 255.0,
    val_images / 255.0,
    test_images / 255.0,
)

## Define and Compile Model

In [None]:
# Same network spelled with fully qualified tf.keras names:
# flatten the 28x28 input, one hidden ReLU layer of 20 units,
# and a 10-unit softmax output layer.
layer_stack = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),
]
model = tf.keras.models.Sequential(layer_stack)
model.summary()

In [27]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense

In [None]:
# Same architecture using the directly imported class names and
# string identifiers for the activations.
layer_stack = [
    Flatten(input_shape=(28, 28)),
    Dense(20, activation='relu'),
    Dense(10, activation='softmax'),
]
model = Sequential(layer_stack)
model.summary()

In [None]:
# Build the same network incrementally: start from an empty Sequential
# and append the layers in order with add().
# Optional experiment: append a second Dense(20, activation='relu')
# hidden layer to the tuple below.
model = Sequential()
for layer in (
    Flatten(input_shape=(28, 28)),
    Dense(20, activation='relu'),
    Dense(10, activation='softmax'),
):
    model.add(layer)
model.summary()

In [38]:
def create_model(hidden_units=20, num_classes=10, input_shape=(28, 28)):
    """Build an uncompiled dense classifier with one hidden layer.

    Called with no arguments, this reproduces the network used in the
    rest of the notebook: a Flatten layer for 28x28 inputs, a hidden
    ReLU layer of 20 units, and a 10-unit softmax output layer.

    Args:
        hidden_units: Number of units in the hidden ReLU layer.
        num_classes: Number of units in the softmax output layer.
        input_shape: Shape of one input sample, passed to Flatten.

    Returns:
        A new Sequential model. It is not compiled; the caller must call
        compile() before training.
    """
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [39]:
# Build a new, not-yet-compiled network from create_model().
model = create_model()

In [31]:
# Compile with the string shortcut 'adam', which uses the default
# learning_rate=0.001, and track accuracy during training.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

In [33]:
from tensorflow.keras.optimizers import Adam, SGD

In [34]:
# Use an explicit optimizer object so the learning rate can be chosen;
# here Adam with learning_rate=0.01.
opt = Adam(learning_rate=0.01)

# Compiling again replaces the optimizer/loss/metrics configuration.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=opt,
)

## Train the model

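You could leave the training data with all samples, and alternatively use: 
- `validation_split=0.1` instead of `validation_data=(val_images, val_labels)`.

In this case, Keras will set aside the validation data by itself: it holds out the last 10% of the training samples (taken before shuffling) and does not train on them. 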

In [None]:
# Train for 20 epochs on the training split, evaluating on the held-out
# validation split; the returned object is kept for plotting.
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(val_images, val_labels),
    epochs=20,
)

Inspecting the model

In [None]:
# Plot the 'accuracy' and 'val_accuracy' series stored in history.history,
# using each key as its own legend label.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric], label=metric)
plt.title('Model Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='upper left')
plt.show()

If the validation accuracy seems unstable, the learning rate may be too high (try reducing it). 

Let's start over with a lower learning rate (for example: 0.001)

In [None]:
# Second run: Adam with learning_rate=0.001.
opt = Adam(learning_rate=0.001)

# Start from a newly built network so training does not continue from
# the weights of the previous run.
model = create_model()
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=opt,
)

# Same training setup as before: 20 epochs with the held-out validation split.
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(val_images, val_labels),
    epochs=20,
)

In [None]:
# Plot the 'accuracy' and 'val_accuracy' series of the latest run,
# using each key as its own legend label.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric], label=metric)
plt.title('Model Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='upper left')
plt.show()

## Testing the trained model


In [None]:
# Score the trained model on the test images and labels, which were not
# passed to fit() as either training or validation data.
model.evaluate(test_images, test_labels)

**Accuracy**
- Train: 0.97; 
- Validation: 0.96 
- Test: 0.95

In [None]:
# Show the first test image in grayscale, consistent with the earlier
# sample-image cell (without cmap the default colormap is applied).
plt.imshow(test_images[0], cmap='gray');

In [None]:
# Ground-truth label of the first test image.
print(test_labels[0])

In [None]:
# Run the network on every test image. The output layer is a 10-unit
# softmax, so each row of the result holds 10 values for one image.
predictions = model.predict(x=test_images)

# Output vector for the first test image.
print(predictions[0])

In [None]:
# np.argmax returns the index of the maximum value along an axis.
# Applied to the first prediction vector, it gives the position of the
# largest model output, which is taken as the predicted class.
np.argmax(predictions[0])

In [None]:
# Collapse every row of model outputs to the index of its largest entry,
# i.e. one predicted class per test image.
# NOTE: this rebinds `predictions`, so the cell is meant to run once
# after each call to model.predict().
predictions = predictions.argmax(axis=-1)
predictions.shape

In [None]:
# First ten entries of predictions (class indices once the argmax cell
# above has been run).
predictions[:10]

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Build the text report comparing the true test labels against the
# predicted classes, then print it.
report = classification_report(test_labels, predictions)
print(report)

In [None]:
# Confusion matrix with test_labels as the true values (first argument)
# and predictions as the predicted values (second argument).
confusion_matrix(test_labels,predictions)

In [None]:
import seaborn as sns

# Compute the matrix once, then draw it as an annotated heatmap.
# fmt='g' is passed through to format the cell annotations.
cm = confusion_matrix(test_labels, predictions)

plt.figure(figsize=(15, 8))
sns.heatmap(cm, annot=True, fmt='g', cmap="Blues");
plt.title('Confusion Matrix')
plt.ylabel("Real values")
plt.xlabel("Predictions")
plt.show()

In [52]:
# Write the trained model to disk under a fixed file name in the current
# working directory; the .h5 extension suggests the HDF5 format.
model.save('MNIST_v2_model.h5')

Use [Netron](https://netron.app) to visualize the model, hyperparameters, tensor shapes, etc. Netron is a viewer for neural network, deep learning and machine learning models (see [GitHub](https://github.com/lutzroeder/netron) for instructions on installing it on your desktop). 