### Import the relevant libraries

In [2]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set()

### Data

In [3]:
dig = load_digits()
train_inputs, test_inputs, train_targets, test_targets = train_test_split(dig.data, dig.target, 
                                                    test_size = 0.2, random_state=42)

train_inputs, validation_inputs, train_targets, validation_targets = train_test_split(train_inputs, train_targets, 
                                                    test_size = 0.1, random_state=42)

print(f'Train inputs shape: {train_inputs.shape}\n\
Train targets shape: {train_targets.shape[0]}\n\
Test inputs shape: {test_inputs.shape}\n\
Test targets shape: {test_targets.shape[0]}\n\
Validation inputs shape: {validation_inputs.shape}\n\
Validation targets shape: {validation_targets.shape[0]}')

Train inputs shape: (1293, 64)
Train targets shape: 1293
Test inputs shape: (360, 64)
Test targets shape: 360
Validation inputs shape: (144, 64)
Validation targets shape: 144


In [4]:
dig['DESCR']

".. _digits_dataset:\n\nOptical recognition of handwritten digits dataset\n--------------------------------------------------\n\n**Data Set Characteristics:**\n\n    :Number of Instances: 5620\n    :Number of Attributes: 64\n    :Attribute Information: 8x8 image of integer pixels in the range 0..16.\n    :Missing Attribute Values: None\n    :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)\n    :Date: July; 1998\n\nThis is a copy of the test set of the UCI ML hand-written digits datasets\nhttps://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\nThe data set contains images of hand-written digits: 10 classes where\neach class refers to a digit.\n\nPreprocessing programs made available by NIST were used to extract\nnormalized bitmaps of handwritten digits from a preprinted form. From a\ntotal of 43 people, 30 contributed to the training set and different 13\nto the test set. 32x32 bitmaps are divided into nonoverlapping blocks of\n4x4 and the number of on pixel

In [7]:
pd.DataFrame(train_inputs).isnull().values.any()

False

In [9]:
dig.target_names

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### Model
Outline, optimizers, loss, early stopping and training

In [132]:
# Set the input and output sizes
input_size = train_inputs.shape[1]
output_size = 10
# Use same hidden layer size for both hidden layers. Not a necessity.
hidden_layer_size = 75
    
# define how the model will look like
model = tf.keras.Sequential([
    # tf.keras.layers.Dense is basically implementing: output = activation(dot(input, weight) + bias)
    # it takes several arguments, but the most important ones for us are the hidden_layer_size and the activation function
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='sigmoid'), # 2nd hidden layer
    # the final layer is no different, we just make sure to activate it with softmax
    tf.keras.layers.Dense(output_size, activation='softmax') # output layer
])


### Choose the optimizer and the loss function

# we define the optimizer we'd like to use, 
# the loss function, 
# and the metrics we are interested in obtaining at each iteration
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Training
# That's where we train the model we have built.

# set the batch size
batch_size = 35

# set a maximum number of training epochs
max_epochs = 100

# set an early stopping mechanism
# let's set patience=2, to be a bit tolerant against random validation loss increases
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# fit the model
# note that this time the train, validation and test data are not iterable
model.fit(train_inputs, # train inputs
          train_targets, # train targets
          batch_size=batch_size, # batch size
          epochs=max_epochs, # epochs that we will train for (assuming early stopping doesn't kick in)
          # callbacks are functions called by a task when a task is completed
          # task here is to check if val_loss is increasing
          callbacks=[early_stopping], # early stopping
          validation_data=(validation_inputs, validation_targets), # validation data
          verbose = 2 # making sure we get enough information about the training process
          )  

Train on 1293 samples, validate on 144 samples
Epoch 1/100
1293/1293 - 0s - loss: 1.8436 - accuracy: 0.4965 - val_loss: 1.3909 - val_accuracy: 0.7639
Epoch 2/100
1293/1293 - 0s - loss: 1.0842 - accuracy: 0.8577 - val_loss: 0.8598 - val_accuracy: 0.8542
Epoch 3/100
1293/1293 - 0s - loss: 0.6900 - accuracy: 0.9188 - val_loss: 0.5837 - val_accuracy: 0.9444
Epoch 4/100
1293/1293 - 0s - loss: 0.4676 - accuracy: 0.9536 - val_loss: 0.4064 - val_accuracy: 0.9792
Epoch 5/100
1293/1293 - 0s - loss: 0.3375 - accuracy: 0.9629 - val_loss: 0.3083 - val_accuracy: 0.9722
Epoch 6/100
1293/1293 - 0s - loss: 0.2581 - accuracy: 0.9683 - val_loss: 0.2453 - val_accuracy: 0.9792
Epoch 7/100
1293/1293 - 0s - loss: 0.1983 - accuracy: 0.9760 - val_loss: 0.2086 - val_accuracy: 0.9722
Epoch 8/100
1293/1293 - 0s - loss: 0.1635 - accuracy: 0.9760 - val_loss: 0.1800 - val_accuracy: 0.9722
Epoch 9/100
1293/1293 - 0s - loss: 0.1359 - accuracy: 0.9853 - val_loss: 0.1607 - val_accuracy: 0.9722
Epoch 10/100
1293/1293 - 0

<tensorflow.python.keras.callbacks.History at 0x40207f08>

### Evaluating model

In [133]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [134]:
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.08. Test accuracy: 97.50%
