# Handwritten Digit Recognition 

Training a neural network to recognize hand-written digits from 0-9. 

## Outline 
1. Packages 


### 1. Packages 

- [numpy](https://numpy.org/) 
- [matplotlib](http://matplotlib.org) 
- [tensorflow](https://www.tensorflow.org/) 

In [16]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import linear, relu, sigmoid
#%matplotlib widget
#import matplotlib.pyplot as plt
#plt.style.use('./deeplearning.mplstyle')

import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

#from public_tests import * 

#from autils import *
#from lab_utils_softmax import plt_softmax
#np.set_printoptions(precision=2)

### 2. Load and Check Dataset

In [17]:
# Load dataset 
X = np.load("data/X.npy")
y = np.load("data/y.npy")

In [18]:
# Check that the dataset loaded properly 
print(f'The first element of X is: {X[0]}')
print(f'The first element of y is: {y[0,0]}, the last element of y is {y[-1,0]}')
print(f'The shape of X is: {str(X.shape)}, the shape of y is {str(y.shape)}')

The first element of X is: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+0

In [19]:
# Split dataset into training, cross validation and test sets 
X_train, X_, y_train, y_ = train_test_split(X, y, test_size = 0.3)
X_crossval, X_test, y_crossval, y_test = train_test_split(X_, y_, test_size = 0.5)

print(f'The shape of X_train is {str(X_train.shape)}. The shape of y_train is {str(y_train.shape)}')
print(f'The shape of X_crossval is {str(X_crossval.shape)}. The shape of y_crossval is {str(y_crossval.shape)}')
print(f'The shape of X_test is {str(X_test.shape)}. The shape of y_test is {str(y_test.shape)}')
#print(f'The first element of y is: {y[0,0]}, the last element of y is {y[-1,0]}')
#print(f'The shape of X is: {str(X.shape)}, the shape of y is {str(y.shape)}')

The shape of X_train is (3500, 400). The shape of y_train is (3500, 1)
The shape of X_crossval is (750, 400). The shape of y_crossval is (750, 1)
The shape of X_test is (750, 400). The shape of y_test is (750, 1)


### 3. Define Model

In [20]:
model = Sequential (
    [
        tf.keras.layers.InputLayer((400,)),
        tf.keras.layers.Dense(120, activation='relu', name='L1', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dense(25, activation='relu', name='L2', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dense(15, activation='relu', name='L3', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dense(10, activation='linear', name='L4', kernel_regularizer=tf.keras.regularizers.l2(0.01))
    ], name = '4_Layer_model'
)

model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [21]:
model.summary()

Model: "4_Layer_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 L1 (Dense)                  (None, 120)               48120     
                                                                 
 L2 (Dense)                  (None, 25)                3025      
                                                                 
 L3 (Dense)                  (None, 15)                390       
                                                                 
 L4 (Dense)                  (None, 10)                160       
                                                                 
Total params: 51695 (201.93 KB)
Trainable params: 51695 (201.93 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [25]:
# Identify the shapes of the layer parameters

[layer1, layer2, layer3, layer4] = model.layers

W1,b1 = layer1.get_weights()
W2,b2 = layer2.get_weights()
W3,b3 = layer3.get_weights()
W4, b4 = layer4.get_weights()
print(f"W1 shape = {W1.shape}, b1 shape = {b1.shape}")
print(f"W2 shape = {W2.shape}, b2 shape = {b2.shape}")
print(f"W3 shape = {W3.shape}, b3 shape = {b3.shape}")
print(f"W4 shape = {W4.shape}, b4 shape = {b4.shape}")

W1 shape = (400, 120), b1 shape = (120,)
W2 shape = (120, 25), b2 shape = (25,)
W3 shape = (25, 15), b3 shape = (15,)
W4 shape = (15, 10), b4 shape = (10,)


### 4. Train the Model

In [26]:
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

history = model.fit(
    X_train, y_train,
    epochs=100
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### 5. Check Bias and Variance

In [28]:
# Define diagnostic functions

def eval_mse(y, yhat):
    m = len(y) 
    err = 0.0 
    for i in range(m):
        err += (y[i] - yhat[i])**2
    err /= 2*m
    return err

def eval_cat_err(y, yhat):
    m = len(y)
    err = 0
    for i in range(m):
        if yhat[i]!= y[i]:
            err += 1
    err = err/m
    return err

def get_predictions(model, training_set):
    pred_probs = model.predict(training_set)
    preds = np.zeros((len(pred_probs),1))
    for i in range(len(pred_probs)):
        softmax = tf.nn.softmax(pred_probs[i])
        preds[i] = [np.argmax(softmax)]
    return preds

def display_errors(model,X,y):
    f = model.predict(X)
    yhat = np.argmax(f, axis=1)
    doo = yhat != y[:,0]
    idxs = np.where(yhat != y[:,0])[0]
    return(len(idxs))

In [29]:
train_preds = get_predictions(model, X_train)
crossval_preds = get_predictions(model, X_crossval)
test_preds = get_predictions(model, X_test)
print(f'categorization error, training, regularized model: {eval_cat_err(train_preds, y_train)}')
print(f'categorization error, crossval, regularized model: {eval_cat_err(crossval_preds, y_crossval)}')

categorization error, training, regularized model: 0.05228571428571428
categorization error, crossval, regularized model: 0.09466666666666666


In [30]:
print(f"{display_errors(model,X_train,y_train)} errors out of {len(X)} images")
print(f'{display_errors(model, X_crossval, y_crossval)} errors out of {len(X_crossval)} images')
print(f'{display_errors(model, X_test, y_test)} errors out of {len(X_test)} images')

183 errors out of 5000 images
71 errors out of 750 images
70 errors out of 750 images


In [32]:
lambdas_to_test = [0.0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3]
models = [None] * len(lambdas_to_test)
for i in range(len(lambdas_to_test)):
    lambda_curr = lambdas_to_test[i]
    models[i] = Sequential(
        [
            tf.keras.layers.InputLayer((400,)),
            tf.keras.layers.Dense(120, activation='relu', name='L1', kernel_regularizer=tf.keras.regularizers.l2(lambda_curr)),
            tf.keras.layers.Dense(25, activation='relu', name='L2', kernel_regularizer=tf.keras.regularizers.l2(lambda_curr)),
            tf.keras.layers.Dense(15, activation='relu', name='L3', kernel_regularizer=tf.keras.regularizers.l2(lambda_curr)),
            tf.keras.layers.Dense(10, activation='linear', name='L4', kernel_regularizer=tf.keras.regularizers.l2(lambda_curr))
        ], name = '4_Layer_model'
    )
    models[i].compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(0.01), 
    )

    models[i].fit(
        X_train, y_train, 
        epochs=100
    )
    print(f'Finished lambda = {lambda_curr}')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78