# Handwritten Digit Recognition 

This notebook trains a neural network to recognize handwritten digits from 0-9. 

## Outline 
1. Packages 
2. Load and Check Dataset 
3. Define Model 
4. Train the Model 
5. Check Bias and Variance 


### 1. Packages 

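- [numpy](https://numpy.org/) 
- [matplotlib](http://matplotlib.org) (optional: the plotting imports below are currently commented out) 
- [tensorflow](https://www.tensorflow.org/) 
- [scikit-learn](https://scikit-learn.org/) (used for `train_test_split`) 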

In [21]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import linear, relu, sigmoid
#%matplotlib widget
#import matplotlib.pyplot as plt
#plt.style.use('./deeplearning.mplstyle')

import logging
# Keep cell output readable: let only ERROR-level records from the
# "tensorflow" logger through and set AutoGraph verbosity to 0.
tf_logger = logging.getLogger("tensorflow")
tf_logger.setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

#from public_tests import * 

#from autils import *
#from lab_utils_softmax import plt_softmax
#np.set_printoptions(precision=2)

### 2. Load and Check Dataset

In [22]:
# Load dataset: X holds one flattened image per row, y the matching labels
X = np.load("data/X.npy")
y = np.load("data/y.npy")

# Fail fast if the two files are out of sync; every later cell assumes one label row per image
assert X.shape[0] == y.shape[0], (
    f"X has {X.shape[0]} samples but y has {y.shape[0]} labels"
)

In [23]:
# Sanity-check the load: show one sample, the first and last labels, and both array shapes
print(f'The first element of X is: {X[0]}')
first_label, last_label = y[0,0], y[-1,0]
print(f'The first element of y is: {first_label}, the last element of y is {last_label}')
# Formatting a shape tuple inside an f-string already yields its str() form
print(f'The shape of X is: {X.shape}, the shape of y is {y.shape}')

The first element of X is: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+0

In [24]:
# Split dataset into training, cross validation and test sets.
# Stage 1 keeps 60% of the data for training; stage 2 divides the held-out 40%
# into 80% cross validation / 20% test, i.e. 60% / 32% / 8% of the full data.
# A fixed random_state makes both shuffles repeatable, so re-running this cell
# reproduces the same partition instead of reshuffling examples an already
# trained model has seen into the cross validation and test sets.
SPLIT_SEED = 42
X_train, X_, y_train, y_ = train_test_split(X, y, test_size = 0.4, random_state = SPLIT_SEED)
X_crossval, X_test, y_crossval, y_test = train_test_split(X_, y_, test_size = 0.2, random_state = SPLIT_SEED)

### 3. Define Model

In [9]:
# Three dense layers on top of a 400-feature input: 25 -> 15 -> 10 units.
# The last layer is linear, so the network emits raw logits, which is what
# the loss below is told to expect (from_logits=True).
layer_stack = [
    tf.keras.layers.InputLayer((400,)),
    Dense(25, activation='relu', name='L1'),
    Dense(15, activation='relu', name='L2'),
    Dense(10, activation='linear', name='L3'),
]
model = Sequential(layer_stack, name='3_Layer_model')

logit_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=logit_loss)

In [10]:
# Print the layer-by-layer summary of the model defined above
model.summary()

In [11]:
# Grab handles to the individual layers; model.layers must hold exactly three entries
layer1, layer2, layer3 = model.layers

In [12]:
# Each layer's get_weights() is unpacked into its weight matrix and bias vector
(W1, b1), (W2, b2), (W3, b3) = (
    layer.get_weights() for layer in (layer1, layer2, layer3)
)
print(f"W1 shape = {W1.shape}, b1 shape = {b1.shape}")
print(f"W2 shape = {W2.shape}, b2 shape = {b2.shape}")
print(f"W3 shape = {W3.shape}, b3 shape = {b3.shape}")

W1 shape = (400, 25), b1 shape = (25,)
W2 shape = (25, 15), b2 shape = (15,)
W3 shape = (15, 10), b3 shape = (10,)


### 4. Train the Model

In [26]:
# Configure training: sparse categorical cross-entropy on raw logits, Adam with learning rate 0.001
training_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=training_loss, optimizer=adam)

# NOTE(review): the recorded output starts at a loss of about 0.001 in epoch 1,
# which suggests this cell was run on already-trained weights. Re-run the
# model-definition cell first when a from-scratch training run is intended.
history = model.fit(X_train, y_train, epochs=100)

Epoch 1/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 931us/step - loss: 0.0012 
Epoch 2/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 977us/step - loss: 0.0013
Epoch 3/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0027    
Epoch 4/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 965us/step - loss: 9.6209e-04
Epoch 5/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 989us/step - loss: 6.5251e-04
Epoch 6/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 977us/step - loss: 5.3857e-04
Epoch 7/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 980us/step - loss: 4.9821e-04
Epoch 8/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 998us/step - loss: 5.2135e-04
Epoch 9/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 971us/step - loss: 4.6710e-04
Epoch 10/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0

### 5. Check Bias and Variance

In [27]:
def eval_mse(y, yhat):
    """
    Halved mean squared error between targets and predictions.

    Args:
      y    : indexable sequence of m target values
      yhat : indexable sequence of predictions; entries 0..m-1 are read

    Returns:
      err  : sum over i in range(m) of (y[i] - yhat[i])**2, divided by 2*m

    Raises:
      ZeroDivisionError : when y is empty (m == 0)
    """
    n_samples = len(y)
    # Accumulate from a float zero so integer inputs still produce a float result
    squared_gaps = ((y[k] - yhat[k])**2 for k in range(n_samples))
    total = sum(squared_gaps, 0.0)
    return total / (2*n_samples)

In [31]:
# Score the cross validation set: the model outputs logits, and softmax
# converts each row into per-class probabilities
crossval_logits = model.predict(X_crossval)
crossval_preds = tf.nn.softmax(crossval_logits)
crossval_preds

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 867us/step


<tf.Tensor: shape=(1600, 10), dtype=float32, numpy=
array([[8.4651454e-20, 3.3022693e-32, 2.2439714e-29, ..., 1.0000000e+00,
        2.6827940e-22, 7.4957461e-15],
       [1.2942257e-21, 7.3018308e-22, 1.1431973e-19, ..., 7.9015767e-23,
        1.0759601e-08, 1.0288114e-23],
       [8.8363150e-25, 1.3244790e-19, 1.4475929e-18, ..., 7.0486529e-20,
        1.3351077e-17, 5.9760226e-13],
       ...,
       [7.2319884e-26, 8.1186812e-34, 1.0000000e+00, ..., 1.0396370e-23,
        1.8665710e-16, 6.0808779e-15],
       [3.0452739e-14, 1.7516579e-14, 1.6461367e-17, ..., 4.1601630e-19,
        5.2894226e-14, 1.0339750e-12],
       [1.7437200e-14, 1.2637465e-06, 9.9678886e-01, ..., 2.5283662e-12,
        2.4039545e-03, 1.7041306e-05]], dtype=float32)>

In [16]:
image_of_two = X[1015] # Pull out an entry of the full data set X that you know corresponds to an image of a two 

prediction = model.predict(image_of_two.reshape(1,400)) # Use the model to predict raw scores (logits) for that entry, as a batch of one 
prediction_probs = tf.nn.softmax(prediction) # Use the prediction to identify the probabilities of the entry being each number
yhat = np.argmax(prediction_probs) # Index of the most probable class (must reference prediction_probs; prediction_p is not defined anywhere)


print(f" predicting a Two: \n{prediction}")
print(f" Largest Prediction index: {np.argmax(prediction)}")

print(f" predicting a Two. Probability vector: \n{prediction_probs}")
print(f"Total of predictions: {np.sum(prediction_probs):0.3f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
 predicting a Two: 
[[-2.6044028   6.332008    7.828155    2.7742958  -5.75013    -2.1812954
  -4.7105036  -0.06585044 -1.030035   -1.6532078 ]]
 Largest Prediction index: 2
 predicting a Two. Probability vector: 
[[2.3928915e-05 1.8195204e-01 8.1231672e-01 5.1863543e-03 1.0297957e-06
  3.6532227e-05 2.9124290e-06 3.0297201e-04 1.1552131e-04 6.1947190e-05]]
Total of predictions: 1.000


In [17]:
def display_errors(model,X,y):
    """
    Count the examples the model misclassifies.

    Args:
      model : object whose predict(X) returns one row of class scores per example
      X     : inputs, passed unchanged to model.predict
      y     : 2-D label array; the true class is read from column 0

    Returns:
      int   : number of rows where the argmax of the scores differs from y[:,0]
    """
    scores = model.predict(X)
    yhat = np.argmax(scores, axis=1)
    # One boolean per example: True where the predicted class is wrong
    mismatches = yhat != y[:,0]
    return int(np.count_nonzero(mismatches))

# Errors are counted over the full data set X, which includes the examples used for training
n_errors = display_errors(model,X,y)
n_images = len(X)
print(f"{n_errors} errors out of {n_images} images")

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 782us/step
13 errors out of 5000 images
