In [0]:
# Install the pinned TensorFlow 2.0.0 release that this notebook was written against (--quiet trims pip's output).
!pip3 install -U tensorflow==2.0.0 --quiet


In [0]:
import tensorflow as tf
# Fix TensorFlow's global random seed so that its random operations are repeatable between runs.
tf.random.set_seed(42)

In [0]:
# Collect data: load the MNIST handwritten-digit dataset

In [0]:
# Fetch MNIST through Keras; the loader hands back a (train, test) pair,
# each of which is an (images, labels) tuple.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
trainX, trainY = mnist_train
testX, testY = mnist_test

In [5]:
trainX.shape

(60000, 28, 28)

In [6]:
testX.shape

(10000, 28, 28)

### Convert output labels to one-hot vectors

In [0]:
# One-hot encode the labels.
# Each label arrives as a single integer (0-9), while the softmax output layer
# produces 10 probabilities. Encoding the label as a 10-element vector lets the
# loss compare the prediction and the target position by position.
#
# The rank check keeps this cell idempotent: without it, re-running the cell would
# encode the already one-hot labels again and produce a (samples, 10, 10) array.
if trainY.ndim == 1:
    trainY = tf.keras.utils.to_categorical(trainY, num_classes=10)
if testY.ndim == 1:
    testY = tf.keras.utils.to_categorical(testY, num_classes=10)

In [8]:
testY[0]

array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)

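The result above is the one-hot encoding of the first test label: every position is 0 except index 7, so the label is the digit 7 (the probability of "7" is 100% and of every other digit is 0%).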


### Build the Graph

In [0]:
# Reset the global state Keras has accumulated from any earlier model in this session
tf.keras.backend.clear_session()

# Start from an empty Sequential container; layers are stacked in the order they are added
model = tf.keras.models.Sequential()

# Q: How many weights or neurons does the Reshape layer have?
# A: None. It only rearranges values and learns nothing, so it has 0 parameters.

# Flatten each 28x28 image into a 784-element vector.
# Why reshape? Every sample is a 28x28 matrix (2 dimensions), but a Dense layer
# computes y = wx + b on vectors, so x must be one-dimensional. The flattening
# happens inside the model, so the raw images can be fed in unchanged.
# input_shape=(28, 28) describes ONE sample. The batch dimension is not listed:
# Keras adds it implicitly as the leading axis (shown as None in model.summary()),
# which is why any number of examples can be fed at once.
model.add(
    tf.keras.layers.Reshape(target_shape=(784,), input_shape=(28, 28))
)

# Normalize the data: standardize the 784 inputs per mini-batch inside the model
model.add(
    tf.keras.layers.BatchNormalization()
)

# Build the Graph

We build a model with 4 hidden layers.

The number of neurons will be 200, 100, 60 and 30 respectively.

Both the number of hidden layers and the number of neurons in each hidden layer are hyperparameters. That is, we can change these values to improve the model. The output of each neuron in a hidden layer is passed through an activation function.

Can we have a NN without an activation function?
Yes. Without activation functions the network still trains, but every layer is only a linear map, so the whole stack collapses to a single linear model and cannot learn any non-linear relationships.

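What is the default activation function?
For `tf.keras.layers.Dense` the default is `activation=None`, i.e. a linear (identity) activation: the layer outputs `wx + b` unchanged.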






In [0]:
# 1st hidden layer: 200 fully connected neurons with sigmoid activation
model.add(
    tf.keras.layers.Dense(units=200, activation='sigmoid')
)

In [0]:
# 2nd hidden layer: 100 fully connected neurons with sigmoid activation
model.add(
    tf.keras.layers.Dense(units=100, activation='sigmoid')
)

In [0]:
# 3rd hidden layer: 60 fully connected neurons with sigmoid activation
model.add(
    tf.keras.layers.Dense(units=60, activation='sigmoid')
)

In [0]:
# 4th hidden layer: 30 fully connected neurons with sigmoid activation
model.add(
    tf.keras.layers.Dense(units=30, activation='sigmoid')
)

In [0]:
# Output layer: one neuron per digit class (10); softmax turns the scores into probabilities
model.add(
    tf.keras.layers.Dense(units=10, activation='softmax')
)

Build the Graph


In [0]:
# Compile the model: choose the optimizer, the loss and the metric to report.
# For classification the loss is paired with the output activation (2 types):
#   Softmax output -> Categorical Cross Entropy (multi-class, one-hot labels)
#   Sigmoid output -> Binary Cross Entropy
# The output layer here is a 10-way softmax, so categorical cross-entropy is used.
# (The statement must start at column 0; a stray leading space raises IndentationError.)
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

### Review Model 

In [17]:
# Print every layer's output shape and parameter count to sanity-check the architecture.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 784)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 784)               3136      
_________________________________________________________________
dense (Dense)                (None, 200)               157000    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               20100     
_________________________________________________________________
dense_2 (Dense)              (None, 60)                6060      
_________________________________________________________________
dense_3 (Dense)              (None, 30)                1830      
_________________________________________________________________
dense_4 (Dense)              (None, 10)                310       

### Train the model


In [18]:
# Train for 5 epochs in mini-batches of 32 samples; the test split is passed as
# validation data, so it is evaluated at the end of every epoch.
model.fit(
    x=trainX,
    y=trainY,
    batch_size=32,
    epochs=5,
    validation_data=(testX, testY),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fb7d58624a8>

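#### From the above results we infer that we are experiencing the 'vanishing gradient' problem

After 5 epochs we reached only 32% accuracy. With four stacked sigmoid layers the gradient shrinks each time it is propagated back through a layer, so the earliest layers receive almost no update and learn very slowly.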

In [19]:
# Return the model's parameters as a list of NumPy arrays, presumably to see how little they moved during training.
# NOTE(review): this displays every array in full; showing only shapes or a small slice would keep the output readable.
model.get_weights()

[array([1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 0.9999997 , 0.9999983 , 0.9999973 ,
        1.0000027 , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 0.9999992 , 0.9999989 , 1.0000032 ,
        1.0000027 , 0.99998814, 0.9999611 , 1.0000191 , 0.9999765 ,
        1.000026  , 1.0000294 , 0.9999089 , 1.0000067 , 0.999983  ,
        1.0001025 , 0.999895  , 1.0000507 , 0.9999514 , 0.9999876 ,
        1.0000043 , 1.000002  , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 0.9999999 , 0.99999696,
        1.0000067 , 1.0000321 , 1.0000068 , 0.9999765 , 1.0002865 ,
        0.9996607 , 1.0007237 , 1.0008478 , 1.0018437 , 0.9991938 ,
        1.0006233 , 1.0012368 , 0.9982351 , 0.99

In [0]:
# Save the trained model to 'mnist_dnn_v1.h5' in the working directory (the .h5 suffix selects Keras' HDF5 format).
model.save('mnist_dnn_v1.h5')