<a href="https://colab.research.google.com/github/jonkrohn/DLTFpT/blob/master/notebooks/deep_net_in_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Neural Network in TensorFlow

#### Load dependencies

In [1]:
import numpy as np
import tensorflow
from matplotlib import pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import BatchNormalization # new!
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout # new!
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

#### Load data

In [2]:
# mnist.load_data() returns two (images, labels) pairs; the first is used for
# training and the second for validation.
train_split, valid_split = mnist.load_data()
X_train, y_train = train_split
X_valid, y_valid = valid_split

#### Preprocess data

In [3]:
# Flatten every image into one row of features and cast to float32 so the
# values can be scaled with floating-point division afterwards.
# The row count is read from each array and the feature count is inferred
# (-1), so the cell keeps working if the data is subsampled and is safe to
# re-run on already-flattened arrays.
X_train = X_train.reshape(len(X_train), -1).astype('float32')
X_valid = X_valid.reshape(len(X_valid), -1).astype('float32')

In [4]:
# Divide both arrays by 255 in place (presumably the maximum 8-bit pixel
# intensity, which would map the inputs into [0, 1] -- confirm against the
# dataset documentation).
# NOTE: the division mutates the arrays, so running this cell twice without
# re-running the reshape cell scales the data twice.
PIXEL_MAX = 255
X_train /= PIXEL_MAX
X_valid /= PIXEL_MAX

In [5]:
# One-hot encode the integer labels: each label becomes a vector of length
# n_classes, matching the width of the softmax output layer.
n_classes = 10
y_train, y_valid = [
    to_categorical(labels, n_classes) for labels in (y_train, y_valid)
]

#### Design neural network architecture

In [6]:
# Feed-forward classifier: three 64-unit ReLU hidden layers, each followed by
# batch normalization, with dropout applied before the softmax output layer.
model = Sequential()

# Declare the input explicitly. Passing `input_shape=` to the first Dense layer
# makes recent Keras releases emit a UserWarning that recommends an Input
# object instead. The feature count comes from the flattened data (784 here).
model.add(tensorflow.keras.Input(shape=X_train.shape[1:]))

# First hidden layer:
model.add(Dense(64, activation='relu'))
# Batch normalization standardizes the previous layer's activations over each
# mini-batch (towards zero mean and unit variance) and then applies a learned
# scale and shift, which keeps the inputs of the next layer in a stable range.
# Although it is attached with model.add(), BatchNormalization does not add a
# hidden layer of neurons; it only transforms the output of the Dense layer
# before it.
model.add(BatchNormalization())

# Second hidden layer:
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())

# Third hidden layer:
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
# Dropout on the final hidden layer only: during training, 20% of this
# layer's outputs are randomly zeroed to reduce overfitting.
model.add(Dropout(0.2))

# Output layer: one softmax probability per class. Uses n_classes so that it
# stays consistent with the one-hot encoding of the labels.
model.add(Dense(n_classes, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Print the layer-by-layer summary of the model.
# Look for the "Non-trainable params: 384" entry in the output: those
# parameters are not changed by the optimizer's weight updates during training.
# NOTE(review): presumably these are the BatchNormalization moving mean and
# variance (3 layers x 64 units x 2 = 384) -- confirm against the summary table.
model.summary()

#### Configure model

In [8]:
# Configure training: categorical cross-entropy loss (the labels were one-hot
# encoded with to_categorical) and accuracy as the reported metric.
# The optimizer is Nadam rather than plain SGD (Stochastic Gradient Descent),
# so the SGD class imported at the top is not used here.
model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])

#### Train!

In [9]:
# Since we have added more hidden layers, let's reduce the number of epochs from 20 to 10.
# Keep the returned History object: it stores the per-epoch loss/accuracy for
# both the training and validation data (history.history), which is otherwise
# lost, and assigning it avoids printing the bare object repr as cell output.
history = model.fit(X_train, y_train, batch_size=128, epochs=10, verbose=1, validation_data=(X_valid, y_valid))

Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7809 - loss: 0.7224 - val_accuracy: 0.9506 - val_loss: 0.1602
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9528 - loss: 0.1589 - val_accuracy: 0.9624 - val_loss: 0.1149
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9650 - loss: 0.1146 - val_accuracy: 0.9665 - val_loss: 0.1061
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9733 - loss: 0.0908 - val_accuracy: 0.9701 - val_loss: 0.0944
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9773 - loss: 0.0726 - val_accuracy: 0.9703 - val_loss: 0.0955
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9789 - loss: 0.0642 - val_accuracy: 0.9730 - val_loss: 0.0899
Epoch 7/10
[1m469/469[0m 

<keras.src.callbacks.history.History at 0x30ebfe250>

#### Performing inference

In [10]:
# Take the first validation sample and give it a leading batch dimension of 1,
# because model.predict expects a batch of inputs. The feature count is
# inferred (-1) instead of repeating the hard-coded 784.
valid_0 = X_valid[0].reshape(1, -1)

In [11]:
# Run the network on the single sample. The result has one row per input and
# one column per output unit of the softmax layer, i.e. a probability per class.
model.predict(valid_0)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


array([[8.2569869e-07, 2.3145769e-06, 3.4069772e-06, 9.3717796e-07,
        3.0619634e-07, 3.5491871e-07, 2.1951893e-08, 9.9998701e-01,
        2.1446294e-07, 4.5514112e-06]], dtype=float32)

In [12]:
# model.predict_classes(valid_0)  # legacy API, removed from recent TensorFlow releases; kept commented out for reference only

In [13]:
# The predict_classes() method no longer exists in recent TensorFlow releases.
# Instead, take the index of the largest predicted probability along the last
# (class) axis; this yields one predicted class label per input row.
# numpy is imported as np in the import cell at the top of the notebook.
np.argmax(model.predict(valid_0), axis=-1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


array([7])