In [1]:
# Loading the data from MNIST dataset
from tensorflow.keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

print(train_images.shape, test_images.shape)
print(len(train_labels), len(test_labels))

train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255

from tensorflow.keras.utils import to_categorical
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

(60000, 28, 28) (10000, 28, 28)
60000 10000


In [2]:
# Create the model based on a DNN
from tensorflow.keras import models
from tensorflow.keras import layers

network = models.Sequential()
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
network.add(layers.Dense(256, activation='relu'))
network.add(layers.Dense(10, activation='softmax'))

network.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


In [3]:
# Train with SGD
from tensorflow.keras import optimizers

algo0 = optimizers.SGD(lr=0.01, momentum=0.0)

network.compile(optimizer= algo0,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network.fit(train_images, train_labels, epochs=5, batch_size=128)

test_loss, test_acc = network.evaluate(test_images, test_labels, verbose = 0)
print(test_loss, test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.2588534653186798 0.9296000003814697


In [4]:
# Train with SGD with momentum
algo1 = optimizers.SGD(lr=0.01, momentum=0.9)

network.compile(optimizer= algo1,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network.fit(train_images, train_labels, epochs=5, batch_size=128)

test_loss, test_acc = network.evaluate(test_images, test_labels, verbose = 0)
print(test_loss, test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.08279784768819809 0.974399983882904


In [5]:
# Train with RMSprop
algo2 = optimizers.RMSprop(lr=0.01, rho=0.99, epsilon=None, decay=0.0)

network.compile(optimizer= algo2,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network.fit(train_images, train_labels, epochs=5, batch_size=128)

test_loss, test_acc = network.evaluate(test_images, test_labels, verbose = 0)
print(test_loss, test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.15779882669448853 0.9613999724388123


In [6]:
# Train with Adam
algo3 = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
network.compile(optimizer= algo3,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network.fit(train_images, train_labels, epochs=5, batch_size=128)

test_loss, test_acc = network.evaluate(test_images, test_labels, verbose = 0)
print(test_loss, test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.11808105558156967 0.9779000282287598


In [7]:
# Batch normalization
network_bn = models.Sequential()
network_bn.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
network_bn.add(layers.BatchNormalization())
network_bn.add(layers.Dense(256, activation='relu'))
network_bn.add(layers.BatchNormalization())
network_bn.add(layers.Dense(10, activation='softmax'))

network_bn.summary()

network_bn.compile(optimizer= algo3,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network_bn.fit(train_images, train_labels, epochs=5, batch_size=128)

test_loss, test_acc = network_bn.evaluate(test_images, test_labels, verbose = 0)
print(test_loss, test_acc)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               401920    
_________________________________________________________________
batch_normalization (BatchNo (None, 512)               2048      
_________________________________________________________________
dense_4 (Dense)              (None, 256)               131328    
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
dense_5 (Dense)              (None, 10)                2570      
Total params: 538,890
Trainable params: 537,354
Non-trainable params: 1,536
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.08597444742918015 0.9751999974250793
