# Implementation of Optimizers

NOTE: The only change is in defining the Optimizer, rest of the process and code are same.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras import regularizers

In [2]:
mnist = tf.keras.datasets.mnist
(x_train_full, y_train_full), (x_test, y_test) = mnist.load_data()

# Creating validataion from train_full data
x_valid, x_train = x_train_full[:5000], x_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

# scaling the dataset for Normalization
x_train = x_train / 255.0
x_valid = x_valid / 255.0
x_test = x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [3]:
model2 = Sequential()
model2.add(Flatten(input_shape = [28, 28], name = "inputlayer"))

model2.add(Dense(64, activation = "relu", name = "hiddenlayer1", kernel_initializer= tf.keras.initializers.HeNormal(seed = None)))
model2.add(Dropout(0.2))    # Dropping 20% nodes for the above layer(HL1)

model2.add(Dense(32, activation = "relu", name = "hiddenlayer2", kernel_initializer= tf.keras.initializers.HeUniform(seed = None)))
model2.add(Dropout(0.1))     # Dropping 10% nodes for the above layer(HL2)

model2.add(Dense(16, activation = "relu", name = "hiddenlayer3", kernel_initializer= tf.keras.initializers.GlorotNormal(seed = None)))
model2.add(Dropout(0.2))     # Dropping 20% nodes for the above layer(HL3)

model2.add(Dense(10, activation= "softmax", name = "outputlayer"))


  super().__init__(**kwargs)


In [7]:
model2.summary()

## Training of an ANN classifier

In [4]:
LOSS_FUNCTION = "sparse_categorical_crossentropy"
METRICS = ["accuracy"]

# Method1
# OPTIMIZER = "SGD"
# model2.compile(loss=LOSS_FUNCTION, optimizer = OPTIMIZER, metrics = METRICS)

# Method2
sgd = tf.keras.optimizers.SGD(learning_rate= 0.01, momentum= 0.9)
model2.compile(loss=LOSS_FUNCTION, optimizer = sgd, metrics = METRICS)

EPOCHS = 5
VALIDATON_SET = (x_valid, y_valid)

history1 = model2.fit(x_train, y_train, epochs = EPOCHS, validation_data = VALIDATON_SET, batch_size= 32)

Epoch 1/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.6510 - loss: 1.0316 - val_accuracy: 0.9446 - val_loss: 0.2105
Epoch 2/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8899 - loss: 0.3779 - val_accuracy: 0.9532 - val_loss: 0.1884
Epoch 3/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9142 - loss: 0.3067 - val_accuracy: 0.9618 - val_loss: 0.1549
Epoch 4/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9248 - loss: 0.2682 - val_accuracy: 0.9656 - val_loss: 0.1357
Epoch 5/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9327 - loss: 0.2346 - val_accuracy: 0.9616 - val_loss: 0.1424


In [5]:
adam = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    weight_decay=None,
    clipnorm=None,
    clipvalue=None,
    global_clipnorm=None,
    use_ema=False,
    ema_momentum=0.99,
    ema_overwrite_frequency=None,
    loss_scale_factor=None,
    gradient_accumulation_steps=None,
    name="adam")

model2.compile(loss = LOSS_FUNCTION, optimizer= adam, metrics = METRICS)

In [6]:
history1 = model2.fit(x_train, y_train, epochs = EPOCHS, validation_data = VALIDATON_SET, batch_size= 32)

Epoch 1/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9388 - loss: 0.2163 - val_accuracy: 0.9696 - val_loss: 0.1145
Epoch 2/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9463 - loss: 0.1904 - val_accuracy: 0.9660 - val_loss: 0.1235
Epoch 3/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9469 - loss: 0.1868 - val_accuracy: 0.9714 - val_loss: 0.1160
Epoch 4/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.9519 - loss: 0.1679 - val_accuracy: 0.9734 - val_loss: 0.1030
Epoch 5/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9532 - loss: 0.1683 - val_accuracy: 0.9702 - val_loss: 0.1061


# NOTE: Here ADAM optimizer is giving better accuracies(both train and valid data)