In [None]:
from time import time
from tensorflow import device
from tensorflow.test import gpu_device_name
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets.mnist import load_data
from tensorflow.keras.initializers import GlorotUniform, HeUniform
import matplotlib.pyplot as plt
# Use matplotlib's "ggplot" style sheet for figures created after this call.
plt.style.use("ggplot")
# IPython magic: select the interactive "notebook" matplotlib backend.
%matplotlib notebook

In [None]:
# Load the MNIST train/test split through Keras ("mnist.npz" is the local
# cache file name handed to load_data).
(x_train, y_train), (x_test, y_test) = load_data("mnist.npz")

# The Keras loader returns uint8 pixel intensities in [0, 255]. Rescale them to
# float32 in [0, 1] so every network below starts from well-conditioned inputs
# (unscaled inputs saturate sigmoid/tanh units and slow low-learning-rate SGD).
# The scaling lives in the loading cell so that re-running the cell stays
# idempotent: the arrays are always rebuilt from the raw data first.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

In [None]:
# Shared state used by every experiment cell.

# Device string reported by TensorFlow for the GPU; printed so the reader can
# see which device the models will be placed on.
device_name = gpu_device_name()
print(device_name)

# Early stopping on the (training) sparse categorical accuracy with a patience
# of 3 epochs. The same callback object is passed to every fit() call.
callback = EarlyStopping(patience=3, monitor="sparse_categorical_accuracy")

# Parallel lists filled by each experiment: a legend label and the per-epoch
# training accuracies of the corresponding model.
model_labels, various_models_accuracies = [], []

In [None]:
# Model 1 (shallow): one 397-unit ReLU hidden layer between the flattened
# input and the 10-way softmax output.
start = time()
with device(device_name):
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        Dense(397, activation="relu"),
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    model_labels.append("shallow-1")
    various_models_accuracies.append(accuracies)

    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 2 (shallow): two ReLU hidden layers (526 and 268 units) followed by
# the 10-way softmax output.
start = time()
with device(device_name):
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        Dense(526, activation="relu"),
        Dense(268, activation="relu"),
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    model_labels.append("shallow-2")
    various_models_accuracies.append(accuracies)

    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 3 (change batch size): three 128-unit ReLU hidden layers trained with
# a batch size of 1024 instead of the 64 used by the other experiments.
start = time()
with device(device_name):
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *[Dense(128, activation="relu") for _ in range(3)],
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=1024, epochs=100,
                    verbose=1, callbacks=[callback])
    # The whole test set is evaluated as a single batch.
    _, acc = model.evaluate(x_test, y_test, batch_size=len(x_test))

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    # Legend label (previously misspelled as "bach size").
    model_labels.append("change batch size (1024)")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 4 (Adam): five ReLU hidden layers whose width halves from 512 down to
# 32, optimised with an explicitly constructed Adam (learning rate 0.001).
start = time()
with device(device_name):
    hidden_widths = [512 // 2 ** depth for depth in range(5)]  # 512 ... 32
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *[Dense(width, activation="relu") for width in hidden_widths],
        Dense(10, activation="softmax"),
    ])
    optim = Adam(learning_rate=0.001)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optim,
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("with adam optim")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 5 (SGD with momentum): same halving 512 -> 32 ReLU stack as the Adam
# experiment, but optimised with plain (non-Nesterov) momentum SGD.
start = time()
with device(device_name):
    hidden_widths = [512 // 2 ** depth for depth in range(5)]  # 512 ... 32
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *[Dense(width, activation="relu") for width in hidden_widths],
        Dense(10, activation="softmax"),
    ])
    optim = SGD(learning_rate=0.0001, momentum=0.9, nesterov=False)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optim,
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("with momentum optim")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 6 (Glorot Uniform): halving 512 -> 32 ReLU stack where every Dense
# kernel (including the output layer) is initialised with Glorot uniform.
#
# Each layer receives its OWN GlorotUniform() instance. Keras advises against
# reusing a single unseeded initializer object across layers: depending on the
# Keras version, repeated calls on the same instance can return the same
# random values and trigger a reuse warning.
start = time()
with device(device_name):
    hidden_widths = [512 // 2 ** depth for depth in range(5)]  # 512 ... 32
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28, 1)))
    for width in hidden_widths:
        model.add(Dense(width,
                        activation="relu",
                        kernel_initializer=GlorotUniform()))
    model.add(Dense(10,
                    activation="softmax",
                    kernel_initializer=GlorotUniform()))
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["sparse_categorical_accuracy"])
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train,
                    y_train,
                    epochs=100,
                    batch_size=64,
                    callbacks=[callback],
                    verbose=1)
    _, acc = model.evaluate(x_test, y_test, batch_size=64)
    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("with glorot uniform")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 7 (He Uniform): halving 512 -> 32 ReLU stack where every Dense kernel
# (including the output layer) is initialised with He uniform.
#
# Each layer receives its OWN HeUniform() instance. Keras advises against
# reusing a single unseeded initializer object across layers: depending on the
# Keras version, repeated calls on the same instance can return the same
# random values and trigger a reuse warning.
start = time()
with device(device_name):
    hidden_widths = [512 // 2 ** depth for depth in range(5)]  # 512 ... 32
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28, 1)))
    for width in hidden_widths:
        model.add(Dense(width,
                        activation="relu",
                        kernel_initializer=HeUniform()))
    model.add(Dense(10,
                    activation="softmax",
                    kernel_initializer=HeUniform()))
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["sparse_categorical_accuracy"])
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train,
                    y_train,
                    epochs=100,
                    batch_size=64,
                    callbacks=[callback],
                    verbose=1)
    _, acc = model.evaluate(x_test, y_test, batch_size=64)
    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("with He uniform")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 8 (autoencoder type): an hourglass-shaped classifier. Hidden widths
# shrink by halving from 512 down to a bottleneck of 8 units, then double back
# up to 64 units before the 10-way softmax output.
start = time()
with device(device_name):
    shrinking_widths = [1024 // 2 ** step for step in range(1, 8)]  # 512 ... 8
    growing_widths = [8 * 2 ** step for step in range(1, 4)]        # 16, 32, 64

    model = Sequential()
    model.add(Flatten(input_shape=(28, 28, 1)))
    for units in shrinking_widths + growing_widths:
        model.add(Dense(units, activation="relu"))
    model.add(Dense(10, activation="softmax"))

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("autoencoder-based")
    print("time taken: {}s".format(time() - start))

## Compare the influence of activation functions while keeping other parameters the same

In [None]:
# Model 9 (tanh): five hidden layers halving from 512 to 32 units, all using
# the tanh activation; everything else matches the sigmoid/relu variants.
start = time()
with device(device_name):
    hidden_widths = [512 // 2 ** depth for depth in range(5)]  # 512 ... 32
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *[Dense(width, activation="tanh") for width in hidden_widths],
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("5 hidden layers with tanh")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 10 (sigmoid): five hidden layers halving from 512 to 32 units, all
# using the sigmoid activation; everything else matches the tanh/relu variants.
start = time()
with device(device_name):
    hidden_widths = [512 // 2 ** depth for depth in range(5)]  # 512 ... 32
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *[Dense(width, activation="sigmoid") for width in hidden_widths],
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("5 hidden layers with sigmoid")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 11 (relu): five hidden layers halving from 512 to 32 units, all using
# the ReLU activation; everything else matches the tanh/sigmoid variants.
start = time()
with device(device_name):
    hidden_widths = [512 // 2 ** depth for depth in range(5)]  # 512 ... 32
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *[Dense(width, activation="relu") for width in hidden_widths],
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # At most 100 epochs; the shared callback may end training earlier.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback])
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("5 hidden layers with relu")
    print("time taken: {}s".format(time() - start))

In [None]:
# Deep NN to show the effect of overfitting.
# Model 12 (without dropout): five ReLU hidden layers whose widths follow an
# arithmetic progression, starting at 700 and dropping by 128 per layer.
start = time()
with device(device_name):
    hidden_widths = [700 - 128 * depth for depth in range(5)]  # 700 ... 188
    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *[Dense(width, activation="relu") for width in hidden_widths],
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # NOTE(review): validation_split=0.75 holds out 75% of the training data
    # for validation, so only 25% is used for fitting. Presumably deliberate
    # to provoke overfitting -- confirm that 0.25 was not intended.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback], validation_split=0.75)
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("5 hidden layers no dropout")
    print("time taken: {}s".format(time() - start))

In [None]:
# Model 13 (with regularization): same 700 -> 188 arithmetic-progression stack
# as the no-dropout model, but each hidden Dense layer is followed by
# Dropout(0.2).
start = time()
with device(device_name):
    hidden_stack = []
    for width in (700 - 128 * depth for depth in range(5)):  # 700 ... 188
        hidden_stack += [Dense(width, activation="relu"), Dropout(0.2)]

    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        *hidden_stack,
        Dense(10, activation="softmax"),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["sparse_categorical_accuracy"],
    )
    # NOTE(review): validation_split=0.75 holds out 75% of the training data
    # for validation, so only 25% is used for fitting. Presumably deliberate
    # to provoke overfitting -- confirm that 0.25 was not intended.
    log = model.fit(x_train, y_train, batch_size=64, epochs=100,
                    verbose=1, callbacks=[callback], validation_split=0.75)
    _, acc = model.evaluate(x_test, y_test, batch_size=64)

    # Per-epoch training accuracy, recorded for the final comparison plot.
    accuracies = log.history["sparse_categorical_accuracy"]
    print("train acc: {}\ntest acc. {}".format(accuracies[-1], acc))
    various_models_accuracies.append(accuracies)
    model_labels.append("5 Hidden layers, with dropout")
    print("time taken: {}s".format(time() - start))

In [None]:
# Plot the per-epoch training accuracy of every recorded model on one axes.
#
# A fresh figure/axes pair is created explicitly instead of going through the
# implicit pyplot state: the title and the curves are then guaranteed to land
# on the same axes that is resized and given the legend, and nothing is drawn
# into a figure left active by an earlier cell under the interactive backend.
fig, ax = plt.subplots()
ax.set_title("accuracies of various DL models")

# One curve per model; epochs are numbered from 1. The loop variable is named
# so that it does not overwrite the test accuracy `acc` of the last model.
for epoch_accuracies, label in zip(various_models_accuracies, model_labels):
    epochs = range(1, len(epoch_accuracies) + 1)
    ax.plot(epochs, epoch_accuracies, label=label)

ax.set_xlabel("epochs")
ax.set_ylabel("accuracies")

# Shrink the axes to 80% of its width so the legend fits to its right.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
plt.show()