In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import tensorflow as tf
from tensorflow import keras

In [17]:
%load_ext tensorboard

In [3]:
# Fetch CIFAR-10 through Keras; it comes back as a (train, test) pair of (images, labels).
cifar10 = keras.datasets.cifar10
(X_train_full, y_train_full), (X_test, y_test) = cifar10.load_data()

In [7]:
# Sanity-check the array shapes of the full training set and the test set.
tuple(arr.shape for arr in (X_train_full, y_train_full, X_test, y_test))

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the training images for validation.
# This cell runs before any global seed is set, so the shuffle is pinned with an
# explicit random_state; otherwise every kernel restart yields a different split
# and the runs compared later in the notebook are not trained on the same data.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.1,
    random_state=42,
)

In [5]:
# Confirm the sizes of the training and validation image arrays after the split.
tuple(split.shape for split in (X_train, X_valid))

((45000, 32, 32, 3), (5000, 32, 32, 3))

In [8]:
# Clear leftover Keras state, then seed NumPy and TensorFlow (in that order)
# so the model built next starts from a repeatable random state.
keras.backend.clear_session()
for reseed in (np.random.seed, tf.random.set_seed):
    reseed(42)

In [9]:
# Plain deep MLP: flatten the 32x32x3 image, 20 hidden ELU layers of 100 units
# with He-normal initialisation, then a 10-way softmax output.
layer_stack = [keras.layers.Flatten(input_shape=[32, 32, 3])]
layer_stack += [
    keras.layers.Dense(
        100,
        kernel_initializer=keras.initializers.he_normal,
        activation=keras.activations.elu,
    )
    for _ in range(20)
]
layer_stack.append(keras.layers.Dense(10, activation="softmax"))

model = keras.models.Sequential(layer_stack)

In [11]:
import os

# Use the parent "projects" folder as the base directory when the kernel was
# started inside the "ANN" sub-folder.
# Only the last path component is compared: a substring test on the whole
# absolute path would also fire when an unrelated parent directory merely
# contains the letters "ANN", and would then step out of the wrong folder.
if os.path.basename(os.path.abspath(os.curdir)) == "ANN":
    os.chdir("..")

# Absolute path of the (possibly changed) working directory; log paths are built from it.
BASE_DIR = os.path.abspath(os.curdir)
BASE_DIR

'D:\\TheCompleteML\\projects'

In [54]:
# Move back into the "ANN" sub-folder unless we are already there.
# Compare the final path component rather than searching the whole path, so a
# parent directory whose name happens to contain "ANN" does not suppress the chdir.
if os.path.basename(os.path.abspath(os.curdir)) != "ANN":
    os.chdir(os.path.abspath(os.path.join(os.curdir, "ANN")))

In [12]:
# Checkpointing: the file name is relative, so it is written to whatever the
# working directory is when training runs; save_best_only keeps only the best model.
model_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "cifar10_best_model.h5",
    save_best_only=True,
)
early_stopping = keras.callbacks.EarlyStopping(patience=20)

# Run bookkeeping: each run gets its own TensorBoard sub-directory, named after
# the run index and the learning rate.
run_index = 1  # increment this at every run
learning_rate = 1e-4

logdir = os.path.join(BASE_DIR, "logs", "dnn_cifar10_logs")
run_name = "run_{:03d}_lr_{:.6f}".format(run_index, learning_rate)
run_logdir = os.path.join(logdir, run_name)

tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)

In [13]:
# Labels are integer class ids, hence the sparse loss and sparse accuracy metric.
# The optimizer is Nadam with the learning rate chosen in the run-configuration cell.
model.compile(
    loss=keras.losses.sparse_categorical_crossentropy,
    optimizer=keras.optimizers.Nadam(learning_rate=learning_rate),
    # `metrics` is documented as a list of metrics; passing a bare callable relies
    # on lenient handling that newer Keras releases no longer provide.
    metrics=[keras.metrics.sparse_categorical_accuracy],
)

In [14]:
# Callbacks handed to fit(): checkpointing, early stopping and TensorBoard logging.
callbacks = [
    model_checkpoint_cb,
    early_stopping,
    tensorboard_cb,
]

In [15]:
# Train the ELU network for 10 epochs, validating on the held-out split after each epoch.
# NOTE(review): the EarlyStopping callback is configured with patience=20, which
# cannot trigger within 10 epochs - confirm whether epochs or patience is the intended value.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
    callbacks=callbacks,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [40]:
# Windows path to the TensorBoard log root.
# Written as a raw string: in a normal literal, sequences such as "\T", "\p", "\l"
# and "\d" are invalid escapes (warned about by recent Python versions), and a
# folder starting with e.g. "t" or "n" would silently turn into a tab or newline.
log_path = r'D:\TheCompleteML\projects\logs\dnn_cifar10_logs'

In [55]:
%tensorboard --logdir=./dnn_cifar10_logs --port=6006

Reusing TensorBoard on port 6006 (pid 16044), started 0:01:28 ago. (Use '!kill 16044' to kill it.)

In [56]:
# Show the absolute path of the current working directory.
os.path.abspath(".")

'D:\\TheCompleteML\\projects\\ANN'

In [46]:
# Show where an "ANN" folder under the current working directory would resolve to.
ann_relative = os.path.join(os.curdir, "ANN")
os.path.abspath(ann_relative)

'D:\\TheCompleteML\\projects\\ANN'

In [None]:
# Learning curves: one line per metric recorded in the training history.
history_df = pd.DataFrame(history.history)
ax = history_df.plot(figsize=(15, 8))
ax.grid(True)
ax.set_ylim(0, 5)  # fixed y-range of 0 to 5 for the curves
plt.show()

In [None]:
# Reset Keras and re-seed NumPy then TensorFlow before building the
# batch-normalised variant.
keras.backend.clear_session()
for reseed in (np.random.seed, tf.random.set_seed):
    reseed(42)

In [None]:
# Same 20x100 ELU stack as before, with BatchNormalization applied to the
# flattened input and after every hidden Dense layer.
layer_stack = [
    keras.layers.Flatten(input_shape=[32, 32, 3]),
    keras.layers.BatchNormalization(),
]
for _ in range(20):
    layer_stack.append(
        keras.layers.Dense(
            100,
            kernel_initializer=keras.initializers.he_normal,
            activation=keras.activations.elu,
        )
    )
    layer_stack.append(keras.layers.BatchNormalization())

# 10-way softmax output with Glorot-uniform initialisation.
layer_stack.append(
    keras.layers.Dense(
        10,
        activation="softmax",
        kernel_initializer=keras.initializers.glorot_uniform,
    )
)

model = keras.models.Sequential(layer_stack)

In [None]:
# Sparse loss and accuracy because the labels are integer class ids.
# Nadam is configured with learning_rate=0.001, beta_1=0.9 and beta_2=0.999.
model.compile(
    loss=keras.losses.sparse_categorical_crossentropy,
    optimizer=keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
    # `metrics` is documented as a list of metrics; passing a bare callable relies
    # on lenient handling that newer Keras releases no longer provide.
    metrics=[keras.metrics.sparse_categorical_accuracy],
)

In [None]:
# Train the batch-normalised network for up to 100 epochs, with an
# EarlyStopping callback (patience=10) to end the run early.
stop_early = keras.callbacks.EarlyStopping(patience=10)
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=100,
    callbacks=[stop_early],
)

In [None]:
# Learning curves for the batch-normalised run; the y-axis is left on automatic
# limits (a fixed 0-1 range was tried and is disabled).
history_df = pd.DataFrame(history.history)
ax = history_df.plot(figsize=(15, 8))
ax.grid(True)
# ax.set_ylim(0, 1)
plt.show()

##### Observations

Convergence to the optimum is much more effective compared to the previous result, but the time cost is higher; overall accuracy is also improved.

### Implementing SELU as the activation function

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the pixel values before the SELU network.
# reshape(-1, 1) collapses every pixel into one column, so the scaler learns a
# single global mean/std; it is fitted on the training split only and then
# applied unchanged to the validation and test splits.
scaler = StandardScaler()
image_shape = (-1, 32, 32, 3)

X_train = scaler.fit_transform(X_train.reshape(-1, 1)).reshape(image_shape)
X_valid, X_test = (
    scaler.transform(split.reshape(-1, 1)).reshape(image_shape)
    for split in (X_valid, X_test)
)

In [None]:
# The scaled training array should have been reshaped back to image form.
np.shape(X_train)

In [None]:
# Fresh Keras state and fixed NumPy/TensorFlow seeds for the SELU experiment.
keras.backend.clear_session()
for reseed in (np.random.seed, tf.random.set_seed):
    reseed(42)

In [None]:
# SELU variant: 20 hidden layers of 100 units with LeCun-normal initialisation
# and no BatchNormalization layers, followed by a 10-way softmax output.
layer_stack = [keras.layers.Flatten(input_shape=[32, 32, 3])]
layer_stack += [
    keras.layers.Dense(
        100,
        activation="selu",
        kernel_initializer=keras.initializers.lecun_normal,
    )
    for _ in range(20)
]
layer_stack.append(keras.layers.Dense(10, activation="softmax"))

model = keras.models.Sequential(layer_stack)

In [None]:
# Sparse loss and accuracy because the labels are integer class ids.
# Nadam is configured with learning_rate=1e-3, beta_1=0.9 and beta_2=0.999.
model.compile(
    loss=keras.losses.sparse_categorical_crossentropy,
    optimizer=keras.optimizers.Nadam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999),
    # `metrics` is documented as a list of metrics; passing a bare callable relies
    # on lenient handling that newer Keras releases no longer provide.
    metrics=[keras.metrics.sparse_categorical_accuracy],
)

In [None]:
# Train the SELU network on the standardised images for 10 epochs, with validation.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
)