In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from keras.models import Sequential
from tensorflow.keras.layers import InputLayer
from keras.layers import Dense, Flatten
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import keras
from keras.datasets import cifar10

from sklearn.metrics import classification_report,confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D, Dropout

from keras.regularizers import l1, l2

from tensorflow.keras.layers import LeakyReLU

In [2]:
# Number of target categories in CIFAR-10
num_classes = 10

# Fetch the CIFAR-10 train/test splits (Keras downloads the dataset when needed)
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Quick sanity check of the split sizes
print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [3]:
# Converting class vectors to binary (one-hot) class matrices.
# `keras.utils.to_categorical` is the public entry point; the private
# `keras.utils.np_utils` module is not available in newer Keras releases.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [4]:
# Cast both splits to float32 and divide every pixel value by 255
x_train, x_test = (
    split.astype('float32') / 255.0 for split in (x_train, x_test)
)

In [6]:
# Standardise the input features with statistics taken from the training set only.
# Reducing over axes (0, 1, 2) leaves one mean / std value per entry of the last axis.
mean = tf.reduce_mean(x_train, axis=(0, 1, 2))
std = tf.math.reduce_std(x_train, axis=(0, 1, 2))

# Apply the same training-set statistics to both splits
x_train, x_test = (x_train - mean) / std, (x_test - mean) / std

Metal device set to: Apple M1 Pro


2023-05-13 14:12:12.522366: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-05-13 14:12:12.522647: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# Models

### Relu activation function

In [None]:
# Layer stack of the ReLU baseline: flatten the image, three ReLU hidden
# layers (300 / 200 / 100 units) and a 10-way softmax output.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[32, 32, 3], name="inputLayer"),
    *[
        tf.keras.layers.Dense(units, activation="relu", name=f"hiddenLayer{idx}")
        for idx, units in enumerate((300, 200, 100), start=1)
    ],
    tf.keras.layers.Dense(10, activation="softmax", name="outputLayer"),
]

In [None]:
# Assemble the ReLU network from the layer list defined above
model_relu = tf.keras.models.Sequential(layers=LAYERS)

In [None]:
# Metrics reported during training/evaluation: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_relu.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 128; the test split is passed as validation data
history_relu = model_relu.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the ReLU model on the test split
model_relu.evaluate(x=x_test, y=y_test)

Training accuracy: 71%
Testing accuracy: 51.38%

### Leaky-relu activation function

In [None]:
# Same topology as the ReLU baseline, but every hidden layer uses its own
# LeakyReLU(alpha=0.01) instance as activation.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[32, 32, 3], name="inputLayer"),
    *[
        tf.keras.layers.Dense(
            units,
            activation=tf.keras.layers.LeakyReLU(alpha=0.01),
            name=f"hiddenLayer{idx}",
        )
        for idx, units in enumerate((300, 200, 100), start=1)
    ],
    tf.keras.layers.Dense(10, activation="softmax", name="outputLayer"),
]

In [None]:
# Assemble the LeakyReLU network from the layer list defined above
model_Leakyrelu = tf.keras.models.Sequential(layers=LAYERS)

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_Leakyrelu.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 128; the test split is passed as validation data
history_Leakyrelu = model_Leakyrelu.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the LeakyReLU model on the test split
model_Leakyrelu.evaluate(x=x_test, y=y_test)

Training accuracy: 74.25%
Testing accuracy: 50.3%

### Randomized Leaky Relu

In [None]:
# Custom layer for randomized Leaky ReLU (RReLU) activation
class RandomizedLeakyReLU(tf.keras.layers.Layer):
    """Leaky ReLU whose negative slope is sampled at random while training.

    During training every element gets its own slope drawn uniformly from
    ``[lower, upper)``.  Outside of training the fixed mean slope
    ``(lower + upper) / 2`` is used instead, so that ``evaluate`` / ``predict``
    are deterministic rather than re-sampling noise on every call.

    Args:
        lower: Lower bound of the sampled slope (default 0.01).
        upper: Upper bound of the sampled slope (default 0.2).
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, lower=0.01, upper=0.2, **kwargs):
        super(RandomizedLeakyReLU, self).__init__(**kwargs)
        self.lower = float(lower)
        self.upper = float(upper)

    def call(self, inputs, training=None):
        """Apply the activation; `training` selects random vs. mean slope."""
        if training:
            # One independent slope per element of the incoming batch
            alpha = tf.random.uniform(
                shape=tf.shape(inputs),
                minval=self.lower,
                maxval=self.upper,
                dtype=inputs.dtype,
            )
        else:
            # Deterministic slope for validation / inference
            alpha = (self.lower + self.upper) / 2.0
        # For slopes in (0, 1): positive inputs pass through, negative ones are scaled by alpha
        return tf.maximum(alpha * inputs, inputs)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(RandomizedLeakyReLU, self).get_config()
        config.update({"lower": self.lower, "upper": self.upper})
        return config

# Linear Dense layers, each followed by its own randomized Leaky ReLU layer,
# then the 10-way softmax output.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[32, 32, 3], name="inputLayer"),
    *[
        layer
        for idx, units in enumerate((300, 200, 100), start=1)
        for layer in (
            tf.keras.layers.Dense(units, name=f"hiddenLayer{idx}"),
            RandomizedLeakyReLU(name=f"hiddenLayer{idx}_activation"),
        )
    ],
    tf.keras.layers.Dense(10, activation="softmax", name="outputLayer"),
]

In [None]:
# Assemble the randomized Leaky ReLU network from the layer list defined above
model_RandomizedLeakyrelu = tf.keras.models.Sequential(layers=LAYERS)

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_RandomizedLeakyrelu.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 128; the test split is passed as validation data
history_RandomizedLeakyrelu = model_RandomizedLeakyrelu.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the randomized Leaky ReLU model on the test split
model_RandomizedLeakyrelu.evaluate(x=x_test, y=y_test)

Training accuracy: 74%
Testing Accuracy: 54.26%

### Parametric Leaky Relu

In [None]:

# Linear Dense layers, each followed by its own PReLU layer (learnable
# negative slope), then the 10-way softmax output.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[32, 32, 3], name="inputLayer"),
    *[
        layer
        for idx, units in enumerate((300, 200, 100), start=1)
        for layer in (
            tf.keras.layers.Dense(units, name=f"hiddenLayer{idx}"),
            tf.keras.layers.PReLU(name=f"hiddenLayer{idx}_activation"),
        )
    ],
    tf.keras.layers.Dense(10, activation="softmax", name="outputLayer"),
]

In [None]:
# Assemble the PReLU network from the layer list defined above
model_ParaLeakyrelu = tf.keras.models.Sequential(layers=LAYERS)

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_ParaLeakyrelu.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 128; the test split is passed as validation data
history_ParaLeakyrelu = model_ParaLeakyrelu.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the PReLU model on the test split
model_ParaLeakyrelu.evaluate(x=x_test, y=y_test)

training accuracy: 75.88%
testing accuracy: 51.1%

### Exponential Linear Unit

In [None]:
# Same topology as the ReLU baseline with ELU as the hidden-layer activation
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[32, 32, 3], name="inputLayer"),
    *[
        tf.keras.layers.Dense(units, activation="elu", name=f"hiddenLayer{idx}")
        for idx, units in enumerate((300, 200, 100), start=1)
    ],
    tf.keras.layers.Dense(10, activation="softmax", name="outputLayer"),
]

In [None]:
# Assemble the ELU network from the layer list defined above
model_elu = tf.keras.models.Sequential(layers=LAYERS)

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_elu.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 128; the test split is passed as validation data
history_elu = model_elu.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the ELU model on the test split
model_elu.evaluate(x=x_test, y=y_test)

training accuracy: 82%
testing accuracy: 51.2%

### SELU

In [None]:
# Modify the layers to use SELU activation.
# The hidden layers pair SELU with LeCun-normal initialisation.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[32, 32, 3], name="inputLayer"),
    tf.keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal", name="hiddenLayer1"),
    tf.keras.layers.Dense(200, activation="selu", kernel_initializer="lecun_normal", name="hiddenLayer2"),
    tf.keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal", name="hiddenLayer3"),
    # The output layer must be softmax: the model is compiled with
    # categorical_crossentropy, which expects class probabilities, whereas
    # SELU outputs are unbounded and can be negative.
    tf.keras.layers.Dense(10, activation="softmax", name="outputLayer"),
]

In [None]:
# Assemble the SELU network from the layer list defined above
model_selu = tf.keras.models.Sequential(layers=LAYERS)

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_selu.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 128; the test split is passed as validation data
history_selu = model_selu.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
)

### Batch normalization

In [None]:
# Batch-normalised ELU network: the flattened input is normalised first, and the
# two hidden Dense layers drop their bias term.
# NOTE(review): the first hidden block runs BatchNormalization before the ELU,
# the second one runs it after the ELU — confirm the mixed ordering is intended.
model_bn = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[32, 32, 3]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False),
    keras.layers.Activation("elu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation="softmax"),
])

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_bn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 128; the test split is passed as validation data
history_bn = model_bn.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the batch-normalised model on the test split
model_bn.evaluate(x=x_test, y=y_test)

Training accuracy: 91%
Testing accuracy: 52.3%

## Learning rate Scheduler

1. Exponential decay

In [None]:
def exponential_decay(epoch,lr):
    """Return the learning rate for the next epoch.

    The current rate is multiplied by 0.1 ** (1 / 20); applied once per epoch
    this shrinks the rate by a factor of ten every 20 epochs.

    Args:
        epoch: Epoch index passed by the scheduler callback (not used).
        lr: Current learning rate.

    Returns:
        The decayed learning rate.
    """
    per_epoch_factor = 0.1 ** (1 / 20)
    return lr * per_epoch_factor


In [None]:
# Let Keras call `exponential_decay` to obtain each epoch's learning rate
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay)

# NOTE(review): model_bn is not rebuilt here, so this run continues from the
# weights left by the earlier fit — confirm that is intended.
history_bn2 = model_bn.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=[lr_scheduler],
)

In [None]:
# Test-split loss and metrics after the exponential-decay run
model_bn.evaluate(x=x_test, y=y_test)

Training accuracy: 74%
Testing accuracy: 55.36%

2. Performance Scheduling

In [None]:
# Multiply the learning rate by 0.5 once the monitored quantity has stalled for 5 epochs
lr_performanceScheduler = keras.callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=5,
)

In [None]:
# NOTE(review): model_bn is reused again without being rebuilt or recompiled, so
# this run presumably starts from the weights and optimizer state left by the
# previous runs — confirm that is intended.
history_bn3 = model_bn.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=[lr_performanceScheduler],
)

In [None]:
# Test-split loss and metrics after the performance-scheduling run
model_bn.evaluate(x=x_test, y=y_test)

Training Accuracy: 81%
Testing Accuracy: 55.6%


### Fixing Overfitting

#### 1. L1 and L2 regularizers

In [None]:
# Batch-normalised ELU network with an l1_l2 kernel penalty on both hidden
# Dense layers (each layer gets its own regularizer object, built with 0.01).
model_bn_l1_l2 = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[32, 32, 3]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(
        300,
        kernel_initializer="he_normal",
        use_bias=False,
        kernel_regularizer=keras.regularizers.l1_l2(0.01),
    ),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(
        100,
        kernel_initializer="he_normal",
        use_bias=False,
        kernel_regularizer=keras.regularizers.l1_l2(0.01),
    ),
    keras.layers.Activation("elu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation="softmax"),
])

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_bn_l1_l2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 50 epochs in mini-batches of 32; the test split is passed as validation data
history_bn_l1_l2 = model_bn_l1_l2.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the regularised model on the test split
model_bn_l1_l2.evaluate(x=x_test, y=y_test)

Training Accuracy: 35%
Testing Accuracy: 35.6%

#### 2. Dropout technique

We will force dropout to be active during both training and validation, so that we get an accurate picture of whether the model is overfitting.

We will also add a dropout layer only after the last hidden layer, as most architectures do.

In [None]:
class AlwaysOnDropout(keras.layers.Dropout):
    """Dropout layer that keeps dropping units outside of training as well.

    `fit()` and `evaluate()` run validation/test batches with `training=False`,
    which switches a plain `Dropout` layer off.  Overriding the flag keeps the
    layer active in every phase, so training and validation metrics are
    computed under the same conditions.  As a consequence, `evaluate()` and
    `predict()` on a model containing this layer are stochastic.
    """

    def call(self, inputs, training=None):
        """Apply dropout regardless of the `training` flag that was passed in."""
        return super().call(inputs, training=True)


# Batch-normalised ELU network with always-active dropout (rate 0.5) placed
# after the last hidden block, right before the softmax output layer.
model_bn_dropout = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[32, 32, 3]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False),
    keras.layers.Activation("elu"),
    keras.layers.BatchNormalization(),
    AlwaysOnDropout(rate=0.5),
    keras.layers.Dense(10, activation="softmax"),
])

In [None]:
# Fresh metric objects for this model: accuracy plus precision and recall
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Adam optimiser with cross-entropy over the one-hot labels
model_bn_dropout.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# `keras.backend.learning_phase_scope` is deprecated (and missing from newer
# Keras releases), and it has no effect under `fit()`: Keras passes an explicit
# `training` flag to the model for every training and validation batch, which
# takes precedence over the global learning phase.  Keeping dropout active
# during validation has to be done in the layer itself (by running it with
# training=True), not with a learning-phase scope.
history_bn_dropout = model_bn_dropout.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and compiled metrics of the dropout model on the test split
model_bn_dropout.evaluate(x=x_test, y=y_test)

### Final model using the default DNN configuration

1. Kernel_initializer = LeCun Initialization
2. Activation function: SELU
3. Normalization = none
4. regularization = early stopping
5. optimizer = nadam
6. learning rate scheduler = performance scheduling

In [None]:
# Final network: SELU hidden layers with LeCun-normal initialisation and a
# 10-way softmax output, trained with Nadam.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[32,32,3]),
    keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(10, activation="softmax")
])
# The labels were one-hot encoded with to_categorical earlier in the notebook,
# so the matching loss is categorical_crossentropy; the sparse variant expects
# integer class ids and does not fit one-hot targets.
model.compile(loss="categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])

In [None]:
# Stop once the monitored quantity has not improved for 10 epochs and roll the
# model back to the best weights seen during the run
early_stopping_cb = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Multiply the learning rate by 0.5 once the monitored quantity has stalled for 5 epochs
lr_performanceScheduler = keras.callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=5,
)

In [None]:
# Early stopping (with restore_best_weights) and the plateau scheduler both act
# on the validation metrics.  Feeding them the test split would select the
# final weights on the test data and make any later test score optimistic, so
# the last 10% of the training data is held out for validation instead and the
# test split stays untouched for the final evaluation.
history_finalDNN = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    callbacks=[lr_performanceScheduler, early_stopping_cb],
)