In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

In [2]:
[name for name in dir(keras.initializers) if not name.startswith('_')]

['Constant',
 'GlorotNormal',
 'GlorotUniform',
 'HeNormal',
 'HeUniform',
 'Identity',
 'Initializer',
 'LecunNormal',
 'LecunUniform',
 'Ones',
 'Orthogonal',
 'RandomNormal',
 'RandomUniform',
 'TruncatedNormal',
 'VarianceScaling',
 'Zeros',
 'constant',
 'deserialize',
 'get',
 'glorot_normal',
 'glorot_uniform',
 'he_normal',
 'he_uniform',
 'identity',
 'lecun_normal',
 'lecun_uniform',
 'ones',
 'orthogonal',
 'random_normal',
 'random_uniform',
 'serialize',
 'truncated_normal',
 'variance_scaling',
 'zeros']

In [3]:
keras.layers.Dense(units=10, activation='relu',kernel_initializer='he_normal')

<keras.src.layers.core.dense.Dense at 0x291d76610>

2.Activate Function

Method 1 (keras.activations)

In [4]:
[name for name in dir(keras.activations) if not name.startswith('_')]

['deserialize',
 'elu',
 'exponential',
 'gelu',
 'get',
 'hard_sigmoid',
 'linear',
 'mish',
 'relu',
 'selu',
 'serialize',
 'sigmoid',
 'softmax',
 'softplus',
 'softsign',
 'swish',
 'tanh']

In [5]:
keras.layers.Dense(10, activation='selu', kernel_initializer='lecun_normal')

<keras.src.layers.core.dense.Dense at 0x295a38290>

Method 2 (keras.layers)

In [6]:
[name for name in dir(keras.layers) if 'elu' in name.lower()]

['ELU', 'LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']

In [7]:
keras.layers.Dense(300, kernel_initializer='he_normal'), keras.layers.LeakyReLU(alpha=.3)

(<keras.src.layers.core.dense.Dense at 0x295ab71d0>,
 <keras.src.layers.activation.leaky_relu.LeakyReLU at 0x295a93e50>)

Method 3 (keras.layers.Activation)

In [8]:
keras.layers.Dense(300, kernel_initializer='he_normal'), keras.layers.Activation('relu')

(<keras.src.layers.core.dense.Dense at 0x1753b4a90>,
 <keras.src.layers.core.activation.Activation at 0x294e95510>)

Ex1

In [9]:
#Load data
from tensorflow.keras.datasets import fashion_mnist
(xTrainSet, yTrainSet), (xTest, yTest) = fashion_mnist.load_data()

#Split data
from sklearn.model_selection import train_test_split
xTrain , xValid, yTrain, yValid = train_test_split(xTrainSet, yTrainSet, random_state=1)

#Preprocessing
xTrain = xTrain / 255
xValid = xValid / 255
xTest = xTest / 255

In [10]:
tf.keras.backend.clear_session()
np.random.seed(1)
tf.random.set_seed(1)

In [11]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(300, activation='relu', kernel_initializer='he_normal'),
    keras.layers.Dense(200, kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(alpha=.01),
    keras.layers.Dense(100, kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(alpha=.01),
    keras.layers.Dense(100, kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(alpha=.01),
    keras.layers.Dense(100, kernel_initializer='he_normal'),
    keras.layers.PReLU(),
    keras.layers.Dense(100, kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(alpha=.01),
    keras.layers.Dense(10, activation='softmax')
])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 dense_1 (Dense)             (None, 200)               60200     
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 200)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               20100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 100)               1

In [13]:
model.compile(loss='sparse_categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])

In [14]:
train = model.fit(xTrain, yTrain, epochs=2, validation_data=(xValid, yValid))

Epoch 1/2
Epoch 2/2


Batch Nomarlization, BN

Method 1 : After activation function

In [15]:
keras.layers.Dense(300, activation = 'relu'), keras.layers.BatchNormalization()

(<keras.src.layers.core.dense.Dense at 0x2a14e4790>,
 <keras.src.layers.normalization.batch_normalization.BatchNormalization at 0x2a0aab410>)

Ex 2

In [16]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(200, activation='relu', kernel_initializer='he_normal'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation='softmax'),
])

In [17]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_1 (Bat  (None, 784)               3136      
 chNormalization)                                                
                                                                 
 dense_8 (Dense)             (None, 200)               157000    
                                                                 
 batch_normalization_2 (Bat  (None, 200)               800       
 chNormalization)                                                
                                                                 
 dense_9 (Dense)             (None, 100)               20100     
                                                                 
 batch_normalization_3 (Bat  (None, 100)              

In [18]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [19]:
train = model.fit(xTrain, yTrain, epochs=2, validation_data=(xValid, yValid))

Epoch 1/2
Epoch 2/2


Method 2 : Before activation function

In [20]:
keras.layers.Dense(300, use_bias=False),
keras.layers.BatchNormalization(),
keras.layers.Activation('relu')

<keras.src.layers.core.activation.Activation at 0x2a1be1cd0>

Ex 3

In [21]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(200, use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dense(100, use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha=.3),
    keras.layers.Dense(10, activation='softmax'),
])

In [22]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_5 (Bat  (None, 784)               3136      
 chNormalization)                                                
                                                                 
 dense_12 (Dense)            (None, 200)               156800    
                                                                 
 batch_normalization_6 (Bat  (None, 200)               800       
 chNormalization)                                                
                                                                 
 activation_1 (Activation)   (None, 200)               0         
                                                                 
 dense_13 (Dense)            (None, 100)              

In [23]:
model.compile(loss='sparse_categorical_crossentropy',optimizer=keras.optimizers.SGD(learning_rate=1e-3), metrics=['accuracy'])



In [24]:
train = model.fit(xTrain, yTrain, epochs=2, validation_data=(xValid, yValid))

Epoch 1/2
Epoch 2/2


4.Gradient Clipping

Method 1 : Clipvalue

In [25]:
opt = keras.optimizers.SGD(clipvalue=1.0)



Method 2 : Clipnorm

In [26]:
opt = keras.optimizers.SGD(clipnorm=0.9)



Ex 4

In [27]:
opt = keras.optimizers.SGD(learning_rate=1e-3, clipnorm=0.9)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])



In [28]:
train = model.fit(xTrain,yTrain, epochs=2, validation_data=(xValid, yValid))

Epoch 1/2
Epoch 2/2
