In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# The memory-growth setting must be the same on every visible GPU, so it is applied
# to each device; enabling it on the first GPU only fails on multi-GPU machines.
# With no GPU present the list is empty and the loop is a no-op.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [6]:
from tensorflow.keras.datasets import mnist

In [7]:
# Fetch the MNIST dataset through the Keras helper and unpack its two (images, labels)
# pairs: one for training and one for testing.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [8]:
# Inspect the container type returned by the loader for the training images.
type(X_train)

numpy.ndarray

In [9]:
# Shape of the raw training images before the channel axis is added.
X_train.shape

(60000, 28, 28)

In [10]:
# Shape of the raw test images before the channel axis is added.
X_test.shape

(10000, 28, 28)

In [11]:
# Append a trailing single-channel axis so each sample is 28x28x1, which is the
# input_shape the first Conv2D layer of the model is built with.
X_train = X_train.reshape((len(X_train), 28, 28, 1))
X_test = X_test.reshape((len(X_test), 28, 28, 1))

In [12]:
# Confirm the training images now carry the extra channel axis.
X_train.shape

(60000, 28, 28, 1)

In [13]:
# Cast both image arrays to float32 and divide every pixel by 255
# (rescales to [0, 1], assuming 8-bit pixel values).
# Not idempotent: re-running this cell divides the already-scaled data again.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [14]:

# Peek at the first 10 training labels (integer class ids, before one-hot encoding).
y_train[:10]

array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=uint8)

In [15]:
import tensorflow
# One-hot encode the integer class labels: each label becomes a length-10 row
# with a 1 at the index of its class. Both splits go through the same call.
Y_train, Y_test = [
    tensorflow.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
]

In [16]:

# Look at the first 10 training labels after one-hot encoding.
Y_train[:10]


array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

In [20]:
from tensorflow.keras.models import Sequential
# importing different layers and activations from keras.layers
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Add
from tensorflow.keras.layers import Conv2D, MaxPooling2D

In [21]:
# BatchNormalization is needed for the normalisation layers below; Activation is used
# for the final softmax.
from tensorflow.keras.layers import Activation,BatchNormalization

# Build the whole network in one pass by handing Sequential an ordered list of layers.
# The spatial sizes noted on each convolution assume Conv2D's default padding and stride.
model = Sequential([
    # Block 1: conv -> batch norm -> dropout
    Conv2D(filters=8, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),  # 26x26x8
    BatchNormalization(),
    Dropout(rate=0.1),

    # Block 2: conv -> dropout only.
    # NOTE(review): no BatchNormalization is applied here, unlike every other hidden
    # conv block; confirm whether that is intentional.
    Conv2D(filters=8, kernel_size=(3, 3), activation='relu'),   # 24x24
    Dropout(rate=0.1),

    # Halve the spatial resolution.
    MaxPooling2D(pool_size=(2, 2)),                             # 12x12

    # Blocks 3-7: conv -> batch norm -> dropout
    Conv2D(filters=10, kernel_size=(2, 2), activation='relu'),  # 11x11
    BatchNormalization(),
    Dropout(rate=0.1),

    Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),  # 9x9
    BatchNormalization(),
    Dropout(rate=0.1),

    Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),  # 7x7
    BatchNormalization(),
    Dropout(rate=0.1),

    Conv2D(filters=12, kernel_size=(2, 2), activation='relu'),  # 6x6
    BatchNormalization(),
    Dropout(rate=0.1),

    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),  # 4x4
    BatchNormalization(),
    Dropout(rate=0.1),

    # Final convolution: no activation, batch norm or dropout. The 4x4 kernel spans the
    # whole remaining 4x4 feature map and yields one value per class (10 filters).
    Conv2D(filters=10, kernel_size=(4, 4)),

    # Flatten the per-class outputs into a vector and turn them into class
    # probabilities with softmax (multi-class classification).
    Flatten(),
    Activation('softmax'),
])

model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 26, 26, 8)         80        
_________________________________________________________________
batch_normalization_6 (Batch (None, 26, 26, 8)         32        
_________________________________________________________________
dropout_7 (Dropout)          (None, 26, 26, 8)         0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 24, 24, 8)         584       
_________________________________________________________________
dropout_8 (Dropout)          (None, 24, 24, 8)         0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 8)         0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 11, 11, 10)       

In [22]:
# We are importing the Adam Optimizer
from tensorflow.keras.optimizers import Adam

# We are importing the learningratescheduler callback
from tensorflow.keras.callbacks import LearningRateScheduler
def scheduler(epoch, lr, initial_lr=0.003, decay=0.319):
    """Return the learning rate for ``epoch`` using time-based decay.

    The rate is ``initial_lr / (1 + decay * epoch)``, rounded to 10 decimal places.
    It is always derived from ``initial_lr``, never from the rate currently in use.

    Args:
        epoch: Zero-based index of the epoch about to start (epoch 0 yields
            ``initial_lr`` unchanged).
        lr: Learning rate currently set on the optimizer. Accepted because the
            LearningRateScheduler callback passes it, but it is not used.
        initial_lr: Learning rate at epoch 0. Defaults to 0.003.
        decay: Decay factor applied per epoch. Defaults to 0.319.

    Returns:
        The learning rate to use for ``epoch`` as a float.
    """
    return round(initial_lr / (1 + decay * epoch), 10)

# Compile the model with categorical cross-entropy loss and the Adam optimizer, tracking accuracy.
# `learning_rate` is used instead of the deprecated `lr` alias, which recent Keras versions reject.
# The 0.003 given here equals the scheduler's value for epoch 0; the callback below sets the
# rate at the start of every epoch.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.003), metrics=['accuracy'])

# Train for 30 epochs with a batch size of 16, printing progress (verbose=1).
# The test split is passed as validation data, so the reported validation metrics are
# measured on the test set rather than on a separate hold-out split.
# LearningRateScheduler calls `scheduler` each epoch and, with verbose=1, logs the rate it sets.
model.fit(X_train, Y_train, batch_size=16, epochs=30, verbose=1, validation_data=(X_test, Y_test), callbacks=[LearningRateScheduler(scheduler, verbose=1)])


Epoch 00001: LearningRateScheduler reducing learning rate to 0.003.
Epoch 1/30

Epoch 00002: LearningRateScheduler reducing learning rate to 0.0022744503.
Epoch 2/30

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0018315018.
Epoch 3/30

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0015329586.
Epoch 4/30

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0013181019.
Epoch 5/30

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0011560694.
Epoch 6/30

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0010295127.
Epoch 7/30

Epoch 00008: LearningRateScheduler reducing learning rate to 0.0009279307.
Epoch 8/30

Epoch 00009: LearningRateScheduler reducing learning rate to 0.0008445946.
Epoch 9/30

Epoch 00010: LearningRateScheduler reducing learning rate to 0.0007749935.
Epoch 10/30

Epoch 00011: LearningRateScheduler reducing learning rate to 0.0007159905.
Epoch 11/30

Epoch 00012: LearningRateScheduler reducing lea

<tensorflow.python.keras.callbacks.History at 0x7f9bc0260550>