Import Libraries

In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.datasets import mnist
from keras import backend as k

Variables:
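batch_size: number of training samples in each mini-batch; the training set is split into mini-batches of this size,
num_classes: number of distinct class labels in the data (the ten digits 0-9),
epochs: number of complete passes over the training set; within one epoch every training sample goes through one forward pass and one backward pass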

In [2]:
# Training hyper-parameters.
batch_size = 128   # samples per mini-batch passed to model.fit
num_classes = 10   # number of label classes used for one-hot encoding and the output layer
epochs = 4         # number of passes over the training data

# Seed TensorFlow's global random generator so that repeated
# "Restart Kernel -> Run All" runs start from the same random state.
# NOTE(review): this should make results repeatable on the same setup, but
# bit-exact reproducibility (e.g. on GPU) is not guaranteed by the seed alone.
SEED = 42
tf.random.set_seed(SEED)

Assign training and test data

In [3]:
# Height and width (in pixels) used when reshaping the images further down.
img_rows = 28
img_cols = 28

# Load the MNIST dataset as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Reshape the images to add a channel axis and scale the pixel values by 1/255

In [4]:
# Pick the per-sample shape that matches the backend's image data format.
# The channel axis comes first for 'channels_first' and last otherwise.
# (Previously both branches produced the channels-last layout, so a
# channels_first backend received tensors in the wrong layout.)
if k.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Add the single (grayscale) channel axis to every image.
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Scale pixel values by 1/255. Casting to float32 first avoids the float64
# arrays that plain division would otherwise produce.
# NOTE: this cell overwrites x_train / x_test, so re-running it without
# re-running the loading cell divides by 255 a second time.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
print('x_train shape:',x_train.shape,'\nx_test shape:',x_test.shape)

x_train shape: (60000, 28, 28, 1) 
x_test shape: (10000, 28, 28, 1)


Convert class vectors to binary class matrices

In [5]:
# One-hot encode the integer class labels of both splits
# (each label becomes a vector of length num_classes).
y_train, y_test = (
    keras.utils.to_categorical(y_train, num_classes),
    keras.utils.to_categorical(y_test, num_classes),
)

Design the CNN architecture

In [6]:
from keras import layers
from keras.layers import Input
from keras.layers import Dense,Dropout,Flatten, Multiply
from keras.layers import Conv2D,MaxPool2D, Activation

In [7]:
def _attention_gate(features):
    """Apply a learned sigmoid gate to a feature map.

    A 1x1 convolution with a single filter and sigmoid activation produces
    the gate, which is then multiplied with `features`.

    Returns:
        (gate, gated_features): the gate tensor and the product tensor.
    """
    gate = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(features)
    gated_features = Multiply()([features, gate])
    return gate, gated_features


inputs = Input(shape=input_shape)

# Stage 1: 32-filter 3x3 convolution -> 2x2 max-pool -> attention gate -> 2x2 max-pool.
conv1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
pool1 = MaxPool2D(pool_size=(2, 2))(conv1)
attention_conv1, attention_mul1 = _attention_gate(pool1)
pool2 = MaxPool2D(pool_size=(2, 2))(attention_mul1)

# Stage 2: same layout as stage 1, with 64 convolution filters.
conv2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(pool2)
pool3 = MaxPool2D(pool_size=(2, 2))(conv2)
attention_conv2, attention_mul2 = _attention_gate(pool3)
pool4 = MaxPool2D(pool_size=(2, 2))(attention_mul2)

# Classifier head: flatten, one hidden dense layer, softmax over the classes.
flatten2 = Flatten()(pool4)
dense2 = Dense(32, activation='relu')(flatten2)
dense3 = Dense(num_classes, activation='softmax')(dense2)

model = keras.Model(inputs=inputs, outputs=dense3)

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 26, 26, 32)   320         ['input_1[0][0]']                
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 13, 13, 32)   0           ['conv2d[0][0]']                 
                                                                                                  
 conv2d_1 (Conv2D)              (None, 13, 13, 1)    33          ['max_pooling2d[0][0]']          
                                                                                              

Compile and train the model

In [8]:
# Configure training: Adam optimizer with its default settings,
# categorical cross-entropy loss (labels are one-hot encoded), accuracy metric.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Train and keep the returned History object so the per-epoch metrics can be
# inspected or plotted later, instead of leaving it as a bare cell repr.
# NOTE(review): validation_data is the test split, so the reported val_*
# metrics are computed on the test set; there is no separate validation set.
history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f8a822c1f40>

Save the Model

In [9]:
# Saving is currently disabled: both statements below are commented out, so
# running this cell does nothing. Uncomment them to write the trained model
# to the file 'AttCNNmodel.h5' and print a confirmation message.
#model.save('AttCNNmodel.h5')
#print("model is saved")