Import Libraries

In [1]:
!pip install tensorflow-io

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-io
  Downloading tensorflow_io-0.32.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (28.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.0/28.0 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-io
Successfully installed tensorflow-io-0.32.0


In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.datasets import mnist
from keras import backend as k

# 1. Process MNIST dataset

Variables:
batch_size: number of training samples in each mini-batch (the training set is split into mini-batches of this size),
num_classes: number of classes (labels) in the data,
epochs: number of training epochs; one epoch is one full pass (forward + backward) over the entire training set

In [3]:
# Training hyperparameters shared by the models trained below.
batch_size: int = 128   # samples per mini-batch passed to fit()
num_classes: int = 10   # width of the one-hot labels and of the softmax output
epochs: int = 4         # number of passes over the training set

Assign training and test data

In [4]:
# Height and width (in pixels) used when reshaping the images below.
img_rows = img_cols = 28

# load_data() returns a (train, test) pair, each an (images, labels) tuple.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Reshape the images

In [5]:
# Add the single grayscale channel axis in the position the backend expects,
# and record the matching per-sample input shape for the models.
if k.image_data_format()=='channels_first':
    # channels_first layout: (samples, channels, rows, cols)
    x_train=x_train.reshape(x_train.shape[0],1,img_rows,img_cols)
    x_test=x_test.reshape(x_test.shape[0],1,img_rows,img_cols)
    input_shape=(1,img_rows,img_cols)
else:
    # channels_last layout: (samples, rows, cols, channels)
    x_train=x_train.reshape(x_train.shape[0],img_rows,img_cols,1)
    x_test=x_test.reshape(x_test.shape[0],img_rows,img_cols,1)
    input_shape=(img_rows,img_cols,1)

# Scale pixel values by 1/255.
# NOTE: re-running this cell divides again; reload the data first if you need
# to re-execute it.
x_train = x_train/255.0
x_test=x_test/255.0
print('x_train shape:',x_train.shape,'\nx_test shape:',x_test.shape)

x_train shape: (60000, 28, 28, 1) 
x_test shape: (10000, 28, 28, 1)


Convert class vectors to binary class matrices

In [6]:
# One-hot encode the integer class labels. The ndim guard makes the cell safe
# to re-run: labels that are already 2-D (one-hot) are left untouched instead
# of being encoded a second time.
if y_train.ndim == 1:
    y_train=keras.utils.to_categorical(y_train,num_classes)
if y_test.ndim == 1:
    y_test=keras.utils.to_categorical(y_test,num_classes)

# 2. CNN without Attention

Design the CNN architecture

In [7]:
from keras.models import Sequential
from keras.layers import Dense,Flatten,Input
from keras.layers import Conv2D,MaxPool2D,Multiply

In [8]:
# Baseline CNN: two conv + max-pool stages followed by a small dense classifier.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dense(32, activation='relu'),
    # One softmax unit per class.
    Dense(num_classes, activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 1600)              0         
                                                                 
 dense (Dense)               (None, 32)                5

In [9]:
# Configure the baseline model: Adam optimizer, categorical cross-entropy on
# the one-hot labels, accuracy as the reported metric.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Train, using the test split as validation data after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f4023c0d430>

In [16]:
# Evaluate the baseline CNN on the test split.
test_loss, test_acc = model.evaluate(x_test,y_test)
# Convert to percent first and let the format spec do the rounding; rounding the
# fraction to 3 decimals beforehand discarded the second printed decimal.
print('Test Accuracy = {:.2f} %:'.format(test_acc*100))

Test Accuracy = 98.80 %:


# 3. CNN with Attention

Design the CNN architecture

In [11]:
# Functional-API CNN in which each conv/pool stage is followed by an attention
# gate: a 1x1 sigmoid convolution yields a single-channel map that is multiplied
# back onto the stage's feature maps.
inputs = Input(shape=input_shape)

# --- Stage 1 ---------------------------------------------------------------
conv1 = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(inputs)
pool1 = MaxPool2D(pool_size=(2, 2))(conv1)
# Attention 1: one gate value per spatial position, applied to pool1.
attention_conv1 = Conv2D(
    filters=1, kernel_size=(1, 1), padding='same', activation='sigmoid'
)(pool1)
attention_mul1 = Multiply()([pool1, attention_conv1])
# NOTE(review): this additional pooling step has no counterpart in the
# Sequential baseline above, so the two models differ in more than the
# attention gate — confirm this is intended.
pool2 = MaxPool2D(pool_size=(2, 2))(attention_mul1)

# --- Stage 2 ---------------------------------------------------------------
conv2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(pool2)
pool3 = MaxPool2D(pool_size=(2, 2))(conv2)
# Attention 2: same gating pattern applied to pool3.
attention_conv2 = Conv2D(
    filters=1, kernel_size=(1, 1), padding='same', activation='sigmoid'
)(pool3)
attention_mul2 = Multiply()([pool3, attention_conv2])
pool4 = MaxPool2D(pool_size=(2, 2))(attention_mul2)

# --- Classifier head ---------------------------------------------------------
flatten2 = Flatten()(pool4)
dense2 = Dense(32, activation='relu')(flatten2)
dense3 = Dense(num_classes, activation='softmax')(dense2)

modelAtt = keras.Model(inputs=inputs, outputs=dense3)

modelAtt.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 conv2d_2 (Conv2D)              (None, 26, 26, 32)   320         ['input_1[0][0]']                
                                                                                                  
 max_pooling2d_2 (MaxPooling2D)  (None, 13, 13, 32)  0           ['conv2d_2[0][0]']               
                                                                                                  
 conv2d_3 (Conv2D)              (None, 13, 13, 1)    33          ['max_pooling2d_2[0][0]']        
                                                                                              

In [12]:
# Configure the attention model: Adam optimizer, categorical cross-entropy on
# the one-hot labels, accuracy as the reported metric.
modelAtt.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Train with the same batch size, epoch count and validation split as the
# baseline model.
modelAtt.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f402375f1f0>

In [15]:
# Evaluate the attention CNN on the test split.
# Note: this overwrites test_loss / test_acc from the baseline evaluation.
test_loss, test_acc = modelAtt.evaluate(x_test,y_test)
# Convert to percent first and let the format spec do the rounding; rounding the
# fraction to 3 decimals beforehand discarded the second printed decimal.
print('Test Accuracy = {:.2f} %:'.format(test_acc*100))

Test Accuracy = 97.80 %:
