# Initialization

In [1]:
# data processing
import numpy as np
import pandas as pd 
from collections import defaultdict

# data visualization
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import style
sns.set()
import urllib.request


import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))
from tensorflow import keras

# Notebook auto reloads code. (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# Colab-only setup: mount Google Drive and switch to the project checkout.
# Outside Colab the `google.colab` package cannot be imported; in that case the
# mount and the directory change are skipped and the kernel keeps its current
# working directory instead of failing with ModuleNotFoundError.
import os

BASE_PATH = '/content/drive'
PROJECT_PATH = os.path.join(
    BASE_PATH,
    "MyDrive",
    "2021-09 Fall Semester",
    "ECBM 4040 Neural Network Deep Learning",
    "Project",
    "recreating-residual-attention-network",
)

try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available - skipping Drive mount, staying in " + os.getcwd())
else:
    drive.mount(BASE_PATH)
    # change directory: the project modules are imported relative to this folder
    os.chdir(PROJECT_PATH)

Mounted at /content/drive


In [3]:
# Import created modules
from src.models.ResidualAttentionNetwork import ResidualAttentionNetwork, Attention56, Attention92, Attention128, Attention164
from src.utils import generate_data

# Modelling

In [4]:
# Load CIFAR-10 through the project helper. It returns six values: the input
# shape, the number of classes, the train/validation/test datasets, and a
# sixth value that this notebook does not use.
(
    INPUT_SHAPE,
    NUM_CLASS,
    train_ds,
    val_ds,
    test_ds,
    _,
) = generate_data.get_cifar10()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [5]:
# Training hyper-parameters shared by every experiment in this notebook.
N_EPOCH: int = 10       # passed as `epochs` to each model.fit call
BATCH_SIZE: int = 256   # samples per batch when the datasets are batched


In [6]:
# Turn the raw datasets into batched, prefetched input pipelines.
#
# Only the training set is shuffled. Validation/test metrics are aggregated
# over the whole split, so shuffling them changes nothing except that it costs
# a buffer fill on every pass and scrambles the row order, which would break
# any later alignment between predictions and separately extracted labels.
#
# NOTE: this cell rebinds train_ds / val_ds / test_ds to their batched
# versions, so it is not idempotent - re-run the data-loading cell before
# running this cell a second time, otherwise the batches get batched again.
train_ds = (
    train_ds
    .shuffle(buffer_size=BATCH_SIZE * 4)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
val_ds = val_ds.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

## Naive Attention Learning vs Residual Attention Learning

### Naive Attention Learning

In [7]:
# Build the Naive Attention Learning variant (learning_type='nal').
ran_model = ResidualAttentionNetwork(
    input_shape=INPUT_SHAPE,
    num_class=NUM_CLASS,
    learning_type='nal',
)

# Call the model once on a symbolic input so that summary() has a built model
# to describe. The placeholder uses INPUT_SHAPE - the same value handed to the
# constructor - instead of a hard-coded (32, 32, 3), so the two cannot drift
# apart if the dataset changes.
inputs = tf.keras.Input(shape=INPUT_SHAPE)
ran_model(inputs)
ran_model.summary()

Model: "residual_attention_network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 32, 32, 3)         0         
                                                                 
 conv2d (Conv2D)             multiple                  864       
                                                                 
 batch_normalization (BatchN  multiple                 128       
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                multiple                  0         
                                                                 
 max_pooling2d (MaxPooling2D  multiple                 0         
 )                                                               
                                                                 
 residual_unit (ResidualUnit  multiple  

In [8]:
# Train the Naive Attention Learning model.
# `nal_model` is a second name for the object currently bound to `ran_model`
# (no copy is made), so run the NAL build cell immediately before this one.
nal_model = ran_model

nal_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # NOTE(review): CategoricalCrossentropy presumably relies on one-hot labels
    # from generate_data - confirm.
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Keep this run's curves under their own name: every training cell in this
# notebook assigns `history`, so the NAL results would otherwise be lost as
# soon as the next model is trained.
nal_history = nal_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=N_EPOCH,
    verbose=1,
)
history = nal_history  # backward-compatible alias for code that reads `history`

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Residual Attention Learning

In [9]:
# Build the Residual Attention Learning variant (learning_type='arl').
ran_model = ResidualAttentionNetwork(
    input_shape=INPUT_SHAPE,
    num_class=NUM_CLASS,
    learning_type='arl',
)

# Call the model once on a symbolic input so that summary() has a built model
# to describe. The placeholder uses INPUT_SHAPE - the same value handed to the
# constructor - instead of a hard-coded (32, 32, 3), so the two cannot drift
# apart if the dataset changes.
inputs = tf.keras.Input(shape=INPUT_SHAPE)
ran_model(inputs)
ran_model.summary()

Model: "residual_attention_network_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 32, 32, 3)         0         
                                                                 
 conv2d_139 (Conv2D)         multiple                  864       
                                                                 
 batch_normalization_107 (Ba  multiple                 128       
 tchNormalization)                                               
                                                                 
 re_lu_107 (ReLU)            multiple                  0         
                                                                 
 max_pooling2d_7 (MaxPooling  multiple                 0         
 2D)                                                             
                                                                 
 residual_unit_33 (ResidualU  multiple

In [10]:
# Train the Residual Attention Learning model.
# `arl_model` is a second name for the object currently bound to `ran_model`
# (no copy is made), so run the ARL build cell immediately before this one.
arl_model = ran_model

arl_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # NOTE(review): CategoricalCrossentropy presumably relies on one-hot labels
    # from generate_data - confirm.
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Keep this run's curves under their own name: every training cell in this
# notebook assigns `history`, so these results would otherwise be overwritten
# by the next run (and would themselves overwrite the previous one).
arl_history = arl_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=N_EPOCH,
    verbose=1,
)
history = arl_history  # backward-compatible alias for code that reads `history`

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Different numbers of attention modules

In this part, the experiments are run using the Residual Attention Learning mechanism.

### Attention-56

In [11]:
# Attention-56: instantiate with the class defaults, then call the model once
# on a symbolic (32, 32, 3) placeholder so that summary() can list its layers.
ran_model = Attention56()
inputs = tf.keras.Input(shape=(32, 32, 3))
ran_model(inputs)
ran_model.summary()

Model: "attention56"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_1 (Sequential)   (None, 32, 32, 3)         0         
                                                                 
 conv2d_278 (Conv2D)         multiple                  864       
                                                                 
 batch_normalization_214 (Ba  multiple                 128       
 tchNormalization)                                               
                                                                 
 re_lu_214 (ReLU)            multiple                  0         
                                                                 
 max_pooling2d_14 (MaxPoolin  multiple                 0         
 g2D)                                                            
                                                                 
 residual_unit_66 (ResidualU  multiple                 

In [None]:
# Train Attention-56.
# `a56_model` is a second name for the object currently bound to `ran_model`
# (no copy is made), so run the Attention-56 build cell immediately before
# this one.
a56_model = ran_model

a56_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # NOTE(review): CategoricalCrossentropy presumably relies on one-hot labels
    # from generate_data - confirm.
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Keep this run's curves under their own name: every training cell in this
# notebook assigns `history`, so depth comparisons would otherwise only ever
# see the most recently trained model.
a56_history = a56_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=N_EPOCH,
    verbose=1,
)
history = a56_history  # backward-compatible alias for code that reads `history`

Epoch 1/10
Epoch 2/10
  8/157 [>.............................] - ETA: 3:00 - loss: 2.3873 - accuracy: 0.1514

KeyboardInterrupt: ignored

### Attention-92

In [12]:
# Attention-92: instantiate with the class defaults, then call the model once
# on a symbolic (32, 32, 3) placeholder so that summary() can list its layers.
ran_model = Attention92()
inputs = tf.keras.Input(shape=(32, 32, 3))
ran_model(inputs)
ran_model.summary()

Model: "attention92"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_2 (Sequential)   (None, 32, 32, 3)         0         
                                                                 
 conv2d_417 (Conv2D)         multiple                  864       
                                                                 
 batch_normalization_321 (Ba  multiple                 128       
 tchNormalization)                                               
                                                                 
 re_lu_321 (ReLU)            multiple                  0         
                                                                 
 max_pooling2d_21 (MaxPoolin  multiple                 0         
 g2D)                                                            
                                                                 
 residual_unit_99 (ResidualU  multiple                 

In [None]:
# Train Attention-92.
# `a92_model` is a second name for the object currently bound to `ran_model`
# (no copy is made), so run the Attention-92 build cell immediately before
# this one.
a92_model = ran_model

a92_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # NOTE(review): CategoricalCrossentropy presumably relies on one-hot labels
    # from generate_data - confirm.
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Keep this run's curves under their own name: every training cell in this
# notebook assigns `history`, so depth comparisons would otherwise only ever
# see the most recently trained model.
a92_history = a92_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=N_EPOCH,
    verbose=1,
)
history = a92_history  # backward-compatible alias for code that reads `history`

### Attention-128

In [13]:
# Attention-128: instantiate with the class defaults, then call the model once
# on a symbolic (32, 32, 3) placeholder so that summary() can list its layers.
ran_model = Attention128()
inputs = tf.keras.Input(shape=(32, 32, 3))
ran_model(inputs)
ran_model.summary()

Model: "attention128"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_3 (Sequential)   (None, 32, 32, 3)         0         
                                                                 
 conv2d_670 (Conv2D)         multiple                  864       
                                                                 
 batch_normalization_515 (Ba  multiple                 128       
 tchNormalization)                                               
                                                                 
 re_lu_515 (ReLU)            multiple                  0         
                                                                 
 max_pooling2d_34 (MaxPoolin  multiple                 0         
 g2D)                                                            
                                                                 
 residual_unit_159 (Residual  multiple                

In [None]:
# Train Attention-128.
# `a128_model` is a second name for the object currently bound to `ran_model`
# (no copy is made), so run the Attention-128 build cell immediately before
# this one.
a128_model = ran_model

a128_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # NOTE(review): CategoricalCrossentropy presumably relies on one-hot labels
    # from generate_data - confirm.
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Keep this run's curves under their own name: every training cell in this
# notebook assigns `history`, so depth comparisons would otherwise only ever
# see the most recently trained model.
a128_history = a128_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=N_EPOCH,
    verbose=1,
)
history = a128_history  # backward-compatible alias for code that reads `history`

### Attention-164

In [14]:
# Attention-164: instantiate with the class defaults, then call the model once
# on a symbolic (32, 32, 3) placeholder so that summary() can list its layers.
ran_model = Attention164()
inputs = tf.keras.Input(shape=(32, 32, 3))
ran_model(inputs)
ran_model.summary()

Model: "attention164"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_4 (Sequential)   (None, 32, 32, 3)         0         
                                                                 
 conv2d_1037 (Conv2D)        multiple                  864       
                                                                 
 batch_normalization_796 (Ba  multiple                 128       
 tchNormalization)                                               
                                                                 
 re_lu_796 (ReLU)            multiple                  0         
                                                                 
 max_pooling2d_53 (MaxPoolin  multiple                 0         
 g2D)                                                            
                                                                 
 residual_unit_246 (Residual  multiple                

In [None]:
# Train Attention-164.
# `a164_model` is a second name for the object currently bound to `ran_model`
# (no copy is made), so run the Attention-164 build cell immediately before
# this one.
a164_model = ran_model

a164_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # NOTE(review): CategoricalCrossentropy presumably relies on one-hot labels
    # from generate_data - confirm.
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Keep this run's curves under their own name: every training cell in this
# notebook assigns `history`, so depth comparisons would otherwise only ever
# see the most recently trained model.
a164_history = a164_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=N_EPOCH,
    verbose=1,
)
history = a164_history  # backward-compatible alias for code that reads `history`