# Initialization

In [None]:
# data processing
import numpy as np
import pandas as pd 
from collections import defaultdict

# data visualization
import seaborn as sns
# Request 'retina' figure output from the inline backend and render plots inside the notebook.
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import style
# Apply seaborn's default plot styling to subsequent matplotlib figures.
sns.set()
import urllib.request


# deep learning
import tensorflow as tf
# Print the GPU devices TensorFlow can see (an empty list means no GPU is visible).
print(tf.config.list_physical_devices('GPU'))
from tensorflow import keras

# Notebook auto reloads code. (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
# Mode 2 reloads imported modules before each cell runs, so edits to project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# Google Colab only: mount Google Drive, then work from the project folder.
import os

from google.colab import drive

BASE_PATH = '/content/drive'
drive.mount(BASE_PATH)

# Location of the project folder inside the mounted drive
PROJECT_PATH = os.path.join(
    BASE_PATH,
    "MyDrive",
    "2021-09 Fall Semester",
    "ECBM 4040 Neural Network Deep Learning",
    "Project",
    "recreating-residual-attention-network",
)

# Switch the working directory to the project folder
os.chdir(PROJECT_PATH)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Import created modules
from src.models.ResidualAttentionNetwork import ResidualAttentionNetwork, Attention56, Attention92
from src.utils import generate_data

# Modelling

In [None]:
# Fetch the train/test arrays and the data generator from the project helper.
# NOTE(review): return order and any preprocessing are defined in
# src/utils/generate_data.get_cifar10 — confirm there.
(x_train, y_train,
 x_test, y_test,
 datagen) = generate_data.get_cifar10(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.2,
)

In [None]:
# Derived from the loaded arrays: every axis of x_train after the first,
# and the size of y_train's second axis.
INPUT_SHAPE = x_train.shape[1:]
NUM_CLASS = y_train.shape[1]

# Training settings shared by the experiments below
BATCH_SIZE, N_EPOCH = 256, 10

## Naive Attention Learning vs Residual Attention Learning

### Naive Attention Learning

In [None]:
# Build the network with learning_type='nal' (naive attention learning).
ran_model = ResidualAttentionNetwork(learning_type='nal')

# Call the model once on a symbolic input so its layers are created and
# summary() can report parameter counts. The input shape comes from
# INPUT_SHAPE (derived from x_train) rather than a hard-coded (32, 32, 3),
# so the model is always built for the data that is actually loaded.
inputs = tf.keras.Input(shape=INPUT_SHAPE)
ran_model(inputs)
ran_model.summary()

Model: "residual_attention_network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             multiple                  864       
                                                                 
 batch_normalization (BatchN  multiple                 128       
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                multiple                  0         
                                                                 
 residual_unit (ResidualUnit  multiple                 6432      
 )                                                               
                                                                 
 attention_module (Attention  multiple                 116608    
 Module)                                                         
                                        

In [None]:
# Keep a dedicated reference: `ran_model` is rebound by later cells.
nal_model = ran_model

nal_model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): the validation batches are drawn from the same `datagen` as the
# training batches. If that generator applies its random transforms to the
# 'validation' subset as well, validation metrics are measured on transformed
# images — confirm whether that is intended.
nal_history = nal_model.fit(
    datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='training'),
    validation_data=datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='validation'),
    epochs=N_EPOCH, verbose=1)

# The result is stored under an experiment-specific name so a later training
# cell that assigns `history` cannot discard it; `history` is kept as an alias
# for any code that still reads the generic name.
history = nal_history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Residual Attention Learning

In [None]:
# Build the network with learning_type='arl'.
ran_model = ResidualAttentionNetwork(learning_type='arl')

# Call the model once on a symbolic input so its layers are created and
# summary() can report parameter counts. The input shape comes from
# INPUT_SHAPE (derived from x_train) rather than a hard-coded (32, 32, 3),
# so the model is always built for the data that is actually loaded.
inputs = tf.keras.Input(shape=INPUT_SHAPE)
ran_model(inputs)
ran_model.summary()

Model: "residual_attention_network_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_139 (Conv2D)         multiple                  864       
                                                                 
 batch_normalization_107 (Ba  multiple                 128       
 tchNormalization)                                               
                                                                 
 re_lu_107 (ReLU)            multiple                  0         
                                                                 
 residual_unit_33 (ResidualU  multiple                 6432      
 nit)                                                            
                                                                 
 attention_module_3 (Attenti  multiple                 116608    
 onModule)                                                       
                                      

In [None]:
# Keep a dedicated reference: `ran_model` is rebound by later cells.
arl_model = ran_model

arl_model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): the validation batches are drawn from the same `datagen` as the
# training batches. If that generator applies its random transforms to the
# 'validation' subset as well, validation metrics are measured on transformed
# images — confirm whether that is intended.
arl_history = arl_model.fit(
    datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='training'),
    validation_data=datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='validation'),
    epochs=N_EPOCH, verbose=1)

# The result is stored under an experiment-specific name so another training
# cell that assigns `history` cannot discard it; `history` is kept as an alias
# for any code that still reads the generic name.
history = arl_history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Different numbers of attention modules

In this part, the experiments are run using the Residual Attention Learning mechanism.

### Attention-56

In [None]:
# Build the Attention-56 model with its default constructor arguments.
ran_model = Attention56()

# Call the model once on a symbolic input so its layers are created and
# summary() can report parameter counts. The input shape comes from
# INPUT_SHAPE (derived from x_train) rather than a hard-coded (32, 32, 3),
# so the model is always built for the data that is actually loaded.
inputs = tf.keras.Input(shape=INPUT_SHAPE)
ran_model(inputs)
ran_model.summary()

Model: "attention56"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             multiple                  1728      
                                                                 
 batch_normalization (BatchN  multiple                 256       
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                multiple                  0         
                                                                 
 max_pooling2d (MaxPooling2D  multiple                 0         
 )                                                               
                                                                 
 residual_unit (ResidualUnit  multiple                 75904     
 )                                                               
                                                       

In [None]:
# Keep a dedicated reference: `ran_model` is rebound by later cells.
a56_model = ran_model

a56_model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): the validation batches are drawn from the same `datagen` as the
# training batches. If that generator applies its random transforms to the
# 'validation' subset as well, validation metrics are measured on transformed
# images — confirm whether that is intended.
a56_history = a56_model.fit(
    datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='training'),
    validation_data=datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='validation'),
    epochs=N_EPOCH, verbose=1)

# The result is stored under an experiment-specific name so another training
# cell that assigns `history` cannot discard it; `history` is kept as an alias
# for any code that still reads the generic name.
history = a56_history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
 14/157 [=>............................] - ETA: 3:04 - loss: 1.4835 - accuracy: 0.4554

KeyboardInterrupt: ignored

### Attention-92

In [None]:
# Build the Attention-92 model with its default constructor arguments.
ran_model = Attention92()

# Call the model once on a symbolic input so its layers are created and
# summary() can report parameter counts. The input shape comes from
# INPUT_SHAPE (derived from x_train) rather than a hard-coded (32, 32, 3),
# so the model is always built for the data that is actually loaded.
inputs = tf.keras.Input(shape=INPUT_SHAPE)
ran_model(inputs)
ran_model.summary()

Model: "attention92"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_139 (Conv2D)         multiple                  1728      
                                                                 
 batch_normalization_107 (Ba  multiple                 256       
 tchNormalization)                                               
                                                                 
 re_lu_107 (ReLU)            multiple                  0         
                                                                 
 max_pooling2d_7 (MaxPooling  multiple                 0         
 2D)                                                             
                                                                 
 residual_unit_33 (ResidualU  multiple                 75904     
 nit)                                                            
                                                       

In [None]:
# Keep a dedicated reference to this model, independent of the reusable
# `ran_model` name.
a92_model = ran_model

a92_model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): the validation batches are drawn from the same `datagen` as the
# training batches. If that generator applies its random transforms to the
# 'validation' subset as well, validation metrics are measured on transformed
# images — confirm whether that is intended.
a92_history = a92_model.fit(
    datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='training'),
    validation_data=datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='validation'),
    epochs=N_EPOCH, verbose=1)

# The result is stored under an experiment-specific name so another training
# cell that assigns `history` cannot discard it; `history` is kept as an alias
# for any code that still reads the generic name.
history = a92_history