In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import skimage.transform
import time, os, datetime
from Model import Residual_Unit
from Model import Attention_Block
from Model import Attention_Block_NAL
from Model import AttentionResNet92
from Model import AttentionResNet92_NAL

In [2]:
print("TF version: ",tf.__version__)
print("Keras version:",tf.keras.__version__)

TF version:  2.0.0
Keras version: 2.2.4-tf


In [3]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping,TensorBoard, LearningRateScheduler
from tensorflow.keras.models import load_model

In [5]:
# Load the CIFAR10 data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train[:45000, :, :, :]
y_train = y_train[:45000]
x_val = x_train[-5000:, :, :, :]
y_val = y_train[-5000:]

# Convert class vectors to binary class matrices.
y_train = to_categorical(y_train, 10)
y_val = to_categorical(y_val, 10)
y_test = to_categorical(y_test, 10)

print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
print('x_test shape:', x_test.shape)
print('y_test shape:', y_test.shape)

x_train shape: (45000, 32, 32, 3)
y_train shape: (45000, 10)
x_test shape: (10000, 32, 32, 3)
y_test shape: (10000, 10)


In [6]:
# define generators for training and validation data
train_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2)

val_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True)

test_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True)

# compute quantities required for feature normalization
train_datagen.fit(x_train)
val_datagen.fit(x_val)
test_datagen.fit(x_test)

## Training Residual AttentionNet 92

In [7]:
model = AttentionResNet92(shape=(32,32,3), in_channel=32, kernel_size=5, n_classes=10, dropout=0.4, regularization=0.00001)

def lr_schedule(epoch):
    lr = 1e-5
    if epoch > 80:
        lr *= 1e-5
    elif epoch > 60:
        lr *= 1e-4
    elif epoch >40:
        lr *= 1e-2
    print('Learning rate:', lr)
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)

# define loss, metrics, optimizer
optimizer = Adam(lr = lr_schedule(0) )
#optimizer = SGD(lr = lr_schedule(0), momentum=0.9, nesterov = True)
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Learning rate: 1e-05
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 32)   2432        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 32)   128         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 32)   0           batch_normalization[0][0]        
_________________________________________________________________________

__________________________________________________________________________________________________
activation_130 (Activation)     (None, 4, 4, 512)    0           batch_normalization_124[0][0]    
__________________________________________________________________________________________________
conv2d_177 (Conv2D)             (None, 4, 4, 512)    262656      activation_130[0][0]             
__________________________________________________________________________________________________
batch_normalization_125 (BatchN (None, 4, 4, 512)    2048        conv2d_177[0][0]                 
__________________________________________________________________________________________________
activation_131 (Activation)     (None, 4, 4, 512)    0           batch_normalization_125[0][0]    
__________________________________________________________________________________________________
conv2d_178 (Conv2D)             (None, 4, 4, 512)    2359808     activation_131[0][0]             
__________

In [8]:
batch_size = 128
epc = 100

start = time.time()

# training
train_generator = train_datagen.flow(x_train, y_train, batch_size=batch_size)
step_size_train = train_generator.n // train_generator.batch_size

# validation
val_generator = val_datagen.flow(x_val, y_val, batch_size=batch_size)
step_size_val = val_generator.n // val_generator.batch_size

# test
test_generator = test_datagen.flow(x_test, y_test, batch_size=batch_size)
step_size_test = test_generator.n // test_generator.batch_size

# usefull callbacks
log_dir="Logs/92full"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
lr_reducer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, verbose=1)
early_stopper = EarlyStopping(monitor='val_accuracy', patience=10, verbose=1)

model.fit_generator(train_generator,
                    steps_per_epoch = step_size_train,
                    epochs = epc,
                    validation_data = val_generator,
                    validation_steps = step_size_val,
                    callbacks=[tensorboard_callback, lr_reducer, lr_scheduler, early_stopper])

end = time.time()
print("Time taken by above cell is {}.".format((end-start)/60))

Learning rate: 1e-05
Epoch 1/100
Learning rate: 1e-05
Epoch 2/100
Learning rate: 1e-05
Epoch 3/100
Learning rate: 1e-05
Epoch 4/100
Learning rate: 1e-05
Epoch 5/100
Learning rate: 1e-05
Epoch 6/100
Learning rate: 1e-05
Epoch 7/100
Learning rate: 1e-05
Epoch 8/100
Learning rate: 1e-05
Epoch 9/100
Learning rate: 1e-05
Epoch 10/100
Learning rate: 1e-05
Epoch 11/100
Learning rate: 1e-05
Epoch 12/100
Learning rate: 1e-05
Epoch 13/100
Learning rate: 1e-05
Epoch 14/100
Learning rate: 1e-05
Epoch 15/100
Learning rate: 1e-05
Epoch 16/100
Learning rate: 1e-05
Epoch 17/100
Learning rate: 1e-05
Epoch 18/100
Learning rate: 1e-05
Epoch 19/100
Learning rate: 1e-05
Epoch 20/100
Learning rate: 1e-05
Epoch 21/100
Learning rate: 1e-05
Epoch 22/100
Learning rate: 1e-05
Epoch 23/100
Learning rate: 1e-05
Epoch 24/100
Learning rate: 1e-05
Epoch 25/100
Learning rate: 1e-05
Epoch 26/100
Learning rate: 1e-05
Epoch 27/100
Learning rate: 1e-05
Epoch 28/100
Learning rate: 1e-05
Epoch 29/100
Learning rate: 1e-05
Ep

Learning rate: 1.0000000000000001e-07
Epoch 49/100
Learning rate: 1.0000000000000001e-07
Epoch 50/100
Learning rate: 1.0000000000000001e-07
Epoch 51/100
Learning rate: 1.0000000000000001e-07
Epoch 52/100
Learning rate: 1.0000000000000001e-07
Epoch 53/100
Learning rate: 1.0000000000000001e-07
Epoch 54/100
Learning rate: 1.0000000000000001e-07
Epoch 55/100
Learning rate: 1.0000000000000001e-07
Epoch 56/100
Learning rate: 1.0000000000000001e-07
Epoch 57/100
Learning rate: 1.0000000000000001e-07
Epoch 58/100
Learning rate: 1.0000000000000001e-07
Epoch 59/100
Learning rate: 1.0000000000000001e-07
Epoch 60/100
Learning rate: 1.0000000000000001e-07
Epoch 61/100
Learning rate: 1e-09
Epoch 62/100
Learning rate: 1e-09
Epoch 63/100
Learning rate: 1e-09
Epoch 64/100
Learning rate: 1e-09
Epoch 65/100
Epoch 00065: ReduceLROnPlateau reducing learning rate to 9.999999717180686e-11.
Learning rate: 1e-09
Epoch 66/100
Learning rate: 1e-09
Epoch 67/100
Learning rate: 1e-09
Epoch 68/100
Learning rate: 1e-0

In [9]:
# evaluation
val_scores = model.evaluate_generator(val_generator, verbose=0)
test_scores = model.evaluate_generator(test_generator, verbose=0)
print('validation loss:', val_scores[0])
print('validation accuracy:', val_scores[1])
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

validation loss: 0.9397639229893684
validation accuracy: 0.668
Test loss: 1.018264941776855
Test accuracy: 0.6472


## Training Residual AttentionNet 92 - NAL

In [6]:
model = AttentionResNet92_NAL(shape=(32,32,3), in_channel=32, kernel_size=5, n_classes=10, dropout=None, regularization=0.00001)

def lr_schedule(epoch):
    lr = 1e-5
    if epoch > 80:
        lr *= 1e-5
    elif epoch > 60:
        lr *= 1e-4
    elif epoch >40:
        lr *= 1e-2
    print('Learning rate:', lr)
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)
# define loss, metrics, optimizer
optimizer = Adam(lr = lr_schedule(0) )
#optimizer = SGD(lr = lr_schedule(0), momentum=0.9, nesterov = True)
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

Learning rate: 1e-05


In [7]:
batch_size = 128
epc = 100

start = time.time()

# training
train_generator = train_datagen.flow(x_train, y_train, batch_size=batch_size)
step_size_train = train_generator.n // train_generator.batch_size

# validation
val_generator = val_datagen.flow(x_val, y_val, batch_size=batch_size)
step_size_val = val_generator.n // val_generator.batch_size

# test
test_generator = test_datagen.flow(x_test, y_test, batch_size=batch_size)
step_size_test = test_generator.n // test_generator.batch_size

# usefull callbacks
log_dir="Logs/92full-NAL"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
lr_reducer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, verbose=1)
early_stopper = EarlyStopping(monitor='val_accuracy', patience=15, verbose=1)

model.fit_generator(train_generator,
                    steps_per_epoch = step_size_train,
                    epochs = epc,
                    validation_data = val_generator,
                    validation_steps = step_size_val,
                    callbacks=[tensorboard_callback, lr_reducer, lr_scheduler, early_stopper])

end = time.time()
print("Time taken by above cell is {}.".format((end-start)/60))

Learning rate: 1e-05
Epoch 1/100
Learning rate: 1e-05
Epoch 2/100
Learning rate: 1e-05
Epoch 3/100
Learning rate: 1e-05
Epoch 4/100
Learning rate: 1e-05
Epoch 5/100
Learning rate: 1e-05
Epoch 6/100
Learning rate: 1e-05
Epoch 7/100
Learning rate: 1e-05
Epoch 8/100
Learning rate: 1e-05
Epoch 9/100
Learning rate: 1e-05
Epoch 10/100
Learning rate: 1e-05
Epoch 11/100
Learning rate: 1.0000000000000002e-06
Epoch 12/100
Learning rate: 1.0000000000000002e-06
Epoch 13/100
Learning rate: 1.0000000000000002e-06
Epoch 14/100
Learning rate: 1.0000000000000002e-06
Epoch 15/100
Learning rate: 1.0000000000000002e-06
Epoch 16/100
Learning rate: 1.0000000000000002e-06
Epoch 17/100
Learning rate: 1.0000000000000002e-06
Epoch 18/100
Learning rate: 1.0000000000000002e-06
Epoch 19/100
Learning rate: 1.0000000000000002e-06
Epoch 20/100
Learning rate: 1.0000000000000002e-06
Epoch 21/100
Learning rate: 1.0000000000000001e-07
Epoch 22/100
Learning rate: 1.0000000000000001e-07
Epoch 23/100
Learning rate: 1.000000

## Training CIFAR-100

In [10]:
# Load the CIFAR10 data.
(x_train, y_train), (x_test, y_test) = cifar100.load_data()
x_train = x_train[:25000, :, :, :]
y_train = y_train[:25000]
x_val = x_train[-5000:, :, :, :]
y_val = y_train[-5000:]

# Convert class vectors to binary class matrices.
y_train = to_categorical(y_train, 100)
y_val = to_categorical(y_val, 100)
y_test = to_categorical(y_test, 100)

print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
print('x_test shape:', x_test.shape)
print('y_test shape:', y_test.shape)

x_train shape: (25000, 32, 32, 3)
y_train shape: (25000, 100)
x_test shape: (10000, 32, 32, 3)
y_test shape: (10000, 100)


In [11]:
# define generators for training and validation data
train_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2)

val_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True)

test_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True)

# compute quantities required for feature normalization
train_datagen.fit(x_train)
val_datagen.fit(x_val)
test_datagen.fit(x_test)

In [12]:
model = AttentionResNet92(shape=(32,32,3), in_channel=32, kernel_size=5, n_classes=100, dropout=0.4, regularization=0.0001)

def lr_schedule(epoch):
    lr = 1e-5
    if epoch > 80:
        lr *= 1e-5
    elif epoch > 60:
        lr *= 1e-4
    elif epoch >40:
        lr *= 1e-2
    print('Learning rate:', lr)
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)

# define loss, metrics, optimizer
optimizer = Adam(lr = lr_schedule(0) )
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Learning rate: 1e-05
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_193 (Conv2D)             (None, 32, 32, 32)   2432        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_136 (BatchN (None, 32, 32, 32)   128         conv2d_193[0][0]                 
__________________________________________________________________________________________________
activation_142 (Activation)     (None, 32, 32, 32)   0           batch_normalization_136[0][0]    
_______________________________________________________________________

batch_normalization_223 (BatchN (None, 2, 2, 512)    2048        conv2d_313[0][0]                 
__________________________________________________________________________________________________
activation_224 (Activation)     (None, 4, 4, 512)    0           batch_normalization_215[0][0]    
__________________________________________________________________________________________________
activation_232 (Activation)     (None, 2, 2, 512)    0           batch_normalization_223[0][0]    
__________________________________________________________________________________________________
conv2d_304 (Conv2D)             (None, 4, 4, 512)    262656      activation_224[0][0]             
__________________________________________________________________________________________________
conv2d_314 (Conv2D)             (None, 2, 2, 512)    262656      activation_232[0][0]             
__________________________________________________________________________________________________
conv2d_315

In [13]:
batch_size = 128
epc = 60

start = time.time()

# training
train_generator = train_datagen.flow(x_train, y_train, batch_size=batch_size)
step_size_train = train_generator.n // train_generator.batch_size

# validation
val_generator = val_datagen.flow(x_val, y_val, batch_size=batch_size)
step_size_val = val_generator.n // val_generator.batch_size

# test
test_generator = test_datagen.flow(x_test, y_test, batch_size=batch_size)
step_size_test = test_generator.n // test_generator.batch_size

# usefull callbacks
log_dir="Logs/92-CIFAR100"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
lr_reducer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, verbose=1)
early_stopper = EarlyStopping(monitor='val_accuracy', patience=15, verbose=1)

model.fit_generator(train_generator,
                    steps_per_epoch = step_size_train,
                    epochs = epc,
                    validation_data = val_generator,
                    validation_steps = step_size_val,
                    callbacks=[tensorboard_callback, lr_reducer, lr_scheduler, early_stopper])

end = time.time()
print("Time taken by above cell is {}.".format((end-start)/60))

Learning rate: 1e-05
Epoch 1/60
Learning rate: 1e-05
Epoch 2/60
Learning rate: 1e-05
Epoch 3/60
Learning rate: 1e-05
Epoch 4/60
Learning rate: 1e-05
Epoch 5/60
Learning rate: 1e-05
Epoch 6/60
Learning rate: 1e-05
Epoch 7/60
Learning rate: 1e-05
Epoch 8/60
Learning rate: 1e-05
Epoch 9/60
Learning rate: 1e-05
Epoch 10/60
Learning rate: 1e-05
Epoch 11/60
Learning rate: 1e-05
Epoch 12/60
Learning rate: 1e-05
Epoch 13/60
Learning rate: 1e-05
Epoch 14/60
Learning rate: 1e-05
Epoch 15/60


Learning rate: 1e-05
Epoch 16/60
Learning rate: 1e-05
Epoch 17/60
Learning rate: 1e-05
Epoch 18/60
Learning rate: 1e-05
Epoch 19/60
Learning rate: 1e-05
Epoch 20/60
Learning rate: 1e-05
Epoch 21/60
Learning rate: 1e-05
Epoch 22/60
Learning rate: 1e-05
Epoch 23/60
Learning rate: 1e-05
Epoch 24/60
Learning rate: 1e-05
Epoch 25/60
Learning rate: 1e-05
Epoch 26/60
Learning rate: 1e-05
Epoch 27/60
Learning rate: 1e-05
Epoch 28/60
Learning rate: 1e-05
Epoch 29/60
Learning rate: 1e-05
Epoch 30/60


Learning rate: 1e-05
Epoch 31/60
Learning rate: 1e-05
Epoch 32/60
Learning rate: 1e-05
Epoch 33/60
Learning rate: 1e-05
Epoch 34/60
Learning rate: 1e-05
Epoch 35/60
Learning rate: 1e-05
Epoch 36/60
Learning rate: 1e-05
Epoch 37/60
Learning rate: 1e-05
Epoch 38/60
Learning rate: 1e-05
Epoch 39/60
Learning rate: 1e-05
Epoch 40/60
Learning rate: 1e-05
Epoch 41/60
Learning rate: 1.0000000000000001e-07
Epoch 42/60
Learning rate: 1.0000000000000001e-07
Epoch 43/60
Learning rate: 1.0000000000000001e-07
Epoch 44/60
Learning rate: 1.0000000000000001e-07
Epoch 45/60


Learning rate: 1.0000000000000001e-07
Epoch 46/60
Learning rate: 1.0000000000000001e-07
Epoch 47/60
Learning rate: 1.0000000000000001e-07
Epoch 48/60
Learning rate: 1.0000000000000001e-07
Epoch 49/60
Learning rate: 1.0000000000000001e-07
Epoch 50/60
Learning rate: 1.0000000000000001e-07
Epoch 51/60
Learning rate: 1.0000000000000001e-07
Epoch 52/60
Learning rate: 1.0000000000000001e-07
Epoch 53/60
Learning rate: 1.0000000000000001e-07
Epoch 54/60
Learning rate: 1.0000000000000001e-07
Epoch 55/60
Learning rate: 1.0000000000000001e-07
Epoch 56/60
Learning rate: 1.0000000000000001e-07
Epoch 57/60
Learning rate: 1.0000000000000001e-07
Epoch 58/60
Learning rate: 1.0000000000000001e-07
Epoch 59/60


Learning rate: 1.0000000000000001e-07
Epoch 60/60
Time taken by above cell is 201.2063398520152.


In [14]:
# evaluation
val_scores = model.evaluate_generator(val_generator, verbose=0)
test_scores = model.evaluate_generator(test_generator, verbose=0)
print('validation loss:', val_scores[0])
print('validation accuracy:', val_scores[1])
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

validation loss: 2.525966650247574
validation accuracy: 0.356
Test loss: 2.8661425928526287
Test accuracy: 0.3053
