In [1]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization,LayerNormalization
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.python.data import Dataset, AUTOTUNE

In [2]:
from tensorflow.python.eager.context import PhysicalDevice
from typing import List
print("TensorFlow version:", tf.__version__)
# setting the growth GPU memory occupying and print devices
try:
    devices: List[PhysicalDevice] = tf.config.list_physical_devices('GPU')
    for device in devices:
        tf.config.experimental.set_memory_growth(device, True)
    print("Use devices:", list(map(lambda d: d.name, devices)))
except IndexError:
    print("Use CPU")

TensorFlow version: 2.6.0
Use devices: ['/physical_device:GPU:0']


# load Data

In [3]:
cifar100 = tf.keras.datasets.cifar100
(train_images, train_labels), (test_images, test_labels) = cifar100.load_data()

In [4]:
pair_dataset = Dataset.from_tensor_slices((train_images, train_labels))
test_dataset = Dataset.from_tensor_slices((test_images, test_labels))
# zip images and labels as pairs

# split the train and validation dataset
val_size = int(len(train_images)* 0.2)
train_dataset = pair_dataset.skip(val_size)
val_dataset = pair_dataset.take(val_size)

# define the batch_size and shuffle
train_dataset = train_dataset.cache().shuffle(buffer_size=1000).batch(256).prefetch(AUTOTUNE)
val_dataset = val_dataset.cache().batch(64).prefetch(AUTOTUNE)
test_dataset = test_dataset.cache().shuffle(buffer_size=1000).batch(256).prefetch(AUTOTUNE)

# Task 1 Baseline CNN model

In [5]:
Baseline_CNN_model = Sequential([
    Conv2D(16,7,strides= 1,padding='valid', activation="relu", input_shape=(32, 32, 3)),
    MaxPooling2D(2, strides=2),
    Conv2D(32, 5,strides= 1,padding='valid', activation="relu"),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(100),
])

# TODO - Print summary
Baseline_CNN_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 16)        2368      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 32)          12832     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               65664     
_________________________________________________________________
dense_1 (Dense)              (None, 100)               1

In [6]:
learning_rate = 1e-3
lr_decay = ExponentialDecay(learning_rate, decay_steps=100, decay_rate=0.96)

In [7]:
Baseline_CNN_model.compile(optimizer=Adam(lr_decay),
                  loss=SparseCategoricalCrossentropy(from_logits=True),
                  metrics=["accuracy"])
history = Baseline_CNN_model.fit(train_dataset, validation_data=val_dataset, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [8]:
loss, acc = Baseline_CNN_model.evaluate(test_dataset)
print("Accuracy:", acc)

Accuracy: 0.19580000638961792


# Task 2 Best model

Using Rescaling, Data Normalization, Data augmentation ,Batch Normalizationand,layer normalization and ResNet18 model to get best accuracy

In [9]:
from tensorflow.keras.layers.experimental.preprocessing import Rescaling,RandomFlip, RandomRotation, RandomZoom
data_preprocess = Sequential([
  Rescaling(1 / 255,input_shape=(32,32,3)),
  RandomFlip("horizontal_and_vertical"),
  RandomRotation(0.2),
  RandomZoom(0.1)
])

In [10]:
# TODO - Define ResNet18 model.
from tensorflow.keras import Model
from tensorflow.keras.layers import Layer, BatchNormalization, ReLU, Add, GlobalAvgPool2D, Input,LayerNormalization

from enum import Enum


class ResidualBlockType(Enum):
    SHALLOW = "shallow"  # the block used in ResNet18 and ResNet34
    DEEP = "deep"  # the block used in ResNet50, ResNet101 and ResNet152


class ResidualBlock(Layer):
    class ShallowPath(Layer):
        def __init__(self, filters: int, strides: int):
            super().__init__()
            self.conv0 = Conv2D(filters, kernel_size=3, padding="same", strides=strides)
            self.bn0 = BatchNormalization()
            self.relu0 = ReLU()
            self.layer_norm0 = LayerNormalization(axis=3 , center=True , scale=True)
            self.conv1 = Conv2D(filters, kernel_size=3, padding="same")
            self.bn1 = BatchNormalization()
            self.layer_norm1 = LayerNormalization(axis=3 , center=True , scale=True)
        def call(self, inputs, *args, **kwargs):
            x = self.conv0(inputs)
            x = self.layer_norm0(x)
            x = self.bn0(x)
            x = self.relu0(x)

            x = self.conv1(x)
            x = self.layer_norm1(x)
            x = self.bn1(x)

            return x

    class DeepPath(Layer):
        def __init__(self, filters: int, strides: int):
            super().__init__()
            self.conv0 = Conv2D(filters, kernel_size=1, padding="same", strides=strides)
            self.bn0 = BatchNormalization()
            self.layer_norm0 = LayerNormalization(axis=3 , center=True , scale=True)
            self.relu0 = ReLU()

            self.conv1 = Conv2D(filters, kernel_size=3, padding="same")
            self.bn1 = BatchNormalization()
            self.layer_norm1 = LayerNormalization(axis=3 , center=True , scale=True)
            self.relu1 = ReLU()

            self.conv2 = Conv2D(filters * 4, kernel_size=1, padding="same")
            self.bn2 = BatchNormalization()
            self.layer_norm2 = LayerNormalization(axis=3 , center=True , scale=True)

        def call(self, inputs, *args, **kwargs):
            x = self.conv0(inputs)
            x = self.layer_norm0(x)
            x = self.bn0(x)
            x = self.relu0(x)

            x = self.conv1(x)
            x = self.layer_norm1(x)
            x = self.bn1(x)
            x = self.relu1(x)

            x = self.conv2(x)
            x = self.layer_norm2(x)
            x = self.bn2(x)

            return x

    def __init__(self, filters: int, strides: int, repeat: int,
                 architecture: ResidualBlockType = ResidualBlockType.SHALLOW):
        super().__init__()
        if architecture == ResidualBlockType.SHALLOW:
            MainPath = ResidualBlock.ShallowPath
        elif architecture == ResidualBlockType.DEEP:
            MainPath = ResidualBlock.DeepPath
        else:
            raise ValueError("Unknown residual block type")

        self.repeat = repeat
        self.blocks = []
        for i in range(repeat):
            main_path = MainPath(filters, strides=strides) if i == 0 else MainPath(filters, 1)
            residual_path = Conv2D(filters, kernel_size=1,
                                   strides=strides) if i == 0 else Layer()  # Layer class used here as Identity layer
            addition = Add()
            relu = ReLU()
            self.blocks.append((main_path, residual_path, addition, relu))

    def call(self, inputs, *args, **kwargs):
        x = inputs
        for i in range(self.repeat):
            main_path, residual_path, addition, relu = self.blocks[i]
            main_path_x = main_path(x)
            residual_path_x = residual_path(x)
            x = addition([main_path_x, residual_path_x])
            x = relu(x)
        return x


class ResNet18(Model):

    def __init__(self):
        super().__init__()
        self.rescaling = tf.keras.layers.Rescaling(1/255)
        self.norm_data = tf.keras.layers.Normalization()
        self.conv1 = Conv2D(64, 7, strides=2, padding="same")
        self.bn1 = BatchNormalization()
        self.layer_norm = LayerNormalization(axis=3 , center=True , scale=True)
        self.relu1 = ReLU()
        self.pool1 = MaxPooling2D(3, strides=2, padding="same")

        residual_type = ResidualBlockType.SHALLOW

        self.conv2_x = ResidualBlock(filters=64, strides=1, repeat=2, architecture=residual_type)
        self.conv3_x = ResidualBlock(filters=128, strides=2, repeat=2, architecture=residual_type)
        self.conv4_x = ResidualBlock(filters=256, strides=2, repeat=2, architecture=residual_type)
        self.conv5_x = ResidualBlock(filters=512, strides=2, repeat=2, architecture=residual_type)

        self.global_pool = GlobalAvgPool2D()
        self.fc = Dense(100)

        self.build(input_shape=(None, 32, 32, 3))
        self.call(Input(shape=(32, 32, 3)))

    def call(self, inputs, training=None, mask=None):
        x = self.rescaling(inputs)
        x = self.norm_data(x)
        x = self.conv1(x)
        x = self.layer_norm(x)
        x = self.bn1(x)

        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2_x(x)
        x = self.conv3_x(x)
        x = self.conv4_x(x)
        x = self.conv5_x(x)
        x = self.global_pool(x)
        x = self.fc(x)

        return x

    def get_config(self):
        pass
best_model = ResNet18()
best_model.summary()

Model: "res_net18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling_1 (Rescaling)      (None, 32, 32, 3)         0         
_________________________________________________________________
normalization (Normalization (None, 32, 32, 3)         7         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 64)        9472      
_________________________________________________________________
batch_normalization (BatchNo (None, 16, 16, 64)        256       
_________________________________________________________________
layer_normalization (LayerNo (None, 16, 16, 64)        128       
_________________________________________________________________
re_lu (ReLU)                 (None, 16, 16, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 8, 8, 64)          0 

In [12]:
best_model.compile(optimizer=Adam(lr_decay),
                  loss=SparseCategoricalCrossentropy(from_logits=True),
                  metrics=["accuracy"])
epochs = 50
history_18 = best_model.fit(train_dataset, validation_data=val_dataset, epochs=epochs)
loss_1, acc_1 = best_model.evaluate(test_dataset)
print("Accuracy:", acc_1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy: 0.43380001187324524


# Task 3
add attention layer to the resNet18

In [13]:
!pip install tensorflow-addons

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.16.1-cp38-cp38-win_amd64.whl (755 kB)
     -------------------------------------- 755.7/755.7 KB 6.8 MB/s eta 0:00:00
Collecting typeguard>=2.7
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.16.1 typeguard-2.13.3


In [15]:
# TODO - Define ResNet18 model.
import tensorflow_addons as tfa

from tensorflow.keras import Model
from tensorflow.keras.layers import Layer, BatchNormalization, ReLU, Add, GlobalAvgPool2D, Input,LayerNormalization,Attention,Softmax
# from tensorflow.keras.layers import Attention,Reshape,Input,Conv2D,Conv2DTranspose
from tensorflow.keras.models import Model

from enum import Enum


class ResidualBlockType(Enum):
    SHALLOW = "shallow"  # the block used in ResNet18 and ResNet34
    DEEP = "deep"  # the block used in ResNet50, ResNet101 and ResNet152


class ResidualBlock(Layer):
    class ShallowPath(Layer):
        def __init__(self, filters: int, strides: int):
            super().__init__()
            self.conv0 = Conv2D(filters, kernel_size=3, padding="same", strides=strides)
            self.bn0 = BatchNormalization()
            self.relu0 = ReLU()
            self.layer_norm0 = LayerNormalization(axis=3 , center=True , scale=True)
            self.conv1 = Conv2D(filters, kernel_size=3, padding="same")
            self.bn1 = BatchNormalization()
            self.layer_norm1 = LayerNormalization(axis=3 , center=True , scale=True)
        def call(self, inputs, *args, **kwargs):
            x = self.conv0(inputs)
            x = self.layer_norm0(x)
            x = self.bn0(x)
            x = self.relu0(x)

            x = self.conv1(x)
            x = self.layer_norm1(x)
            x = self.bn1(x)

            return x

    class DeepPath(Layer):
        def __init__(self, filters: int, strides: int):
            super().__init__()
            self.conv0 = Conv2D(filters, kernel_size=1, padding="same", strides=strides)
            self.bn0 = BatchNormalization()
            self.layer_norm0 = LayerNormalization(axis=3 , center=True , scale=True)
            self.relu0 = ReLU()

            self.conv1 = Conv2D(filters, kernel_size=3, padding="same")
            self.bn1 = BatchNormalization()
            self.layer_norm1 = LayerNormalization(axis=3 , center=True , scale=True)
            self.relu1 = ReLU()

            self.conv2 = Conv2D(filters * 4, kernel_size=1, padding="same")
            self.bn2 = BatchNormalization()
            self.layer_norm2 = LayerNormalization(axis=3 , center=True , scale=True)

        def call(self, inputs, *args, **kwargs):
            x = self.conv0(inputs)
            x = self.layer_norm0(x)
            x = self.bn0(x)
            x = self.relu0(x)

            x = self.conv1(x)
            x = self.layer_norm1(x)
            x = self.bn1(x)
            x = self.relu1(x)

            x = self.conv2(x)
            x = self.layer_norm2(x)
            x = self.bn2(x)

            return x

    def __init__(self, filters: int, strides: int, repeat: int,
                 architecture: ResidualBlockType = ResidualBlockType.SHALLOW):
        super().__init__()
        if architecture == ResidualBlockType.SHALLOW:
            MainPath = ResidualBlock.ShallowPath
        elif architecture == ResidualBlockType.DEEP:
            MainPath = ResidualBlock.DeepPath
        else:
            raise ValueError("Unknown residual block type")

        self.repeat = repeat
        self.blocks = []
        for i in range(repeat):
            main_path = MainPath(filters, strides=strides) if i == 0 else MainPath(filters, 1)
            residual_path = Conv2D(filters, kernel_size=1,
                                   strides=strides) if i == 0 else Layer()  # Layer class used here as Identity layer
            addition = Add()
            relu = ReLU()
            self.blocks.append((main_path, residual_path, addition, relu))

    def call(self, inputs, *args, **kwargs):
        x = inputs
        for i in range(self.repeat):
            main_path, residual_path, addition, relu = self.blocks[i]
            main_path_x = main_path(x)
            residual_path_x = residual_path(x)
            x = addition([main_path_x, residual_path_x])
            x = relu(x)
        return x


class ResNet18_attention(Model):

    def __init__(self):
        super().__init__()
        self.rescaling = tf.keras.layers.Rescaling(1/255)
        self.norm_data = tf.keras.layers.Normalization()
        self.conv1 = Conv2D(64, 7, strides=2, padding="same")
        self.attention = Sequential([Conv2D(64,7, strides=2, padding="same"),tfa.layers.AdaptiveAveragePooling2D((8,8)),Softmax()])
        self.bn1 = BatchNormalization()
        self.layer_norm = LayerNormalization(axis=3 , center=True , scale=True)
        self.relu1 = ReLU()
        self.pool1 = MaxPooling2D(3, strides=2, padding="same")

        residual_type = ResidualBlockType.SHALLOW

        self.conv2_x = ResidualBlock(filters=64, strides=1, repeat=2, architecture=residual_type)
        self.conv3_x = ResidualBlock(filters=128, strides=2, repeat=2, architecture=residual_type)
        self.conv4_x = ResidualBlock(filters=256, strides=2, repeat=2, architecture=residual_type)
        self.conv5_x = ResidualBlock(filters=512, strides=2, repeat=2, architecture=residual_type)

        self.global_pool = GlobalAvgPool2D()
        self.fc = Dense(100)

        self.build(input_shape=(None, 32, 32, 3))
        self.call(Input(shape=(32, 32, 3)))

    def call(self, inputs, training=None, mask=None):
        x = self.rescaling(inputs)
        x = self.norm_data(x)
        x = self.conv1(x)

        x_attention =  self.attention(x)
        x = self.layer_norm(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = x*x_attention
        x = self.conv2_x(x)

        x = self.conv3_x(x)
        x = self.conv4_x(x)
        x = self.conv5_x(x)
        x = self.global_pool(x)
        x = self.fc(x)

        return x

    def get_config(self):
        pass
task_3 = ResNet18_attention()
task_3 .summary()

Model: "res_net18_attention_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling_3 (Rescaling)      (None, 32, 32, 3)         0         
_________________________________________________________________
normalization_2 (Normalizati (None, 32, 32, 3)         7         
_________________________________________________________________
conv2d_45 (Conv2D)           (None, 16, 16, 64)        9472      
_________________________________________________________________
sequential_3 (Sequential)    (None, 8, 8, 64)          200768    
_________________________________________________________________
batch_normalization_34 (Batc (None, 16, 16, 64)        256       
_________________________________________________________________
layer_normalization_34 (Laye (None, 16, 16, 64)        128       
_________________________________________________________________
re_lu_34 (ReLU)              (None, 16, 16, 6

In [16]:
learning_rate = 1e-3
lr_decay = ExponentialDecay(learning_rate, decay_steps=100, decay_rate=0.96)
task_3.compile(optimizer=Adam(lr_decay),
                  loss=SparseCategoricalCrossentropy(from_logits=True),
                  metrics=["accuracy"])
epochs = 50
history_18 = task_3.fit(train_dataset, validation_data=val_dataset, epochs=epochs)
loss_1, acc_1 = task_3.evaluate(test_dataset)
print("Accuracy:", acc_1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy: 0.41280001401901245
