# Residual Networks

Source: [Dive into Deep Learning](https://d2l.ai/chapter_convolutional-modern/resnet.html)

In [None]:
# pip install d2l

In [30]:
# Importing libraries
import tensorflow as tf
from d2l import tensorflow as d2l

In [31]:
class Residual(tf.keras.Model):
    """Residual block: two 3x3 convolutions wrapped by a skip connection.

    Args:
        num_channels: Number of output channels of every convolution in
            the block.
        use_1x1conv: When true, the shortcut branch passes the input
            through a 1x1 convolution instead of using it unchanged.
        strides: Stride of the first 3x3 convolution and of the optional
            1x1 shortcut convolution.
    """

    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        conv = tf.keras.layers.Conv2D
        batch_norm = tf.keras.layers.BatchNormalization

        # Main branch convolutions; only the first one may downsample.
        self.conv1 = conv(num_channels, kernel_size=3, strides=strides,
                          padding='same')
        self.conv2 = conv(num_channels, kernel_size=3, padding='same')
        # Optional projection applied to the shortcut branch.
        self.conv3 = (conv(num_channels, kernel_size=1, strides=strides)
                      if use_1x1conv else None)
        self.bn1 = batch_norm()
        self.bn2 = batch_norm()

    def call(self, X):
        """Return relu(main_branch(X) + shortcut(X))."""
        relu = tf.keras.activations.relu

        # Main branch: conv -> BN -> ReLU -> conv -> BN.
        hidden = relu(self.bn1(self.conv1(X)))
        main = self.bn2(self.conv2(hidden))

        # Shortcut: the input itself, or its 1x1 projection when configured.
        shortcut = X if self.conv3 is None else self.conv3(X)
        return relu(main + shortcut)

In this Residual block, there are two 3 x 3 convolutional layers with the same number of output channels. Each convolutional layer is followed by a batch normalization layer, and the first one additionally by a ReLU activation function. A skip connection then bypasses these two convolution operations and adds the input to their output directly before the final ReLU activation function.

We use the 1x1 convolution to match the dimensions when the input and output feature maps have different shapes.

The `call` method defines the forward pass of the residual block. It applies the first convolutional layer `conv1` followed by batch normalization `bn1` and ReLU activation. Then, it applies the second convolutional layer `conv2` followed by batch normalization `bn2`. If `use_1x1conv` is True, it applies a 1x1 convolution (`conv3`) to the input tensor `X` to match the dimensions. Finally, it adds the input tensor `X` (or its 1x1-convolved version) to the output of the second batch normalization, and applies ReLU activation.

In the following example, the input and output have the same shape, so the 1 x 1 convolution is not needed.

In [4]:
# Example: the input already has 3 channels (last axis), matching
# num_channels=3, so the block adds X to its output without a 1x1 conv.
blk = Residual(3)
X = tf.random.normal((4, 6, 6, 3))
Y = blk(X)
# With the default strides=1 the output shape equals the input shape.
Y.shape

TensorShape([4, 6, 6, 3])

We also have an option to halve the output height and width while increasing the number of output channels.

In [5]:
# Reuses X from the previous example. strides=2 halves height and width,
# and use_1x1conv=True projects the shortcut to the 6 output channels so
# it can be added to the main branch.
blk = Residual(6, use_1x1conv=True, strides=2)
blk(X).shape

TensorShape([4, 3, 3, 6])

In [32]:
class ResNet(d2l.Classifier):
    """ResNet classifier built on top of `d2l.Classifier`."""

    def b1(self):
        """Build the stem: 7x7 conv -> batch norm -> ReLU -> 3x3 max-pool.

        Both the convolution and the pooling layer use a stride of 2 with
        'same' padding.
        """
        stem_layers = [
            tf.keras.layers.Conv2D(64, kernel_size=7, strides=2,
                                   padding='same'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPool2D(pool_size=3, strides=2,
                                      padding='same'),
        ]
        return tf.keras.models.Sequential(stem_layers)

The first layers of ResNet (the stem built by `b1`) are:
- a 7x7 convolutional layer with 64 output channels and a stride of 2, followed by batch normalization and a ReLU activation;
- a 3x3 max-pooling layer with a stride of 2.

In [33]:
@d2l.add_to_class(ResNet)
def block(self, num_residuals, num_channels, first_block=False):
    """Build one ResNet stage as a Sequential of `Residual` blocks.

    Args:
        num_residuals: Number of `Residual` blocks in the stage.
        num_channels: Output channels of every block in the stage.
        first_block: When true, no block in the stage downsamples;
            otherwise the first block uses a stride of 2 together with a
            1x1 convolution on its shortcut.

    Returns:
        A `tf.keras.models.Sequential` holding the stage's blocks.
    """
    stage = tf.keras.models.Sequential()
    for idx in range(num_residuals):
        # Only the leading block of a non-first stage changes the shape.
        downsample = (idx == 0) and not first_block
        if downsample:
            residual = Residual(num_channels, use_1x1conv=True, strides=2)
        else:
            residual = Residual(num_channels)
        stage.add(residual)
    return stage

In [34]:
@d2l.add_to_class(ResNet)
def __init__(self, arch, lr=0.1, num_classes=10):
    """Assemble the network: stem, residual stages, classification head.

    Args:
        arch: Iterable of per-stage argument tuples; each tuple is
            unpacked into `self.block`, i.e. (num_residuals, num_channels).
        lr: Learning rate; not used directly in this function.
        num_classes: Number of units of the final dense layer.
    """
    # Two-argument super(): this function is defined outside the class
    # body, so the zero-argument form is not available here.
    super(ResNet, self).__init__()
    # NOTE(review): save_hyperparameters is a d2l helper not shown here;
    # presumably it records arch/lr/num_classes. Keep it ahead of any new
    # local variable until that is confirmed.
    self.save_hyperparameters()

    self.net = tf.keras.models.Sequential(self.b1())
    for stage_idx, stage_args in enumerate(arch):
        # The first stage follows the max-pool and must not downsample.
        stage = self.block(*stage_args, first_block=(stage_idx == 0))
        self.net.add(stage)

    head = tf.keras.models.Sequential([
        tf.keras.layers.GlobalAvgPool2D(),
        tf.keras.layers.Dense(units=num_classes),
    ])
    self.net.add(head)

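Each of the four modules contains four convolutional layers (two residual blocks with two 3x3 convolutions each, not counting the 1x1 convolutions on the shortcuts), which gives 16 layers. Together with the first 7x7 convolutional layer and the final fully connected layer, there are 18 layers in total. Therefore, this model is called ResNet-18.
By configuring different numbers of channels and residual blocks in the modules, we can create different ResNet models, such as the deeper 152-layer ResNet-152.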

![](https://d2l.ai/_images/resnet18-90.svg)

In [35]:
class ResNet18(ResNet):
    """ResNet-18: four stages of two residual blocks each."""

    def __init__(self, lr=0.1, num_classes=10):
        # One (num_residuals, num_channels) pair per stage.
        arch = ((2, 64), (2, 128), (2, 256), (2, 512))
        super().__init__(arch, lr, num_classes)

In [36]:
# Print the output shape of each top-level stage for an input of shape
# (1, 96, 96, 1).
ResNet18().layer_summary((1, 96, 96, 1))

Sequential output shape:	 (1, 24, 24, 64)
Sequential output shape:	 (1, 24, 24, 64)
Sequential output shape:	 (1, 12, 12, 128)
Sequential output shape:	 (1, 6, 6, 256)
Sequential output shape:	 (1, 3, 3, 512)
Sequential output shape:	 (1, 10)


## ResNet101 Model

In [16]:
# Input shape handed to ResNet101 in createResNetModel; presumably
# (height, width, channels) without the batch axis — confirm against the
# dataset that is fed to the model.
input_shape = (32, 32, 3)

In [18]:
## ResNet101 Model
def createResNetModel(num_classes=10):
    """Build a ResNet101 backbone topped with a dense classification head.

    The backbone is created with `weights=None`, i.e. randomly initialised
    rather than pretrained, and is left fully trainable. It reads the
    module-level `input_shape` for its input size. The head flattens the
    backbone output and applies three ReLU dense layers (2048, 1024, 512
    units), each followed by dropout with rate 0.5, before a softmax
    output layer.

    Args:
        num_classes: Number of units of the softmax output layer.

    Returns:
        The uncompiled `tf.keras.models.Sequential` model. Its summary is
        printed as a side effect.
    """
    backbone = tf.keras.applications.ResNet101(include_top=False,
                                               weights=None,
                                               input_shape=input_shape)
    # Setting `trainable` on a model applies to all of its layers.
    backbone.trainable = True

    # Additional layers on top of the backbone.
    resnetModel = tf.keras.models.Sequential()
    resnetModel.add(backbone)
    resnetModel.add(tf.keras.layers.Flatten())
    for units in (2048, 1024, 512):
        resnetModel.add(tf.keras.layers.Dense(units, activation='relu'))
        resnetModel.add(tf.keras.layers.Dropout(.5))
    resnetModel.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    resnetModel.summary()
    return resnetModel

# Build the model; createResNetModel also prints its summary.
resnet101_model = createResNetModel()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet101 (Functional)      (None, 1, 1, 2048)        42658176  
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 2048)              4196352   
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 dense_1 (Dense)             (None, 1024)              2098176   
                                                                 
 dropout_1 (Dropout)         (None, 1024)              0         
                                                                 
 dense_2 (Dense)             (None, 512)              

In [37]:
# Configure training: Adam optimizer, sparse categorical cross-entropy
# loss and accuracy as the reported metric.
resnet101_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)