<a href="https://colab.research.google.com/github/ITAM-DS/deep-learning/blob/master/codigo/08-VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# !pip install -U d2l
from d2l import tensorflow as d2l
import tensorflow as tf
import numpy as np

In [3]:
class Residual(tf.keras.Model):
    """Residual block: two 3x3 convolutions wrapped by a skip connection.

    The main path is conv -> batch norm -> ReLU -> conv -> batch norm. Its
    result is added to the shortcut and the sum goes through a final ReLU.

    Args:
        num_channels: Number of filters used by every convolution in the
            block.
        use_1x1conv: When True, the shortcut is passed through a 1x1
            convolution (using `strides`) instead of being the raw input.
        strides: Stride of the first 3x3 convolution and of the optional
            1x1 shortcut convolution.
    """

    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        conv = tf.keras.layers.Conv2D
        batch_norm = tf.keras.layers.BatchNormalization

        # Main path: only the first convolution is strided.
        self.conv1 = conv(
            num_channels, kernel_size=3, strides=strides, padding='same')
        self.conv2 = conv(num_channels, kernel_size=3, padding='same')
        # Optional projection applied to the shortcut; None means the input
        # is added back unchanged.
        self.conv3 = (
            conv(num_channels, kernel_size=1, strides=strides)
            if use_1x1conv else None)
        self.bn1 = batch_norm()
        self.bn2 = batch_norm()

    def call(self, X):
        """Apply the block to `X` and return the activated sum."""
        relu = tf.keras.activations.relu
        main_path = relu(self.bn1(self.conv1(X)))
        main_path = self.bn2(self.conv2(main_path))
        # The addition below needs both operands to have compatible shapes,
        # which is what the 1x1 projection is available for.
        shortcut = X if self.conv3 is None else self.conv3(X)
        return relu(main_path + shortcut)


In [4]:
# Stem of the network, assembled one layer at a time: a strided 7x7
# convolution, batch normalization, ReLU, and a strided 3x3 max pooling.
b1 = tf.keras.models.Sequential()
b1.add(tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same'))
b1.add(tf.keras.layers.BatchNormalization())
b1.add(tf.keras.layers.Activation('relu'))
b1.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'))

In [5]:
class ResnetBlock(tf.keras.layers.Layer):
    """A stage made of `num_residuals` consecutive `Residual` blocks.

    Args:
        num_channels: Number of channels passed to every `Residual` block.
        num_residuals: How many `Residual` blocks the stage contains.
        first_block: When False (the default), the first residual of the
            stage is built with `use_1x1conv=True, strides=2`. When True,
            every residual uses the `Residual` defaults.
        **kwargs: Forwarded to `tf.keras.layers.Layer.__init__`.
    """

    def __init__(self, num_channels, num_residuals, first_block=False,
                 **kwargs):
        super(ResnetBlock, self).__init__(**kwargs)
        self.residual_layers = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                # First residual of a non-first stage: strided, with a 1x1
                # projection on the shortcut.
                self.residual_layers.append(
                    Residual(num_channels, use_1x1conv=True, strides=2))
            else:
                self.residual_layers.append(Residual(num_channels))

    def call(self, X):
        """Run `X` through the residual blocks in order and return the result."""
        # Iterate over the list itself. A `.layers` attribute is not part of
        # the list interface; it is only present on the tracking wrapper some
        # Keras versions substitute for list attributes, so relying on it
        # raises AttributeError elsewhere.
        for layer in self.residual_layers:
            X = layer(X)
        return X


In [6]:
# Four residual stages with two residual blocks each. Only the first stage
# is flagged as `first_block`; the remaining ones differ only in width.
b2 = ResnetBlock(64, 2, first_block=True)
b3, b4, b5 = (ResnetBlock(width, 2) for width in (128, 256, 512))


In [7]:
# Recall that we define this as a function so we can reuse it later and run it
# within `tf.distribute.MirroredStrategy`'s scope to utilize various
# computational resources, e.g. GPUs. Also note that even though we have
# already created b1, b2, b3, b4, and b5, we recreate them inside this
# function's scope instead
def net():
    """Build and return a new, unbuilt ResNet-style `tf.keras.Sequential`.

    Every call creates fresh layers: a convolutional stem, four
    `ResnetBlock` stages of two residuals each (64, 128, 256 and 512
    channels), global average pooling, and a 10-unit dense layer with no
    activation specified.
    """
    model = tf.keras.Sequential()

    # Stem: same layers as the standalone `b1` defined earlier.
    model.add(
        tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation('relu'))
    model.add(
        tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'))

    # Residual stages: same configuration as `b2`..`b5` defined earlier.
    model.add(ResnetBlock(64, 2, first_block=True))
    for width in (128, 256, 512):
        model.add(ResnetBlock(width, 2))

    # Classification head.
    model.add(tf.keras.layers.GlobalAvgPool2D())
    model.add(tf.keras.layers.Dense(units=10))
    return model


In [8]:
# Push one random single-channel 224x224 input through a freshly built
# network, layer by layer, reporting the output shape after each layer.
X = tf.random.uniform(shape=(1, 224, 224, 1))
for layer in net().layers:
    X = layer(X)
    layer_name = type(layer).__name__
    print(layer_name, 'output shape:\t', X.shape)


Conv2D output shape:	 (1, 112, 112, 64)
BatchNormalization output shape:	 (1, 112, 112, 64)
Activation output shape:	 (1, 112, 112, 64)
MaxPooling2D output shape:	 (1, 56, 56, 64)
ResnetBlock output shape:	 (1, 56, 56, 64)
ResnetBlock output shape:	 (1, 28, 28, 128)
ResnetBlock output shape:	 (1, 14, 14, 256)
ResnetBlock output shape:	 (1, 7, 7, 512)
GlobalAveragePooling2D output shape:	 (1, 512)
Dense output shape:	 (1, 10)


In [9]:
# Count the weights held by each layer of a freshly built network. Layers
# only have weights once they have been called, hence the forward pass.
X = tf.random.uniform((1, 224, 224, 1))
parameters = 0

for layer in net().layers:
    X = layer(X)
    # Number of scalars across every array returned by `get_weights()`.
    # `np.sum` over an empty list is the float 0.0, so layers without
    # weights print "0.0" and the running total becomes a float as well.
    layer_parameters = np.sum(
        [np.prod(weight.shape) for weight in layer.get_weights()])
    print(layer.__class__.__name__, "Parameters: ",
          layer_parameters,
          '\t. Output shape:\t', X.shape)
    parameters += layer_parameters

print("--" * 30)
print("Total parameters: ", parameters)


Conv2D Parameters:  3200 	. Output shape:	 (1, 112, 112, 64)
BatchNormalization Parameters:  256 	. Output shape:	 (1, 112, 112, 64)
Activation Parameters:  0.0 	. Output shape:	 (1, 112, 112, 64)
MaxPooling2D Parameters:  0.0 	. Output shape:	 (1, 56, 56, 64)
ResnetBlock Parameters:  148736 	. Output shape:	 (1, 56, 56, 64)
ResnetBlock Parameters:  526976 	. Output shape:	 (1, 28, 28, 128)
ResnetBlock Parameters:  2102528 	. Output shape:	 (1, 14, 14, 256)
ResnetBlock Parameters:  8399360 	. Output shape:	 (1, 7, 7, 512)
GlobalAveragePooling2D Parameters:  0.0 	. Output shape:	 (1, 512)
Dense Parameters:  5130 	. Output shape:	 (1, 10)
------------------------------------------------------------
Total parameters:  11186186.0


In [9]:
# Training hyperparameters.
lr = 0.1
num_epochs = 10
batch_size = 128

# Data iterators come from the d2l helper.
# NOTE(review): `resize=28` presumably asks for 28x28 images; confirm against
# the d2l documentation.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=28)

# `net` is handed over uncalled, i.e. as the builder function itself.
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr)

## Ejercicios

Tomado de la sección del [libro de texto](https://d2l.ai/chapter_convolutional-modern/resnet.html)

```{markdown}
1. Refer to Table 1 in the ResNet paper [He et al., 2016a] to implement different variants.
2. In subsequent versions of ResNet, the authors changed the “convolution, batch normalization, and activation” structure to the “batch normalization, activation, and convolution” structure. Make this improvement yourself. See Figure 1 in [He et al., 2016b] for details.
```
[He et al., 2016a](https://arxiv.org/abs/1512.03385) · [He et al., 2016b](https://arxiv.org/abs/1603.05027)