# ResNet and DenseNet
## Imports
This week we work with the CIFAR-10 dataset. We load it from `tensorflow_datasets`, which already provides it split into a training and a test set. It is convenient to use because each split is returned as a `tf.data.Dataset`.

In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import Layer
from tensorflow.keras import Model
import numpy as np
import matplotlib.pyplot as plt

# Load the CIFAR-10 train and test splits; the preprocessing below consumes
# each element as an (image, label) pair, which is what as_supervised=True requests.
train_ds, test_ds = tfds.load('cifar10', split=['train', 'test'], as_supervised=True)

[1mDownloading and preparing dataset cifar10/3.0.2 (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to /root/tensorflow_datasets/cifar10/3.0.2...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]






0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteRNPFZ7/cifar10-train.tfrecord


  0%|          | 0/50000 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteRNPFZ7/cifar10-test.tfrecord


  0%|          | 0/10000 [00:00<?, ? examples/s]

[1mDataset cifar10 downloaded and prepared to /root/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.[0m


## Preprocessing
The preprocessing is basically the same as last week. We cast the images to float32, normalize them, one-hot encode the targets, and then cache, shuffle, batch and prefetch the dataset.

In [None]:
def prepare_data(cifar, batch_size=32, shuffle_buffer=1000, prefetch_size=20, num_classes=10):
  """
  Preprocessing pipeline, prepares data for later use in a neural network.
  ## Params
    - cifar (tf.data.Dataset) = dataset of (image, label) pairs
    - batch_size (int) = number of examples per batch
    - shuffle_buffer (int) = size of the shuffle buffer
    - prefetch_size (int) = number of batches to prefetch
    - num_classes (int) = depth of the one-hot encoded targets
  ## Returns
    - the shuffled, batched and prefetched dataset of (float32 image, one-hot target) pairs
  """
  def _to_model_input(img, target):
    # Cast from the stored integer type to float32 and scale by 1/256, which
    # maps pixel values 0..255 into [0, 1) -- not into [-1, 1].
    img = tf.cast(img, tf.float32) / 256
    # Create one-hot targets.
    return img, tf.one_hot(target, depth=num_classes)

  cifar = cifar.map(_to_model_input)
  # Cache the mapped examples; the mapping is deterministic, so there is no
  # need to redo it. Shuffling comes after the cache, so it is not frozen by it.
  cifar = cifar.cache()
  # Shuffle, batch, prefetch.
  cifar = cifar.shuffle(shuffle_buffer)
  cifar = cifar.batch(batch_size)
  cifar = cifar.prefetch(prefetch_size)
  # Return the preprocessed dataset.
  return cifar

# Run both splits through the same preprocessing pipeline.
train_dataset = train_ds.apply(prepare_data)
test_dataset = test_ds.apply(prepare_data)

In [None]:
# Each element of the prepared dataset is an (images, targets) tuple, so it
# has to be unpacked before asking for shapes; calling .shape on the tuple
# itself raises AttributeError.
for images, targets in train_dataset.take(1):
  print(images.shape, targets.shape)

AttributeError: ignored

## ResNet
Here are our Residual Block and our Residual Network. 

In [None]:
class Residual_Block(Layer):
    """Residual_Block represents a Layer in our Network, inherits from keras.layers.Layer.

    Three Conv2D + BatchNormalization pairs (1x1, 3x3, 1x1) whose result is
    added back onto the block input (skip connection).
    """


    def __init__(self, input_dim):
        """
        Constructor, calls super Constructor.
        One residual block consists of 3 pairs of convolutional layer + batch normalization. Trainable is set to True.
        ## Params
            - input_dim (int) = number of channels of the block input; the last
              convolution restores this channel count so that the skip
              connection can add input and output element-wise
        """
        super(Residual_Block, self).__init__()
        # kernel_regularizer needs a regularizer instance, so l1_l2 is called
        # here. The factors are spelled out (0.01 is the TF 2.x default of
        # l1_l2) so they do not depend on the installed version's defaults.
        # NOTE(review): the penalties are only collected in `losses`; a custom
        # training loop has to add them to its loss for them to take effect.
        self.conv1 = layers.Conv2D(filters=24, kernel_size=1, strides=(1, 1), padding="same", activation="relu", kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01))
        self.norm1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2D(filters=24, kernel_size=3, strides=(1, 1), padding="same", activation="relu", kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01))
        self.norm2 = layers.BatchNormalization()
        self.conv3 = layers.Conv2D(filters=input_dim, kernel_size=1, strides=(1, 1), padding="same", activation="relu", kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01))
        self.norm3 = layers.BatchNormalization()
        self.norm1.trainable = True
        self.norm2.trainable = True
        self.norm3.trainable = True

    def call(self, inputs, training=None):
        """
        Forward pass through the block.
        ## Params
            - inputs = input feature maps; must have `input_dim` channels
            - training (bool or None) = forwarded to the batch normalization
              layers (batch statistics when True, moving averages when False)
        ## Returns
            - block output with the same shape as `inputs`
        """
        x = self.conv1(inputs)
        x = self.norm1(x, training=training)
        x = self.conv2(x)
        x = self.norm2(x, training=training)
        x = self.conv3(x)
        x = self.norm3(x, training=training)
        # Skip connection: this addition is what makes the block residual.
        # All convolutions use stride 1 with "same" padding and conv3 emits
        # input_dim channels, so both operands have the same shape.
        return x + inputs

class ResNet(Model):
    """ResNet represents Residual Network/ model."""


    def __init__(self, n_blocks=2):
        """
        Constructor, calls super Constructor.
        Is initialized with 1 convolutional layer followed by specified number of residual blocks followed by global average pooling layer and Dense layer as output layer.
        ## Params
            - n_blocks (int) = number of Blocks in ResNet
        """
        super(ResNet, self).__init__()

        # kernel_regularizer needs a regularizer instance, so l1_l2 is called
        # here (0.01 is the TF 2.x default of l1_l2).
        self.conv1 = layers.Conv2D(filters=24, kernel_size=1, strides=(1, 1), padding="same", activation="relu", kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01))
        # The first convolution emits 24 channels, which is the channel count
        # every residual block receives and restores.
        self.blocks = [Residual_Block(24) for _ in range(n_blocks)]
        self.global_pool = layers.GlobalAvgPool2D()
        self.out = layers.Dense(10, activation="softmax")

    @tf.function
    def call(self, inputs, training=None):
        """
        Forward pass through the network.
        ## Params
            - inputs = batch of input images
            - training (bool or None) = whether the model runs in training mode
        ## Returns
            - softmax output with 10 entries per example
        """
        # `training` is deliberately part of the signature although it is not
        # forwarded by hand: Keras stores it in its call context, from which
        # the nested batch normalization layers pick it up, and because it is
        # a Python argument tf.function traces separate graphs for training
        # and inference instead of reusing whichever mode was traced first.
        x = self.conv1(inputs)
        for block in self.blocks:
            x = block(x)
        x = self.global_pool(x)
        return self.out(x)

## DenseNet
Here are our Transition Layer, our Dense Block and our Dense Network.

In [None]:
class Transition_Layers(Layer):
    """Transition Layer for our DenseNet: batch norm, ReLU, 1x1 convolution, average pooling."""


    def __init__(self,reduce_to):
        """Constructor calls super constructor.
        Sets up a batch normalization (trainable=True), a bias-free 1x1
        convolution and a 2x2 average pooling with stride 2.
        ## Params
            - reduce_to (int) = number of filters in Convolutional layer
        """
        super(Transition_Layers, self).__init__()
        self.batch1 = layers.BatchNormalization()
        self.batch1.trainable = True
        self.conv1 = layers.Conv2D(
            filters=reduce_to,
            kernel_size=(1, 1),
            padding="valid",
            use_bias=False,
        )
        self.pooling = layers.AvgPool2D(
            pool_size=(2, 2),
            strides=(2, 2),
            padding='valid',
        )


    def call(self, inputs):
        """
        Forward pass: normalize, apply ReLU, reduce the channels, then pool.
        ## Params
            - inputs = inputs data
        """
        normalized = self.batch1(inputs)
        activated = tf.nn.relu(normalized)
        reduced = self.conv1(activated)
        return self.pooling(reduced)
        

class Dense_Block(Layer):
    """Dense block for our DenseNet: two (batch norm, ReLU, convolution) stages plus concatenation."""


    def __init__(self,  n_filters, new_channels):
        """Constructor calls super Constructor
        Sets up 2 pairs of Batch normalization (trainable=True) + bias-free
        convolutional layer: a 1x1 convolution followed by a 3x3 convolution.
        ## Params
            - n_filters (int) = number of filters for first conv layer
            - new_channels (int) = number of filters for second conv layer
        """
        super(Dense_Block, self).__init__()
        self.batch1 = layers.BatchNormalization()
        self.batch1.trainable = True
        self.conv1 = layers.Conv2D(
            n_filters,
            kernel_size=(1, 1),
            padding="valid",
            use_bias=False,
        )
        self.batch2 = layers.BatchNormalization()
        self.batch2.trainable = True
        self.conv2 = layers.Conv2D(
            new_channels,
            kernel_size=(3, 3),
            padding="same",
            use_bias=False,
        )

    def call(self, inputs):
        """
        Forward pass: compute new feature maps and concatenate them with the input.
        ## Params
            - inputs = input data
        """
        # First stage: normalize, activate, 1x1 convolution.
        bottleneck = self.conv1(tf.nn.relu(self.batch1(inputs)))
        # Second stage: normalize, activate, 3x3 convolution.
        new_features = self.conv2(tf.nn.relu(self.batch2(bottleneck)))
        # Stack the new feature maps in front of the unchanged input along the
        # last axis (the channel axis, assuming channels-last data).
        return tf.concat([new_features, inputs], axis=-1)


class DenseNet(Model):
    """DenseNet represents DenseNet/model"""


    def __init__(self, n_filters=128, new_channels=64, n_transitions=64):
        """Constructor calls super Constructor
        Initialized with starting layers, three groups of three Dense blocks
        (the first two groups each followed by a Transition layer) and the
        output layers.
        ## Params
            - n_filters (int) = number of filters for first conv layer of every Dense block
            - new_channels (int) = number of filters for second conv layer of every Dense block
            - n_transitions (int) = number of filters each Transition layer reduces to
        """
        super(DenseNet, self).__init__()

        #Starting layers
        self.starting_layers = []
        self.starting_layers.append(layers.ZeroPadding2D(padding=(3, 3), data_format=None))
        self.starting_layers.append(layers.Conv2D(filters = 64, kernel_size=(7,7),padding="valid",use_bias=False))
        self.starting_layers.append(layers.BatchNormalization())
        self.starting_layers[-1].trainable = True
        self.starting_layers.append(tf.nn.relu)
        self.starting_layers.append(layers.ZeroPadding2D(padding=(1, 1), data_format=None))

        # First block
        self.block1 = [Dense_Block(n_filters, new_channels) for _ in range(3)]
        self.block1.append(Transition_Layers(n_transitions))

        # Second block
        self.block2 = [Dense_Block(n_filters, new_channels) for _ in range(3)]
        self.block2.append(Transition_Layers(n_transitions))

        #Third
        self.block3 = [Dense_Block(n_filters, new_channels) for _ in range(3)]

        # out Neurons
        self.outgoing_layers = []
        self.outgoing_layers.append(layers.BatchNormalization())
        self.outgoing_layers[-1].trainable = True
        self.outgoing_layers.append(tf.nn.relu)
        self.outgoing_layers.append(layers.GlobalAveragePooling2D())
        self.outgoing_layers.append(layers.Dense(10,activation="softmax"))


    @tf.function
    def call(self, inputs, training=None):
        """
        Forward pass through the network.
        ## Params
            - inputs = batch of input images
            - training (bool or None) = whether the model runs in training mode
        ## Returns
            - softmax output with 10 entries per example
        """
        # `training` is deliberately part of the signature although it is not
        # forwarded by hand: Keras stores it in its call context, from which
        # the nested batch normalization layers pick it up, and because it is
        # a Python argument tf.function traces separate graphs for training
        # and inference instead of reusing whichever mode was traced first.
        # (The lists also hold the plain function tf.nn.relu, which would not
        # accept a `training` keyword.)
        x = inputs
        stages = (
            self.starting_layers,
            self.block1,
            self.block2,
            self.block3,
            self.outgoing_layers,
        )
        for stage in stages:
            for layer in stage:
                x = layer(x)

        return x

## Train and Test function
The training and test function are the same as last week. 

In [None]:
def train_step(model, input, target, loss_function, optimizer):
  """
  Performs one optimization step on a single batch.
    ## Params
        - model (Model) = instance of Model class
        - input = batch of inputs for the model
        - target = batch of targets belonging to `input`
        - loss_function (keras.losses.Loss) = loss function
        - optimizer (keras.optimizers.Optimizer) = tensorflow optimizer
    ## Returns
        - the loss of this batch, computed before the weight update
  """
  with tf.GradientTape() as tape:
    # training=True asks layers such as batch normalization to use (and
    # update from) the statistics of the current batch.
    prediction = model(input, training=True)
    loss = loss_function(target, prediction)
  # The gradients are requested after leaving the tape context, so the
  # gradient computation itself is not recorded on the tape.
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  return loss


def test(model, test_data, loss_function):
  """
    Evaluates the model on a complete dataset.
    ## Params
        - model (keras.Model) = instance of Model class
        - test_data = iterable of (input, one-hot target) batches
        - loss_function (keras.losses.Loss) = loss function
    ## Returns
        - (test_loss, test_accuracy) as scalar tensors (float32 / float64),
          averaged over all examples; both are NaN if `test_data` is empty
  """
  loss_sum = 0.0
  correct = 0
  seen = 0

  for (images, target) in test_data:
    # Evaluate in inference mode.
    prediction = model(images, training=False)
    batch_loss = loss_function(target, prediction)
    hits = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)
    batch_size = hits.size
    # Weight every batch by its size: the last batch can be smaller, and a
    # plain mean of per-batch means would give its examples too much weight.
    loss_sum += float(batch_loss.numpy()) * batch_size
    correct += int(np.sum(hits))
    seen += batch_size

  if seen == 0:
    # Nothing to evaluate; report NaN instead of dividing by zero.
    nan = float("nan")
    return tf.constant(nan, dtype=tf.float32), tf.constant(nan, dtype=tf.float64)

  test_loss = tf.constant(loss_sum / seen, dtype=tf.float32)
  test_accuracy = tf.constant(correct / seen, dtype=tf.float64)

  return test_loss, test_accuracy

## Running our Code
We first run our DenseNet and then our ResNet. The accuracies are printed out. 
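- Training data: at most 10000 batches of 32 images (`take` is applied after batching, so with the 50000 CIFAR-10 training images this is the whole training split)
- Test data: at most 1000 batches of 32 images (likewise the whole 10000-image test split)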
- Epochs: 10 epochs
- learning rate: 0.001
- Loss: Crossentropy loss
- Optimizer: Adam

In [None]:
tf.keras.backend.clear_session()

# Limit the amount of data used for showcasing (generally use all of the available data!).
# NOTE: both datasets were already batched by prepare_data, so take(n) keeps
# n batches of 32 images, not n images. With 50000 training and 10000 test
# images these limits appear to be larger than the datasets themselves, i.e.
# the full splits are used -- TODO confirm whether a real subset was intended.
train_dataset = train_dataset.take(10000)
test_dataset = test_dataset.take(1000)

### Hyperparameters
num_epochs = 10
learning_rate = 0.001

# Initialize the models: a DenseNet with default settings and a ResNet with 5 residual blocks.
model_dense = DenseNet()
model_res = ResNet(5)

# Initialize the loss: categorical cross entropy. Check out 'tf.keras.losses'.
# The targets are one-hot encoded in prepare_data.
cross_entropy_loss = tf.keras.losses.CategoricalCrossentropy()

# Initialize the optimizer: Adam with the learning rate chosen above. Check out 'tf.keras.optimizers'
optimizer = tf.keras.optimizers.Adam(learning_rate)

def train(model):
    """
    Trains a model for `num_epochs` epochs and evaluates it after every epoch.
    Uses the module-level `train_dataset`, `test_dataset`,
    `cross_entropy_loss`, `num_epochs` and `learning_rate`.
    ## Params
        - model (keras.Model) = Instance of model
    ## Returns
        - (train_losses, test_losses, test_accuracies): three lists with one
          entry from before training plus one entry per epoch
    """
    # Every model gets its own Adam optimizer: the optimizer keeps state
    # (step counter, moment estimates) that belongs to one set of variables
    # and must not leak from one model's training into the next one's.
    optimizer = tf.keras.optimizers.Adam(learning_rate)

    # Initialize lists for later visualization.
    train_losses = []
    test_losses = []
    test_accuracies = []

    #testing once before we begin
    test_loss, test_accuracy = test(model, test_dataset, cross_entropy_loss)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

    #check how model performs on train data once before we begin
    train_loss, _ = test(model, train_dataset, cross_entropy_loss)
    train_losses.append(train_loss)

    # We train for num_epochs epochs.
    for epoch in range(num_epochs):
        print(f'Epoch: {epoch} starting with accuracy {test_accuracies[-1]}')

        #training (and checking in with training)
        epoch_loss_agg = [
            train_step(model, input, target, cross_entropy_loss, optimizer)
            for input, target in train_dataset
        ]

        #track training loss
        train_losses.append(tf.reduce_mean(epoch_loss_agg))

        #testing, so we can track accuracy and test loss
        test_loss, test_accuracy = test(model, test_dataset, cross_entropy_loss)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

    return train_losses, test_losses, test_accuracies 

# calling test function
print("Accuracy for our DenseNet")
erg_dense = train(model_dense)
print("\n")
print("Accuracy for our ResNet")
erg_res = train(model_res)

Accuracy for our DenseNet
Epoch: 0 starting with accuracy 0.10013977635782748
Epoch: 1 starting with accuracy 0.41553514376996803
Epoch: 2 starting with accuracy 0.4910143769968051
Epoch: 3 starting with accuracy 0.536841054313099
Epoch: 4 starting with accuracy 0.5915535143769968
Epoch: 5 starting with accuracy 0.6291932907348243
Epoch: 6 starting with accuracy 0.6381789137380192
Epoch: 7 starting with accuracy 0.6795127795527156
Epoch: 8 starting with accuracy 0.705770766773163


## Visualization
### DenseNet

In [None]:
# Assigning values: train() returned (train losses, test losses, test accuracies)
train_losses_dense, test_losses_dense, test_accuracies_dense = erg_dense

# Visualize accuracy and loss for training and test data.
plt.figure()
line1, = plt.plot(train_losses_dense)
line2, = plt.plot(test_losses_dense)
line3, = plt.plot(test_accuracies_dense)
# One value was recorded before training and one after every epoch, so the
# x-axis counts epochs rather than individual training steps.
plt.xlabel("Epochs")
plt.ylabel("Loss/Accuracy")
plt.xticks(np.arange(num_epochs+1))
plt.legend((line1,line2, line3),("training","test", "test accuracy"))
plt.show()

### ResNet

In [None]:
# Assigning values: train() returned (train losses, test losses, test accuracies)
train_losses_res, test_losses_res, test_accuracies_res = erg_res

# Visualize accuracy and loss for training and test data.
plt.figure()
line1, = plt.plot(train_losses_res)
line2, = plt.plot(test_losses_res)
line3, = plt.plot(test_accuracies_res)
# One value was recorded before training and one after every epoch, so the
# x-axis counts epochs rather than individual training steps.
plt.xlabel("Epochs")
plt.ylabel("Loss/Accuracy")
plt.xticks(np.arange(num_epochs+1))
plt.legend((line1,line2, line3),("training","test", "test accuracy"))
plt.show()

### Training losses

In [None]:
# Barplot
# One value was recorded before training and one after every epoch.
epochs = np.arange(num_epochs + 1)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
# The bars carry their own labels, so the legend does not depend on line
# handles left over from an earlier figure.
ax.bar(epochs, train_losses_res, 0.3, label="res")
ax.bar(epochs+0.3, train_losses_dense, 0.3, label="dense")
ax.legend()
ax.set_title('Training losses')
ax.set_ylabel('Loss')
ax.set_xlabel('Epochs')
ax.set_xticks(epochs)
plt.show()

# Graph
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
line1, = plt.plot(train_losses_res)
line2, = plt.plot(train_losses_dense)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(epochs)
# Exactly one handle per label; only the two lines of this figure are used.
plt.legend((line1, line2), ("res", "dense"))
plt.show()

NameError: ignored

### Test losses

In [None]:
# Barplot
# One value was recorded before training and one after every epoch.
epochs = np.arange(num_epochs + 1)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
# The bars carry their own labels, so the legend does not depend on line
# handles left over from an earlier figure.
ax.bar(epochs, test_losses_res, 0.3, label="res")
ax.bar(epochs+0.3, test_losses_dense, 0.3, label="dense")
ax.legend()
ax.set_title('Test losses')
ax.set_ylabel('Losses')
ax.set_xlabel('Epochs')
ax.set_xticks(epochs)
plt.show()

# Graph
plt.figure()
line1, = plt.plot(test_losses_res)
line2, = plt.plot(test_losses_dense)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(epochs)
# Exactly one handle per label; only the two lines of this figure are used.
plt.legend((line1, line2), ("res", "dense"))
plt.show()

### Test Accuracy

In [None]:
# Barplot
# One value was recorded before training and one after every epoch.
epochs = np.arange(num_epochs + 1)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
# The bars carry their own labels, so the legend does not depend on line
# handles left over from an earlier figure.
ax.bar(epochs, test_accuracies_res, 0.3, label="res")
ax.bar(epochs+0.3, test_accuracies_dense, 0.3, label="dense")
ax.legend()
ax.set_title('Test Accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epochs')
ax.set_xticks(epochs)
plt.show()

# Graph
plt.figure()
line1, = plt.plot(test_accuracies_res)
line2, = plt.plot(test_accuracies_dense)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.xticks(epochs)
# Exactly one handle per label; only the two lines of this figure are used.
plt.legend((line1, line2), ("res", "dense"))
plt.show()