# Creating custom NN models

## Importing libraries

In [1]:
import pandas as pd
import numpy as np
from ipynb.fs.full.Useful_funcs import data_pipeline, pre_model, create_huber # Custom funcs for data processing, modelling, compiling and training
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import selu, relu, elu
from tensorflow.keras.initializers import lecun_normal, he_normal
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.losses import mse

## Loading datasets

In [2]:
# Load the California housing dataset through scikit-learn's fetch helper.
housing = fetch_california_housing()

In [3]:
# data_pipeline is a custom helper imported from Useful_funcs. Judging by the names it is
# unpacked into, it returns raw and scaled feature sets for train/validation/test followed
# by the three target arrays, in exactly this order — verify against the helper's definition.
x_train, x_train_scaled, x_valid, x_valid_scaled, x_test, x_test_scaled, y_train, y_valid, y_test = data_pipeline(housing)

## Creating a custom model

### Creating a Residual Block layer

- We will be creating the Residual Block which will be used multiple times in the model.

In [12]:
class Residual_block(keras.layers.Layer):
    """Stack of Dense layers wrapped in a skip connection.

    The input is passed through `n_layers` Dense layers (ELU activation,
    He-normal initialization) and the original input is then added to the
    output of the stack. Because of that addition, the Dense output and the
    block input must have compatible shapes; in practice `units` should equal
    the size of the input's last dimension.

    Args:
        n_layers: Number of Dense layers inside the block.
        units: Number of units of every Dense layer in the block.
        **kwargs: Forwarded to the keras Layer base class (e.g. name, dtype).
    """
    def __init__(self, n_layers, units, **kwargs):
        super().__init__(**kwargs) # Initializing base class kwargs
        # Remember the constructor arguments so get_config() can report them
        self.n_layers = n_layers
        self.units = units
        # Creating the block of Dense layers
        self.hidden = [keras.layers.Dense(units, activation = elu, kernel_initializer = he_normal()) for _ in range(n_layers)]
    def call(self, x):
        z = x
        for layer in self.hidden:
            z = layer(z) # Passing the inputs through the block of layers
        return z + x # Skip connection: add the block's input to its output
    def get_config(self):
        """Return the constructor arguments so Keras can re-create this layer from its config."""
        base_config = super().get_config()
        return {**base_config, "n_layers": self.n_layers, "units": self.units}

- The above layer contains other layers. Keras automatically detects that the hidden attribute contains trackable objects, so their variables are automatically added to this layer's list of variables.

### Creating the model

- Now using the residual block we will be creating the custom model. We will be using the Subclassing API.

In [20]:
class Residual_regressor(keras.models.Model):
    """Regression model written with the Subclassing API around two residual blocks.

    Forward pass: an initial 30-unit Dense layer, then four passes through
    `block1` (the same layer instance each time, so its weights are reused),
    one pass through `block2`, and finally the output Dense layer with
    `output_dims` units.
    """
    def __init__(self, output_dims, **kwargs):
        super().__init__(**kwargs)
        # Entry layer that maps the inputs to the 30-wide representation used by the blocks
        self.hidden1 = Dense(30, activation = elu, kernel_initializer = he_normal())
        # Residual block applied repeatedly in call()
        self.block1 = Residual_block(2, 30)
        # Residual block applied once, right before the output layer
        self.block2 = Residual_block(2, 30)
        # Final layer producing the predictions
        self.out = Dense(output_dims)
    def call(self, inputs):
        # Forward pass of the model
        features = self.hidden1(inputs)
        for _ in range(4): # Four passes through the same block1 instance
            features = self.block1(features)
        return self.out(self.block2(features))

In [21]:
# pre_model() is a custom helper from Useful_funcs; presumably it prepares/resets state
# before a new model is built — TODO confirm against its definition.
pre_model()
# Instantiate the subclassed model with a single output unit.
model = Residual_regressor(1)

In [22]:
# Compile with the mse loss and a Nadam optimizer created with its default arguments.
model.compile(loss = mse, optimizer = Nadam())

In [23]:
# Train for 5 epochs on the scaled training features; no validation data is passed here.
history = model.fit(x_train_scaled, y_train, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# Evaluate on the scaled test set; the displayed value is the compiled (mse) loss.
model.evaluate(x_test_scaled, y_test)



0.6499764323234558

In [25]:
# Save the model under the 'Custom_models' path (the log below shows a Custom_models/assets directory being written).
model.save('Custom_models')

INFO:tensorflow:Assets written to: Custom_models/assets


In [26]:
# Reload the model that was just saved, replacing the in-memory `model`.
model = keras.models.load_model('Custom_models')

In [27]:
# Train the reloaded model for another 5 epochs on the scaled training data.
model.fit(x_train_scaled, y_train, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f01aead65e0>

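- We can build a similar architecture with the Sequential API. Note that it is not exactly the same model: here every residual block is a separate layer with its own weights, whereas the subclassed model passes the data through the same `block1` (shared weights) four times.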

In [48]:
# pre_model() is a custom helper from Useful_funcs; presumably it prepares/resets state
# before a new model is built — TODO confirm against its definition.
pre_model()

In [11]:
# Shape of a single sample: every dimension of the scaled training array except the first one.
input_shape = x_train_scaled.shape[1:]

In [50]:
# Sequential version of the residual regressor: Dense -> 5 residual blocks -> Dense(1).
model = Sequential()
# Pass an initializer instance (he_normal()), as everywhere else in this notebook,
# rather than the bare class.
model.add(Dense(30, activation = elu, kernel_initializer = he_normal(), input_shape = input_shape))
# Four separate blocks standing in for the four passes through block1 of the subclassed
# model; unlike there, each block here has its own weights.
for _ in range(4):
    model.add(Residual_block(2, 30))
# Counterpart of block2 in the subclassed model
model.add(Residual_block(2, 30))
# Single-unit output layer for the regression target
model.add(Dense(1))

In [51]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the Sequential model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                270       
_________________________________________________________________
residual_block (Residual_blo (None, 30)                1860      
_________________________________________________________________
residual_block_1 (Residual_b (None, 30)                1860      
_________________________________________________________________
residual_block_2 (Residual_b (None, 30)                1860      
_________________________________________________________________
residual_block_3 (Residual_b (None, 30)                1860      
_________________________________________________________________
residual_block_4 (Residual_b (None, 30)                1860      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 3

In [52]:
# Same compile settings as for the subclassed model: mse loss and a default Nadam optimizer.
model.compile(loss = mse, optimizer = Nadam())

In [53]:
# Train for 5 epochs, this time also passing the scaled validation set as validation_data.
history = model.fit(x_train_scaled, y_train, epochs = 5, validation_data = (x_valid_scaled, y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [54]:
# Evaluate the Sequential model on the scaled test set; the displayed value is the mse loss.
model.evaluate(x_test_scaled, y_test)



8.00838565826416

## Losses and metrics based on model internals

- We will build a model with 5 hidden dense layers and an output layer. The model will also have an auxiliary output on top of the last hidden layer.
- The loss associated with this auxiliary output is called the reconstruction loss: it is the MSE between the reconstruction and the inputs.
- By adding this reconstruction loss we will encourage the model to preserve as much information as possible through the hidden layers, even information that is not directly used for the regression task.
- In practice this loss sometimes improves generalization.

In [48]:
class Reconstructing_regressor(keras.models.Model):
    """Regressor that adds an auxiliary reconstruction loss computed from its last hidden layer.

    The inputs go through five 30-unit SELU Dense layers. The last hidden
    output feeds both the regression output layer and a reconstruction layer
    with one unit per input feature. The mean squared difference between the
    reconstruction and the inputs, scaled by 0.05, is registered through
    add_loss() on every call.
    """
    def __init__(self, output_dims, **kwargs):
        super().__init__(**kwargs)
        # Five hidden layers: SELU activation paired with LeCun-normal initialization
        self.hidden = [Dense(30, activation = selu, kernel_initializer = lecun_normal()) for _ in range(5)]
        # Main regression output
        self.out = Dense(output_dims)
    def build(self, batch_input_shape):
        # The reconstruction layer needs as many units as there are input features,
        # which is only known once the input shape is available.
        # NOTE(review): the base-class build() is not invoked here — confirm this is deliberate.
        self.reconstruct = Dense(batch_input_shape[-1])
    def call(self, inputs, training = None):
        # `training` is accepted but not used by this forward pass
        activations = inputs
        for hidden_layer in self.hidden:
            activations = hidden_layer(activations)
        # Difference between the reconstructed inputs and the real inputs
        reconstruction_error = self.reconstruct(activations) - inputs
        # Register the down-scaled mean squared reconstruction error as an extra loss
        self.add_loss(0.05 * tf.reduce_mean(tf.square(reconstruction_error)))
        return self.out(activations)

- The reconstruction layer must be created in the build() method because its number of units must equal the number of inputs, and this number is unknown before build() is called.
- The call() method computes the reconstruction loss and adds it to the model's list of losses using the add_loss() method. We scale down the reconstruction loss by multiplying it by 0.05 (this is a hyperparameter we can tune). This ensures that the reconstruction loss doesn't dominate the main loss.

In [49]:
# pre_model() is a custom helper from Useful_funcs; presumably it prepares/resets state
# before a new model is built — TODO confirm against its definition.
pre_model()

In [50]:
# Instantiate the reconstructing regressor with a single output unit.
model = Reconstructing_regressor(1)

In [51]:
# Size of the last dimension of x_train, i.e. the value build() reads to size the reconstruction layer.
x_train.shape[-1]

8

In [52]:
# Compile with mse as the main loss and a default Nadam optimizer.
model.compile(loss = mse, optimizer = Nadam())

In [53]:
model.build(x_train.shape) # Call build() explicitly with the input shape so the reconstruction layer is created before training; build() only reads the last dimension of the shape it is given.

In [54]:
# Train for 5 epochs on the scaled training features.
history = model.fit(x_train_scaled, y_train, epochs = 5) # TODO: Follow up on tensorflow issue

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Evaluate on the scaled test set. The targets must be passed as well; the original call
# omitted them, so the main mse loss had nothing to be computed against.
model.evaluate(x_test_scaled, y_test)