<h2> More on Tensorflow </h2> 

GPUS dramatically speed up computations by splitting computations into many smaller chunks and running them in parallel across many GPU threads. 

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [3]:
a.dtype

dtype('int32')

In [4]:
b = tf.Variable(a)
b

<tf.Variable 'Variable:0' shape=(2, 3) dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]])>

In [5]:
b.scatter_nd_update(indices = [[1,1]],updates = [100])
b

<tf.Variable 'Variable:0' shape=(2, 3) dtype=int32, numpy=
array([[  1,   2,   3],
       [  4, 100,   6]])>

In [6]:
#creating a simple tensor
t = tf.constant([[1,2,3],[4,5,6]])
t.shape

TensorShape([2, 3])

In [7]:
#indexing
t[:,1:]

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[2, 3],
       [5, 6]])>

In [8]:
tf.transpose(t)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1, 4],
       [2, 5],
       [3, 6]])>

In [9]:
tf.matmul(t,tf.transpose(t))

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[14, 32],
       [32, 77]])>

<h3> Tensors and Numpy </h3> 

In [10]:
import numpy as np
a = np.array([2,4,5])

In [11]:
tf.constant(a)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 4, 5])>

In [12]:
#casting from 32bit float to a 64 bit float to sum in TF
t2 = tf.constant(40.,dtype = tf.float64)
t2

<tf.Tensor: shape=(), dtype=float64, numpy=40.0>

In [13]:
tf.constant(2.0) + tf.cast(t2,tf.float32)

<tf.Tensor: shape=(), dtype=float32, numpy=42.0>

<h3> Variables in Tensorflow </h3> 

We cannot modify constant tensors, however we can modify tf.Variable. These are required as we need to tweak weights in a neural network. 

In [14]:
v = tf.Variable([[1.,2.,3.],[4.,5.,6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

A tf.Variable acts like a constant tensor and allows to perform the same operations. But it also allows you to modify th variable using the assign() method.

In [15]:
a = tf.constant([[1,2,3],[4,5,6]])
a

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]])>

In [16]:
v.assign(2*v)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [17]:
v[0,1].assign(42)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [18]:
v.scatter_nd_update(indices=[[0,0],[1,2]],updates = [100.,200.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   6.],
       [  8.,  10., 200.]], dtype=float32)>

In [19]:
v.scatter_nd_add(indices=[[0,0],[1,2]],updates = [100.,200.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[200.,  42.,   6.],
       [  8.,  10., 400.]], dtype=float32)>

scatter methods allows you to specify the index position and add to it

<h3> Sparse Tensors </h3> 

In a Sparse Tensor, you always need to give the positions first, then the values, then the dense shape

In [20]:
s = tf.SparseTensor(indices = [[0,1],[1,0],[2,3]],
                    values = [1.,2.,3.], 
                    dense_shape=[3,4])

In [21]:
tf.sparse.to_dense(s)

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0., 1., 0., 0.],
       [2., 0., 0., 0.],
       [0., 0., 0., 3.]], dtype=float32)>

Sparse tensors need to always be given in order of indices

In [22]:
s5 = tf.SparseTensor(indices=[[0, 1], [0, 2]],
                     values=[1., 2.],
                     dense_shape=[3, 4])

In [23]:
tf.sparse.to_dense(s5)

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0., 1., 2., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)>

<h3> Tensor Arrays </h3>

We first crrate the tensor with a fixed size. we cannot add past this

In [24]:
array = tf.TensorArray(dtype = tf.float32,size = 3)
array = array.write(0,tf.constant([1.,2.]))
array = array.write(1,tf.constant([3.,10.]))
array = array.write(2,tf.constant([5.,7.]))

<h3> Customizing models and training algorithms </h3> 

<h3> Custom Loss Functions </h3> 

In [25]:
#where y_true - y_pred absolute is less than 1, we want the squared loss and linear loss
def huber_fn(y_true,y_pred):
    error = y_true - y_pred
    is_small_error = tf.abs(error) < 1
    squared_loss = tf.square(error)/2
    linear_loss = tf.abs(error) - 0.5
    return tf.where(is_small_error,squared_loss,linear_loss)

#we need to always use tensorflow functions to be used as custom loss functions. 

For the next step, we just apply this to our keras model

model.compile(loss = huber_fn, optimizer = "adam")

model.fit(.....)

In [26]:
#current implementation allows us to have one threshold of 1 in the Huber Function
#How to change the threshold?
def create_huber(threshold = 1.0):
    def huber_fn(y_true,y_pred):
        error = y_true - y_pred
        is_small_errors = tf.abs(error) <  threshold
        #mae loss
        squared_loss = tf.square(error)/2
        #mse loss
        linear_loss = threshold*tf.abs(error) - threshold**2/2
        #if your error is less than threshold, return squared loss, 
        #else return linear_loss
        return tf.where(is_small_errors, squared_loss,linear_loss)
    return huber_fn

model.compile(loss = create_huber(2.0), optimizer = "adam")

The above causes an issue when saving the model using 
keras.callbacks.ModelCheckpoint("modelname.h5", save_best_only = True)

When we load the model again we will have to specify which function is being called for the loss function as the loss function is a custom one here

model = keras.models.load_model("modelname.h5", custom_objects = {"huber_fn":create_huber(2.0)})

In [27]:
from tensorflow import keras

In [28]:
#to avoid the above we can inherit the Keras.losses.loss method

class HuberLoss(keras.losses.Loss):
    def __init__(self, threshold = 1.0, **kwargs):
        self.threshold = threshold
        #instantiate the superclass
        super().__init__(**kwargs)
    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error)/2
        linear_loss = self.threshold*tf.abs(error) - self.threshold**2/2
        return tf.where(is_small_error,squared_loss,linear_loss)
    def get_config(self):
        #you get the base configuration of superclass
        base_config = super().get_config()
        #add in the threshold and creates a new dictionary to be returned
        return {**base_config, "threshold":self.threshold}

get_config method returns a dictionary mapping each hyperparameter name to its value. It first calls the parent class's get_config() method, then adds the new hyperparameters to this dictionary.

from the above, when loading a model, we do this:

**model = keras.models.load_model("my_model_with_a_custom_loss_class.h5",
custom_objects={"HuberLoss": HuberLoss})**

Now we dont have to provide the threshold value too.

When we save the mode, Keras calls the loss instance (HuberLoss) get_config() method and the returned dictionary is stored as a JSON in the h5 file. 

When loaded, it calls **from_config()** and creates the instance of the class, passing the return from **from_config()** to **kwargs.

<h3>Custom Activation Functions, Initializers, Regularizers, and Constraints</h3>

In [29]:
#defining custom activation functions 
def my_softplus(z):
    return tf.math.log(tf.exp(z) + 1.0)

In [30]:
def my_glorot_initializer(shape,dtype = tf.float32):
    stddev = tf.sqrt(2/(shape[0] + shape[1]))
    return tf.random.normal(shape, stddev = stddev, dtype = dtype)

In [31]:
def my_li_regularizer(weights):
    return tf.reduce_sum(tf.abs(0.01 * weights))

In [32]:
#function to return only positive weights
def my_positive_weights(weights):
    return tf.where(weights<0, tf.zeros_like(weights),weights)

In [33]:
#custom regularizer using subclassing

class MyL1Regularizer(keras.regularizers.Regularizer):
    def __init__(self,regfactor):
        self.factor = regfactor
    def __call__(self,weights):
        return tf.reduce_sum(tf.abs(self.factor * weights))
    def get_config(self):
        return {"regfactor":self.factor}

For losses, layers and models we implement the call() method, and for regularizers, initializers and constraints we use the _____call_____() method. 

<h3>Custom Metrics </h3> 



In [34]:
a = tf.Variable([[2,2,2],[2,2,2]])
a = tf.cast(a,tf.float32)
a

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[2., 2., 2.],
       [2., 2., 2.]], dtype=float32)>

In [35]:
tf.reduce_sum(tf.abs(0.01*a))

<tf.Tensor: shape=(), dtype=float32, numpy=0.11999999>

At each training step the weights will be passed to the regularization function to compute the regularization loss. The return is then added to the main loss to get the final loss used for training. 

<h3> Custom Metrics </h3> 

Usually metrics keep track of the mean of a metric from each epoch. 

Suppose we have 5 true predictions, but 4 true positives. Thats 0.8 precision. Next epoch, we have 3 true predictions with 0 true positives. Thats 0 precision, but if we use mean, it becomes 0.4 precision overall

But the actual one is 8 true predictions with 4 true positives in total. Thats 0.5 precision. Hence we need an object to track the true positives and an object to track the predictions. 


In [36]:
from tensorflow import keras
precision = keras.metrics.Precision()
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])

<tf.Tensor: shape=(), dtype=float32, numpy=0.8>

In [37]:
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

Note above that the same precision object, under 2 runs prduces the overal precision of the data fed into it. Not a mean, or not a new precision for the the new run.

This is called a streaming metric. 

In [38]:
precision.variables

[<tf.Variable 'true_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>,
 <tf.Variable 'false_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>]

In [39]:
def create_huber(threshold = 1.0):
    def huber_fn(y_true,y_pred):
        error = y_true - y_pred
        is_small_errors = tf.abs(error) <  threshold
        #mae loss
        squared_loss = tf.square(error)/2
        #mse loss
        linear_loss = threshold*tf.abs(error) - threshold**2/2
        #if your error is less than threshold, return squared loss, 
        #else return linear_loss
        return tf.where(is_small_errors, squared_loss,linear_loss)
    return huber_fn

In [40]:
#creating streaming metrics
class HuberMetric(keras.metrics.Metric):
    def __init__(self,threshold = 1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
        #returns a huber function to be used in this class
        self.huber_fn = create_huber(threshold)
        #add weight creates variables to keep track of attributes
        self.total = self.add_weight("total",initializer= "zeros")
        self.count = self.add_weight("count",initializer= "zeros")
    def update_state(self,y_true,y_pred,sample_weight = None):
        result = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(result))
        self.count.assign_add(tf.cast(tf.size(y_pred), tf.float32))
    def result(self):
        return self.total/self.count
    def get_config(self):
        base_config = self.get_config()
        return {**base_config, "threshold":self.threshold}

Both customPrecision and HuberMetric are inherriting from keras.metrics.Metric which is an abstract class. Abstract classes contain methods which need to be surely built in the child class.

In [41]:
class customPrecision(keras.metrics.Metric):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def update_state(self,ytrue, ypred,sample_weight = None):
        try: 
            for _class in self.unique_classes:
                self.total_true[_class] = self.total_true[_class] + tf.where(ytrue == _class).shape[0]
                self.total_prediction[_class] = self.total_prediction[_class] + tf.where(ypred == _class).shape[0]
                self.total_truepositives[_class] = self.total_truepositives[_class] + len([i for i in tf.where(ypred == _class) if i in tf.where(ytrue == _class)])
            #return [self.total_prediction,self.total_truepositives]
        except:
            self.unique,_,_ = tf.unique_with_counts(ytrue)
            #obtains the unique classes to a list that we can loop through
            self.unique_classes = self.unique.numpy()
            self.total_truepositives = {_class:0 for _class in self.unique_classes}
            self.total_prediction = {_class:0 for _class in self.unique_classes}
            self.precision_classbase = {_class:0 for _class in self.unique_classes}
            self.total_true = {_class:0 for _class in self.unique_classes}
            for _class in self.unique_classes:
                self.total_true[_class] = self.total_true[_class] + tf.where(ytrue == _class).shape[0]
                self.total_prediction[_class] = self.total_prediction[_class] + tf.where(ypred == _class).shape[0]
                self.total_truepositives[_class] = self.total_truepositives[_class] + len([i for i in tf.where(ypred == _class) if i in tf.where(ytrue == _class)])
            #return [self.total_prediction,self.total_truepositives]
    def result(self):
        for _class in self.unique_classes:
            self.precision_classbase[_class] = self.total_truepositives[_class]/self.total_prediction[_class]
            self.precision_classbase[_class] = self.precision_classbase[_class] * (self.total_true[_class]/sum(self.total_true.values()))
        return sum(self.precision_classbase.values())
    def get_config(self):
        base_config = self.get_config()
        return {**base_config}       

In [42]:
ytrue1 = tf.Variable([0, 1, 1, 1, 0, 1, 0, 1])
ypred1 = tf.Variable([1, 1, 0, 1, 0, 1, 0, 1])

In [43]:
prec_test1 = customPrecision()

In [44]:
prec_test1(ytrue1,ypred1)

<tf.Tensor: shape=(), dtype=float32, numpy=0.75>

In [45]:
prec_test1.precision_classbase

{0: 0.25, 1: 0.5}

In [46]:
ytrue2 = tf.Variable([0, 1, 0, 0, 1, 0, 1, 1])
ypred2 = tf.Variable([1, 0, 1, 1, 0, 0, 0, 0])

In [47]:
prec_test1(ytrue2,ypred2)

<tf.Tensor: shape=(), dtype=float32, numpy=0.4453125>

In [48]:
a = {0:4,1:3}

In [49]:
prec_test1.total_true

{0: 7, 1: 9}

<h3> Custom Layers </h3>

Sometimes we need to make layers which Tensorflow does not provide  default implementation for. In this case we need to create a custom layer.

Also we might want to club repeating layer patterns into one. 
E.g.: If you have layer pattern A,B,C,A,B,C,A,B,C we can make one layer D containing A,B,C and make D,D,D.

In [50]:
#to build custom layers with weights in them, we need to subclass of the keras.layers.Layer class
class MyDense(keras.layers.Layer):
    def __init__(self,units,activation = None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)
        #in build you build your kernel (weight matrix) and bias
    def build(self,batch_input_shape):
        #batch input shape is the number of features in the input to this layer
        #batch_input_shape = [batch_size, no.of features in each batch instance]
        self.kernel = self.add_weight(name = "kernel",
        shape = [batch_input_shape[-1],self.units],
        initializer="glorot_normal")
        self.bias = self.add_weight(name = "bias",
        shape = [self.units], initializer = "zeros")
        #sets self.built = True in parent class
        super().build(batch_input_shape)
    #call returns the output of the matrix multiplication or any result
    def call(self,X):
        return self.activation(tf.matmul(X,self.kernel) + self.bias)
    def compute_output_shape(self,batch_input_shape):
        #output shape will be [batch_size, no. of units]
        return tf.TensorShape(batch_input_shape.as_list()[:-1] + [self.units])
    def get_config(self):
        base_config = super().get_config()
        return {**base_config,"units":self.units,
        "activation":keras.activations.serialize(self.activation)}

In [51]:
#using the above custom layer
model = keras.models.Sequential()
#notice that input_shape is part of the **kwargs argument
model.add(MyDense(30, activation = "relu", input_shape = (8,)))
model.add(MyDense(1))

In [52]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
my_dense (MyDense)           (None, 30)                270       
_________________________________________________________________
my_dense_1 (MyDense)         (None, 1)                 31        
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________


In [53]:
model.layers[0].bias

<tf.Variable 'my_dense/bias:0' shape=(30,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>

<h3> Custom Models </h3> 

Lets say we are building a model with one dense input, which passes through an identical block(Residual Block), containing 2 dense layers and an addition of the input layer to the result of the 2 dense layers. The identical block is used 3 times.

In [54]:
#first we build this Residual Block
class ResidualBlock(keras.layers.Layer):
    def __init__(self,n_neurons,n_layers,**kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(n_neurons,activation = "elu",kernel_initializer="he_normal") for _ in range(n_layers)]
    #call returns the output of the matrix multiplication or any result
    def call(self,inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        #as the Residual block adds the output of the layers into
        return inputs + Z

In [55]:
#notice that although we inherit from keras.layers.Layer, we can still use normal keras.layers.Dense in it, and implement calculations using these Dense layers from call()
class ResidualRegressor(keras.models.Model):
    def __init__(self,output_dim,**kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(30,activation = "elu",kernel_initializer="he_normal")
        self.block1 = ResidualBlock(2,30)
        self.block2 = ResidualBlock(2,30)
        self.out = keras.layers.Dense(output_dim)
    def call(self, inputs):
        Z = self.hidden1(inputs)
        for _ in range(1 + 3):
            Z = self.block1(Z)
        Z = self.block2(Z)
        return self.out(Z)
#we can use keras.layers.Layer to create custom Layers. we can use keras.models.Model to create custom models with custom layers in it. 

#then we can create instance of the keras.models.Model class and compile, fit as required

#Remember than keras.models.Model is a subclass of keras.layers.Layer with more functionality

In [56]:
model2 = ResidualRegressor(1)

In [57]:

block1 = ResidualBlock(2, 30)
model3 = keras.models.Sequential([
    keras.layers.Dense(30, activation="elu", kernel_initializer="he_normal"),
    block1, block1, block1, block1,
    ResidualBlock(2, 30),
    keras.layers.Dense(1)
])

<h3> Losses and Metrics Based on Model Internals </h3> 

Usually losses and metrics are based on predictions and ocassionaly sample weights. However we sometimes want it to be based on other parts of the model. 

In [58]:
class ReconstructingInputRegressor(keras.models.Model):
    def __init__(self,output_dim,**kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation = "selu", kernel_initializer = "lecun_normal") for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)
    def build(self,batch_input_shape):
        self.reconstructlayer = keras.layers.Dense(batch_input_shape[-1])
        self.build(batch_input_shape)
    def call(self,inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruct = self.reconstructlayer(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruct - inputs))
        self.add_loss(0.05*recon_loss)
        return self.out(Z)

In [59]:
class ReconstructingInputRegressor2(keras.models.Model):
    def __init__(self,output_dim,input_dim,**kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation = "selu", kernel_initializer = "lecun_normal") for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)
        self.reconstructlayer = keras.layers.Dense(input_dim)
        #adding a loss as a metric
        #self.reconmetric = keras.metrics.Mean(name = "recon_error")
    """ def build(self,batch_input_shape):
        self.reconstructlayer = keras.layers.Dense(batch_input_shape[-1])
        self.build(batch_input_shape) """
    def call(self,inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruct = self.reconstructlayer(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruct - inputs))
        #self.add_loss(0.05*recon_loss)
        self.add_metric(0.05*recon_loss, name = "recon_error")
        return self.out(Z)

In [123]:
class ReconstructingInputRegressor3(keras.layers.Layer):
    def __init__(self,input_dims,no_units,no_hidden_layers,**kwargs):
        super().__init__(**kwargs)
        self.layer_list = list()
        for _ in range(1,no_hidden_layers+1):
            if _ == 1:
                self.layer_list.append(self.add_weight(name = "kernel{}".format(_), shape = [input_dims, no_units], initializer="he_normal"))
            else:
                self.layer_list.append(self.add_weight(name = "kernel{}".format(_), shape = [no_units, no_units], initializer="he_normal"))

        self.reconstructlayer = self.add_weight(name = "reconstruct", 
        shape = [no_units, input_dims], initializer="he_normal")

        self.outputlayer = self.add_weight(name = "output", 
        shape = [no_units, 1], initializer="he_normal")
    def call(self,inputs):
        Z = inputs
        for layer in self.layer_list:
            Z = tf.matmul(Z,layer)
        reconstruct = tf.matmul(Z,self.reconstructlayer)
        recon_loss = tf.reduce_mean(tf.square(reconstruct - inputs))
        self.add_loss(0.05*recon_loss)
        self.add_metric(0.05*recon_loss,name = "recon_error")
        return tf.matmul(reconstruct,self.outputlayer)

<h3> add_loss </h3> 

A typical loss function takes ypred and ytrue and calculates the loss and tries to minimize it. But some loss functions require more than just that and will require you to do calculations in the call method of your model and use it to calculate a different loss which can be minimized. 

https://stackoverflow.com/questions/50063613/what-is-the-purpose-of-the-add-loss-function-in-keras

In [124]:
import numpy as np
X_dummy = np.random.randn(10,8)
y_dummy = np.random.randn(10,1)

In [125]:
layer1 = ReconstructingInputRegressor3(8,8,5)
model_recon = keras.models.Sequential()
model_recon.add(layer1)
model_recon.compile(loss = "mse",optimizer = "adam")

In [126]:
model_recon.fit(X_dummy,y_dummy, epochs = 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1f9ea0b44c0>

<h3> Custom layer to simulate a forward pass only </h3> 

In [107]:
#custom layer
class forwardPasslayer(keras.layers.Layer):
    def __init__(self,units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        #self.output_dim = output_dim
    def build(self,batch_input_shape):
        self.kernel1 = self.add_weight(name = "kernel",
        shape = [batch_input_shape[-1],self.units],
        initializer="zeros")
        self.kernel2= self.add_weight(name = "kernel1",
        shape = [self.units,self.units], initializer = "he_normal")
    def call(self,X):
        intermediate = tf.math.add(tf.matmul(X,self.kernel1),1)
        return tf.matmul(intermediate,self.kernel2)
    def compute_output_shape(self,batch_input_shape):
        return tf.TensorShape([self.units] + [self.units])

In [108]:
X_test1 = np.random.randn(10,6)

In [109]:
model_zeros = keras.models.Sequential()
model_zeros.add(forwardPasslayer(8, input_shape = (6,)))

In [110]:
model_zeros.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
forward_passlayer_2 (forward (None, 8)                 112       
Total params: 112
Trainable params: 112
Non-trainable params: 0
_________________________________________________________________


In [111]:
model_zeros(X_test1)

<tf.Tensor: shape=(10, 8), dtype=float32, numpy=
array([[-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896 ,  1.0251961 ],
       [-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896 ,  1.0251961 ],
       [-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896 ,  1.0251961 ],
       [-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896 ,  1.0251961 ],
       [-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896 ,  1.0251961 ],
       [-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896 ,  1.0251961 ],
       [-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896 ,  1.0251961 ],
       [-0.12541538,  1.0706236 , -0.569745  ,  0.26126125,  0.21524581,
        -1.100515  , -2.4108896

<h3> Computing Gradient using Autodiff </h3> 

 

In [68]:
#toy function
def f(w1,w2):
    return 3*w1 ** 2 + 2*w1*w2

In [69]:
w1 = 5
w2 = 3
eps = 1e-6
(f(w1 + eps,w2) - f(w1,w2))/eps

36.000003007075065

In [70]:
(f(w1,w2 + eps) - f(w1,w2))/eps

10.000000003174137

<h3> Using Autodiff with Tensorflow </h3> 

In [71]:
w1 = tf.Variable(5.)
w2 = tf.Variable(3.)

with tf.GradientTape() as tape:
    z = f(w1,w2)

In [72]:
gradients = tape.gradient(z,[w1,w2])

In [73]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

<h3> Gradient Tapes </h3> 

Tensorflow records relevant operations executed inside the context of a tf.GradientTape onto a "tape". This tape is used to compute the gradients of a recorded computation.

In [74]:
w = tf.Variable(tf.random.normal((3,2)),name = "w")
w

<tf.Variable 'w:0' shape=(3, 2) dtype=float32, numpy=
array([[-1.3198097 ,  0.05966563],
       [ 1.5557507 ,  0.678337  ],
       [ 0.53418165,  0.07422443]], dtype=float32)>

In [75]:
b = tf.Variable(tf.zeros(2,dtype = tf.float32),name = 'b')
b

<tf.Variable 'b:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>

In [76]:
x = [[1.,2.,3.]]
x

[[1.0, 2.0, 3.0]]

In [77]:
#derivatives of tensors
w = tf.Variable(tf.random.normal((3, 2)), name='w')
b = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')
x = [[1., 2., 3.]]

with tf.GradientTape(persistent=True) as tape:
  y = x @ w + b
  loss = tf.reduce_mean(y**2)

In [78]:
[dl_dw, dl_db] = tape.gradient(loss, [w, b])

In [79]:
print(dl_dw.shape)
print(dl_db.shape)

(3, 2)
(2,)


<h3> Gradients with respect to a model </h3> 

In [80]:
layer = keras.layers.Dense(2,activation = "relu")
x = tf.constant([[1.,2.,3.]])

In [81]:
with tf.GradientTape() as tape:
    y = layer(x)
    loss = tf.reduce_mean(y**2)

In [82]:
grad = tape.gradient(loss,layer.trainable_variables)

By default, the tape will only track operations involving variables, so if you try to compute the gradient of z with regards to anything else than a variable, the result will
be None

Sometimes you will have to track how the gradient changes with respect to constants too to penalize functions that vary a lot when the inputs vary a little.

In [83]:
#specifying which gradients to watch
w = tf.Variable(tf.random.normal((3, 2)), name='w')
b = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')
x = [[1., 2., 3.]]

with tf.GradientTape(watch_accessed_variables=False) as tape:
  tape.watch(w)
  y = x @ w + b
  loss = tf.reduce_mean(y**2)

In [84]:
dy_dw,dy_db = tape.gradient(y,[w,b])

In [85]:
dy_dw.numpy()

array([[1., 1.],
       [2., 2.],
       [3., 3.]], dtype=float32)

In [86]:
#this is None as it wasnt being watched
dy_db

In [87]:
x1 = tf.Variable(20.0)
x2 = tf.Variable(30.0)

In [88]:
def f1(w1,w2):
    return w1**3 + w2**2 + w1*w2

In [89]:
with tf.GradientTape(persistent=True) as hessian_tape:
    with tf.GradientTape() as jacobian_tape:
        z = f1(x1,x2)
    jacobians = jacobian_tape.gradient(z,[x1,x2])


In [90]:
jacobians

[<tf.Tensor: shape=(), dtype=float32, numpy=1230.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=80.0>]

In [91]:
hessians = [hessian_tape.gradient(jacobian,[x1,x2]) for jacobian in jacobians]

In [92]:
hessians

[[<tf.Tensor: shape=(), dtype=float32, numpy=120.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=1.0>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=1.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=2.0>]]

In [93]:
hessian_2 = hessian_tape.gradient(jacobians,[x1,x2])

In [94]:
hessian_2

[<tf.Tensor: shape=(), dtype=float32, numpy=121.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=3.0>]

If we do not call the jacobian for each partial derivative in the list jacobians, the hessians become the some of each first partial derivative

<h3> Writing Custom Training Loops </h3> 

In [95]:
#building a model
l2_reg = keras.regularizers.l2(0.05)
model_custom = keras.models.Sequential()
model_custom.add(keras.layers.Dense(30,activation = 'elu',kernel_initializer = "he_normal", kernel_regularizer = l2_reg))
model_custom.add(keras.layers.Dense(1, kernel_regularizer = l2_reg))

In [96]:
def random_batch(X,y, batch_size = 32):
    idx = np.random.randint(len(X), size = batch_size)
    return X[idx],y[idx]

In [97]:
def print_status_bar(iteration, total, loss, metrics = None):
    metrics_or_loss = '-'.join(['{}:{:.4f}'.format(m.name,m.result()) for m in [loss] + (metrics or [])])
    end = "" if iteration < loss else "\n"
    print("\r{}/{} - ".format(iteration,total) + metrics_or_loss, end = end)

In [98]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [99]:
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

In [100]:
n_epochs = 5
batch_size = 32
n_steps_per_epoch = X_train_scaled.shape[0]/batch_size
optimizer = keras.optimizers.Nadam(lr = 0.01)
loss_fn = keras.losses.mean_squared_error
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

In [101]:
for epoch in range(1,n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1,n_steps_per_epoch + 1):
        X_batch,y_batch = random_batch(X_train_scaled,y_train)
        with tf.GradientTape() as tape:
            y_pred = model_custom(X_batch)
            loss = tf.add_n(model_custom.losses, mean_loss(loss_fn(y_batch,y_pred)))
        grad = tape.gradient(loss, model.trainable_variables)
        

Epoch 1/5


TypeError: 'float' object cannot be interpreted as an integer

In [124]:
X_batch,y_batch = random_batch(X_train_scaled,y_train)

In [125]:
y_pred = model_custom(X_batch)

In [126]:
loss_fn(y_batch,y_pred)

<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([3.7750752e+00, 8.7664551e-01, 1.6342984e-01, 2.0231834e+01,
       4.0239644e+00, 1.2731744e+01, 4.5532937e+00, 1.0202341e+01,
       4.3532410e+00, 1.1956502e+01, 4.5054402e+00, 4.6176765e-02,
       6.3036221e-01, 7.8236265e-03, 3.6088729e+00, 2.8521307e+00,
       1.8072631e+00, 5.1269202e+00, 2.0749578e+00, 5.1104884e+00,
       2.2987299e+00, 1.4141642e+00, 3.3975153e+00, 5.1092470e-01,
       1.1012134e+01, 5.9215255e+00, 2.9401927e+00, 8.0327368e+00,
       1.8712255e+01, 5.4398708e+00, 2.9818167e+01, 3.1169047e+00],
      dtype=float32)>

In [122]:
tf.reduce_mean(loss_fn(y_batch,y_pred))

<tf.Tensor: shape=(), dtype=float32, numpy=5.5209675>

In [117]:
mean_loss(loss_fn(y_batch,y_pred))

<tf.Tensor: shape=(), dtype=float32, numpy=6.099476>

In [127]:
[tf.reduce_mean(loss_fn(y_batch,y_pred))] + model_custom.losses

[<tf.Tensor: shape=(), dtype=float32, numpy=5.976676>,
 <tf.Tensor: shape=(), dtype=float32, numpy=2.7362802>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.1013649>]

In [129]:
model_custom.losses

False