# Custom Loss function

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# inputs
xs = np.array([-1.0,  0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)

# labels
ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)

In [8]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1,input_shape=[1])
])

model.compile(optimizer='sgd',loss='mean_squared_error')
model.fit(xs,ys,epochs=500,verbose=0)
print(model.predict([10.0]))

[[18.9853]]


## Huber loss 

In [4]:
def my_huber_loss(y_true,y_pred):
    threshold = 1
    error = y_true-y_pred
    is_small_error = tf.abs(error)<=threshold
    small_error_loss = tf.square(error)/2
    big_error_loss = threshold*(tf.abs(error)-(0.5*threshold))
    return tf.where(is_small_error,small_error_loss,big_error_loss)

In [5]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1,input_shape=[1])
])

model.compile(optimizer='sgd',loss=my_huber_loss)
model.fit(xs,ys,epochs=500,verbose=0)
print(model.predict([10.0]))

[[18.74939]]


## Hyperparameters - nested function

In [6]:
def my_huber_loss_with_threshold(threshold):
    def my_huber_loss(y_true,y_pred):
#         threshold = 1
        error = y_true-y_pred
        is_small_error = tf.abs(error)<=threshold
        small_error_loss = tf.square(error)/2
        big_error_loss = threshold*(tf.abs(error)-(0.5*threshold))
        return tf.where(is_small_error,small_error_loss,big_error_loss)
    return my_huber_loss

In [7]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1,input_shape=[1])
])

model.compile(optimizer='sgd',loss=my_huber_loss_with_threshold(threshold=0.8))
model.fit(xs,ys,epochs=500,verbose=0)
print(model.predict([10.0]))

[[18.109344]]


## Class for loss function

In [21]:
from tensorflow.keras.losses import Loss

class MyHuberLoss(Loss):
    threshold=1
    
    def __init__(self,threshold):
        super().__init__()
        self.threshold = threshold
        
    def call(self,y_true,y_pred):
        error = y_true-y_pred
        is_small_error = tf.abs(error)<=self.threshold
        small_error_loss = tf.square(error)/2
        big_error_loss = self.threshold*(tf.abs(error)-(0.5*self.threshold))
        return tf.where(is_small_error,small_error_loss,big_error_loss)
        
model.compile(optimizer='sgd',loss=MyHuberLoss(threshold=0.8))
model.fit(xs,ys,epochs=500,verbose=0)
print(model.predict([10.0]))    

[[18.966953]]


# Contrastive Loss

In [26]:
import tensorflow.keras.backend as k
def contrastive_loss_with_margin(margin):
    def contrastive_loss(y_true,y_pred):
#         margin=1
        square_pred = tf.square(y_pred)
        margin_square = tf.square(tf.maximum(margin-y_pred,0))
        return k.mean(y_true*square_pred+(1-y_true)*margin_square)
    return contrastive_loss

In [29]:
from tensorflow.keras.losses import Loss

class contrastive_loss(Loss):
    margin=1
    
    def __init__(self,margin):
        super().__init__()
        self.margin = margin
        
    def call(self,y_true,y_pred):
        square_pred = tf.square(y_pred)
        margin_square = tf.square(tf.maximum(self.margin-y_pred,0))
        return k.mean(y_true*square_pred+(1-y_true)*margin_square)
        
model.compile(optimizer='sgd',loss=contrastive_loss(margin=0.8))
model.fit(xs,ys,epochs=500,verbose=0)
print(model.predict([10.0]))   

[[-27809772.]]


# Lambda Layer

In [30]:
%reset -f
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras.datasets import fashion_mnist

In [31]:
# load the dataset
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# prepare train and test sets
train_images = train_images.astype('float32')
test_images = test_images.astype('float32')

# normalize values
train_images = train_images / 255.0
test_images = test_images / 255.0

In [33]:
model = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=(28,28)),
                                    tf.keras.layers.Dense(128,activation='relu'),
                                    tf.keras.layers.Dense(10, activation=tf.nn.softmax)])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=10,verbose=2)

Train on 60000 samples
Epoch 1/10
60000/60000 - 4s - loss: 0.4984 - accuracy: 0.8249
Epoch 2/10
60000/60000 - 3s - loss: 0.3737 - accuracy: 0.8654
Epoch 3/10
60000/60000 - 3s - loss: 0.3366 - accuracy: 0.8781
Epoch 4/10
60000/60000 - 3s - loss: 0.3103 - accuracy: 0.8863
Epoch 5/10
60000/60000 - 3s - loss: 0.2957 - accuracy: 0.8924
Epoch 6/10
60000/60000 - 3s - loss: 0.2802 - accuracy: 0.8971
Epoch 7/10
60000/60000 - 3s - loss: 0.2686 - accuracy: 0.9002
Epoch 8/10
60000/60000 - 3s - loss: 0.2598 - accuracy: 0.9038
Epoch 9/10
60000/60000 - 3s - loss: 0.2508 - accuracy: 0.9066
Epoch 10/10
60000/60000 - 3s - loss: 0.2406 - accuracy: 0.9092


<tensorflow.python.keras.callbacks.History at 0x29453011160>

In [34]:
# Relu activation removed from 1st layer, degrades the accuracy
model = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=(28,28)),
                                    tf.keras.layers.Dense(128),
                                    tf.keras.layers.Dense(10, activation=tf.nn.softmax)])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=10,verbose=2)

Train on 60000 samples
Epoch 1/10
60000/60000 - 4s - loss: 0.5340 - accuracy: 0.8135
Epoch 2/10
60000/60000 - 3s - loss: 0.4580 - accuracy: 0.8406
Epoch 3/10
60000/60000 - 3s - loss: 0.4404 - accuracy: 0.8456
Epoch 4/10
60000/60000 - 3s - loss: 0.4302 - accuracy: 0.8495
Epoch 5/10
60000/60000 - 3s - loss: 0.4229 - accuracy: 0.8528
Epoch 6/10
60000/60000 - 3s - loss: 0.4197 - accuracy: 0.8531
Epoch 7/10
60000/60000 - 3s - loss: 0.4137 - accuracy: 0.8548
Epoch 8/10
60000/60000 - 3s - loss: 0.4103 - accuracy: 0.8549
Epoch 9/10
60000/60000 - 3s - loss: 0.4058 - accuracy: 0.8577
Epoch 10/10
60000/60000 - 3s - loss: 0.4050 - accuracy: 0.8582


<tensorflow.python.keras.callbacks.History at 0x29454811518>

In [35]:
# Adding a custom layer instead of Relu improves the performance
model = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=(28,28)),
                                    tf.keras.layers.Dense(128),
                                    tf.keras.layers.Lambda(lambda x:tf.abs(x)),
                                    tf.keras.layers.Dense(10, activation=tf.nn.softmax)])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=10,verbose=2)

Train on 60000 samples
Epoch 1/10
60000/60000 - 4s - loss: 0.5151 - accuracy: 0.8230
Epoch 2/10
60000/60000 - 3s - loss: 0.3913 - accuracy: 0.8615
Epoch 3/10
60000/60000 - 4s - loss: 0.3558 - accuracy: 0.8724
Epoch 4/10
60000/60000 - 3s - loss: 0.3314 - accuracy: 0.8806
Epoch 5/10
60000/60000 - 3s - loss: 0.3151 - accuracy: 0.8867
Epoch 6/10
60000/60000 - 3s - loss: 0.3002 - accuracy: 0.8921
Epoch 7/10
60000/60000 - 3s - loss: 0.2868 - accuracy: 0.8967
Epoch 8/10
60000/60000 - 3s - loss: 0.2776 - accuracy: 0.8989
Epoch 9/10
60000/60000 - 3s - loss: 0.2691 - accuracy: 0.9017
Epoch 10/10
60000/60000 - 3s - loss: 0.2635 - accuracy: 0.9039


<tensorflow.python.keras.callbacks.History at 0x29452802748>

## Modified Relu

In [40]:
import tensorflow.keras.backend as K
def my_relu(x):
    return K.maximum(-0.5,x)

In [41]:
model = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=(28,28)),
                                    tf.keras.layers.Dense(128),
                                    tf.keras.layers.Lambda(my_relu),
                                    tf.keras.layers.Dense(10, activation=tf.nn.softmax)])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=10,verbose=2)

Train on 60000 samples
Epoch 1/10
60000/60000 - 5s - loss: 0.4867 - accuracy: 0.8260
Epoch 2/10
60000/60000 - 5s - loss: 0.3710 - accuracy: 0.8646
Epoch 3/10
60000/60000 - 5s - loss: 0.3339 - accuracy: 0.8769
Epoch 4/10
60000/60000 - 5s - loss: 0.3100 - accuracy: 0.8853
Epoch 5/10
60000/60000 - 5s - loss: 0.2930 - accuracy: 0.8906
Epoch 6/10
60000/60000 - 5s - loss: 0.2793 - accuracy: 0.8955
Epoch 7/10
60000/60000 - 5s - loss: 0.2671 - accuracy: 0.8997
Epoch 8/10
60000/60000 - 5s - loss: 0.2546 - accuracy: 0.9054
Epoch 9/10
60000/60000 - 5s - loss: 0.2463 - accuracy: 0.9089
Epoch 10/10
60000/60000 - 5s - loss: 0.2354 - accuracy: 0.9108


<tensorflow.python.keras.callbacks.History at 0x29453c6bac8>

# Custom Layer - That are trainable

In [46]:
# Inherit from keras Layer
# initialize with __init__
# build method for weights and bias initialization
# call method for computaion from input to output

from tensorflow.keras.layers import Layer

class SimpleDense(Layer):

    def __init__(self,units=32,activation=None):
        super(SimpleDense,self).__init__()
        self.units=units
        self.activation = tf.keras.activations.get(activation)
        
        
    def build(self, input_shape):
        '''Create the state of the layer (weights)'''
        # initialize the weights
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(name="kernel",
            initial_value=w_init(shape=(input_shape[-1], self.units),
                                 dtype='float32'),
            trainable=True)

        # initialize the biases
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(name="bias",
            initial_value=b_init(shape=(self.units,), dtype='float32'),
            trainable=True)

    def call(self, inputs):
        '''Defines the computation from inputs to outputs'''
        return self.activation(tf.matmul(inputs, self.w) + self.b)

In [47]:
# declare an instance of the class
my_dense = SimpleDense(units=1)

# define an input and feed into the layer
x = tf.ones((1, 1))
y = my_dense(x)

# parameters of the base Layer class like `variables` can be used
print(my_dense.variables)

[<tf.Variable 'simple_dense_2/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[0.03428066]], dtype=float32)>, <tf.Variable 'simple_dense_2/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]


In [50]:
# define the dataset
xs = np.array([-1.0,  0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)
ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)


# use the Sequential API to build a model with our custom layer
my_layer = SimpleDense(units=1,activation='relu')
model = tf.keras.Sequential([my_layer])

# configure and train the model
model.compile(optimizer='sgd', loss='mean_squared_error')
model.fit(xs, ys, epochs=500,verbose=0)

# perform inference
print(model.predict([10.0]))

# see the updated state of the variables
print(my_layer.variables)

[[17.9529]]
[<tf.Variable 'sequential_19/simple_dense_5/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[1.851683]], dtype=float32)>, <tf.Variable 'sequential_19/simple_dense_5/bias:0' shape=(1,) dtype=float32, numpy=array([-0.5639308], dtype=float32)>]


# Custom Model

In [1]:
%reset -f
import tensorflow as tf
from tensorflow.keras import backend as K

data = tf.keras.datasets.fashion_mnist

(training_images, training_labels), (test_images, test_labels) = data.load_data()

training_images  = training_images / 255.0
test_images = test_images / 255.0



class MNISTNetwork(tf.keras.Model):
    def __init__(self,**kwargs):
        super(MNISTNetwork,self).__init__(**kwargs)
        self.flatten = tf.keras.layers.Flatten()
        self.Dense1 = tf.keras.layers.Dense(128,activation='relu')
        self.classification = tf.keras.layers.Dense(10,activation='softmax')
    def call(self,inputs):
        input_a = inputs
        x = self.flatten(input_a)
        x = self.Dense1(x)
        outputs = self.classification(x)
        
        return outputs

    
    
model = MNISTNetwork()      


model.compile(optimizer='adam',
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])



history = model.fit(training_images, training_labels, epochs=10)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
