# **Training and Evaluation with build-in methods**

## **Setup && Introduction**

In [1]:
import tensorboard

import tensorflow as tf
import tensorflow.keras as tfk
from tensorflow.keras import layers as lyrs

import numpy as np
import matplotlib.pyplot as plt

In [2]:
# HYPERPARAMETERS uniform through out the code
BS = 64 # Batch_Size
EP = 10 # Epochs
VB = 1 # Verbose

## **API overview: An End-to-End Example**

In [3]:
# Creating a basic Mnist Classifier for starting
(tr_data, tr_lbls), (ts_data, ts_lbls) = tfk.datasets.mnist.load_data()

tr_data = tr_data.reshape(-1, 28*28).astype('float32') / 255.0
ts_data = ts_data.reshape(-1, 28*28).astype('float32') / 255.0

val_data = tr_data[:-10000] # Reserve last 10,000 images for validation set
val_lbls = tr_lbls[:-10000] # Take last 10,000 labels as-well.

In [4]:
# Define the Model Structure and Compile
Model_mnist = tfk.Sequential(
    [
        lyrs.Dense(128, activation='relu', input_shape=(784,), name= 'InputLayer'),
        lyrs.Dense(64, activation='relu', name= 'Dense_1'),
        lyrs.Dense(10, activation='softmax', name= 'OutputLayer'),
    ], name= 'Digits_Recognition_Model'
)

Model_mnist.summary()

Model_mnist.compile(
    optimizer=tfk.optimizers.Nadam(),
    loss= tfk.losses.SparseCategoricalCrossentropy(),
    metrics= [tfk.metrics.SparseCategoricalAccuracy()]
)

Model: "Digits_Recognition_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 InputLayer (Dense)          (None, 128)               100480    
                                                                 
 Dense_1 (Dense)             (None, 64)                8256      
                                                                 
 OutputLayer (Dense)         (None, 10)                650       
                                                                 
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Training the Model and seeing the result!!
Model_mnist.fit(
    tr_data, tr_lbls,
    validation_data= (val_data, val_lbls),
    epochs= EP, verbose= VB,
    batch_size= BS
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x217c1183e50>

In [6]:
# Evaluating the Model on Test Data!!
Model_mnist.evaluate(ts_data, ts_lbls, verbose= VB, batch_size= BS)



[0.0842050313949585, 0.979200005531311]

## **The Compile Method**

In [29]:
# A vauge Compile Function
Model_mnist.compile(
    optimizer= 'Adam',
    loss= 'MSE',
    metrics= ['accuracy']
)
# Typically, the compile function takes 3 main arguments
# OPTIMIZER, LOSS_FUNCTION,  and EVALUATION_METRIC(optional)

def get_model():
    Inp = tfk.Input(shape= (784, ))
    lyr = lyrs.Dense(64, activation='relu')(Inp)
    lyr = lyrs.Dense(64, activation='relu')(lyr)
    Out = lyrs.Dense(10)(lyr)

    return tfk.Model(inputs= Inp, outputs= Out)

def compile_model(model):
    model.summary()
    
    model.compile(
        optimizer= 'Adam',
        loss= 'MSE',
        metrics= ['accuracy']
    )
    return model

# Once the Model is defined, it can be compiled
compiled_model = compile_model(get_model())

Model: "model_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 784)]             0         
                                                                 
 dense_50 (Dense)            (None, 64)                50240     
                                                                 
 dense_51 (Dense)            (None, 64)                4160      
                                                                 
 dense_52 (Dense)            (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


### **Built-in optimizers, losses and metrics**

**Visit `https://keras.io/api/` for in-depth information on inbuilt losses, metrics etc**

### **Custom Losses**
> **2 Methods of creating custom losses**
1. **Functional approach -- pass y_true and y_pred to a func and compute the difference between the two values**
2. **Sub-classing approach -- Implement methods __init__() and call()**

In [8]:
# Functional Approach
def Mean_SQ_Error(y_true, y_pred): 
    return tf.math.reduce_mean(tf.square(y_true - y_pred))


# Getting a Vauge Model calling a previously created function
model = get_model()

# Defining prams for compile
metric = tfk.metrics.SparseCategoricalAccuracy()
opt = tfk.optimizers.Adamax()
loss = Mean_SQ_Error

model.compile(
    optimizer= tfk.optimizers.Adamax(0.001),
    loss= Mean_SQ_Error,
    # metrics= [metric]
)

OneHot_lbls = tf.one_hot(tr_lbls, depth= 10)

# model.fit(tr_data, OneHot_lbls, epochs= EP, batch_size= BS)
model.fit(tr_data, OneHot_lbls, batch_size=64, epochs=5)

# Sparse Catagorical Cross Entropy will not work here...
...

# Custom Loss not working so-far

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Ellipsis

In [9]:
# Sub-Classing using tf.keras.losses.Loss class
class MeSqEr_Custom(tfk.losses.Loss):
    def init(self, regfac= 0.1):
        super().__init__()
        self.regularization_factor = regfac
    
    def call(self, ytrue, ypred):
        SqErr1 = tf.square(ytrue - ypred)
        mse = tf.reduce_mean(SqErr1)
        
        SqErr2 = tf.square(0.5 - ypred)
        reg = tf.reduce_mean(SqErr2)
        
        cstm_loss = mse + reg * self.regularization_factor
        return cstm_loss
    

class CustomMSE(tfk.losses.Loss):
    def __init__(self, regularization_factor=0.1, name="custom_mse"):
        super().__init__(name=name)
        self.regularization_factor = regularization_factor

    def call(self, y_true, y_pred):
        mse = tf.math.reduce_mean(tf.square(y_true - y_pred))
        reg = tf.math.reduce_mean(tf.square(0.5 - y_pred))
        return mse + reg * self.regularization_factor

# Get a Model    
model = get_model()

# Define prams
# optimizer and metric already defined from the last block: opt, metric
# cstm_mse = MeSqEr_Custom(0.25)
cstm_mse = CustomMSE()

model.compile(
    optimizer= opt,
    loss= cstm_mse,
    # metrics= [metric]
)

tr_lbls_OneHot = tf.one_hot(tr_lbls, depth= 10)
model.fit(tr_data, tr_lbls_OneHot, epochs=EP, verbose=VB, batch_size=BS)

# Custom Loss not working so-far

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x217c37c2130>

### **Custom Metrics**

In [10]:
'''
    To implement custom metrics, you need to subclass the 
    tf.metrics.Metric class and implement 4 functions
    1. init()   2. update_state()   3. reset_state()    4. result()  
'''

# Here's a simple example showing how to implement a CategoricalTruePositives metric 
# that counts how many samples were correctly classified as belonging to a given class

class cstm_metric(tf.metrics.Metric):
    def __init__(self): #  create state variables for your metric
        super(cstm_metric, self).__init__()
        self.true_positives = self.add_weight(
            name= 'ctp',
            initializer= 'zeros'
        )
        
    # uses the targets y_true and the model predictions y_pred to update the state variables.
    def update_state(self, y_true, y_pred, sample_weight= None):
        y_pred = tf.reshape(
            tf.argmax(y_pred, axis= 1), shape=(-1, 1)
        )
        
        values = tf.cast(y_true, 'int32') == tf.cast(y_pred, 'int32')
        values = tf.cast(values, 'float32')
        
        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, 'float32')
            values = values * sample_weight
            
        self.true_positives.assign_add(tf.reduce_sum(values))
    
    def result(self):  # Uses the state variables to compute the final results.
        return self.true_positives
    
    def reset_state(self): # Reinitializes the state of the metric.
        self.true_positives.assign_add(0.0)

In [11]:
# Testing the new Metric
model = get_model()

model.compile(
    optimizer=opt,
    loss= tfk.losses.SparseCategoricalCrossentropy(),
    metrics= [cstm_metric()]
)

model.fit(tr_data, tr_lbls, epochs= EP)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x217c3b03a90>

### **Handling losses and metrics that donot fit the standard signature**

> **To achieve this, we call the add_loss() and add_metric() methods from within the call function of a custom Layer, and the metric and/or loss gets added to the main loss/metric passed to the model at the compile time**

In [13]:
class ActivityRegularizer(tfk.layers.Layer):
    def call(self, inputs):
        super(ActivityRegularizer, self).__init__()
        self.add_loss(tf.reduce_mean(inputs) * 0.1)
        return inputs

# Test the layer!!
inp = tfk.Input(shape= (784, ))
model_1 = get_model()  

reg_model = tfk.Sequential([
    model_1,
    ActivityRegularizer(),
    tfk.layers.Dense(3)]
)

model_1 = compile_model(reg_model)

model_1.fit(tr_data, tr_lbls, epochs=3, verbose= 1)
 

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x217c3e60400>

#### **Handling Loss**

In [14]:
print("Dealing loss here!!")
class ActReg(lyrs.Layer):
    def call(self, inputs):
        # Using add_los mothod here to alter loss computation...
        self.add_loss(tf.reduce_sum(inputs) * 0.1)
        return inputs # Just Pass through the Layer

inp = tfk.Input(shape= (784, ))
x_ten = lyrs.Dense(64, 'relu')(inp)
x_ten = ActReg()(x_ten)

x_ten = lyrs.Dense(64, 'relu')(x_ten)
out = lyrs.Dense(10, 'relu')(x_ten)

model = tfk.Model(inputs= inp, outputs= out)

model.compile(
    opt, 
    loss= tfk.losses.SparseCategoricalCrossentropy(from_logits= True)
)

model.fit(tr_data, tr_lbls, epochs= EP, verbose= VB, batch_size= BS)

Dealing loss here!!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x217c3f5d3a0>

#### **Handling Metrcis**

In [15]:
print("Dealing metrics here!!")
class MetricLoggingLayer(tfk.layers.Layer):
    # The `aggregation` argument defines how to aggregate the 
    # per-batch values over each epoch
    # Aggregation mean simply averages them.
    def __init__(self, inputs):
        super(MetricLoggingLayer, self).__init__()
        self.add_metric(
            tfk.backend.std(inputs), name='std_activation', aggregation= 'mean' 
        )
        
        return inputs
    
opt_sch = tfk.optimizers.schedules.InverseTimeDecay(
    0.002,
    decay_steps= 32 * EP,
    decay_rate= 1,
    staircase= False
)

model = get_model()
model.compile(
    loss= tfk.losses.SparseCategoricalCrossentropy(from_logits= True),
    optimizer= tfk.optimizers.Adam(opt_sch),
    metrics=['accuracy']
)

hist = model.fit(
    tr_data,
    tr_lbls,
    epochs= EP,
    batch_size= BS,
    verbose= VB,
    validation_split= 0.2
)   
        
        

Dealing metrics here!!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### **Working with custom losses and metrics && Fuctional API**

In [19]:
# Sequential Model!!
inps = tfk.Input(
    shape=(784, ), name='Inputs'
)

lyr = tfk.layers.Dense(64, 'relu', input_shape= (784, ))(inps)
lyr = tfk.layers.Dense(64, 'relu')(lyr)
lyr = tfk.layers.Dense(64, 'relu')(lyr)
outs = tfk.layers.Dense(10, 'relu')(lyr)

model_fn = tfk.Model(
    inputs= inps,
    outputs= outs,
    name= 'FuncModel_with_CustomLosses_Metrics'
)

model.add_loss(
    Mean_SQ_Error 
    # tf.reduce_sum(x1) * 0.1
)

model_fn.add_metric(
    tfk.backend.std(lyr), name='std_of_activation_LastDenseLayer', aggregation='mean'
)

model_fn.compile(
    loss= tfk.losses.SparseCategoricalCrossentropy(from_logits= True),
    optimizer= tfk.optimizers.Adam(opt_sch),
    metrics=['accuracy']
)


hist = model_fn.fit(
    tr_data,
    tr_lbls,
    epochs= EP,
    batch_size= BS,
    verbose= VB,
    validation_split= 0.2
)   


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### **Adding loss and metrics to a Layer**

In [None]:
class LogisticEndpoint(tfk.layers.Layer):
    # this layer has Multiple functions.
    # 1. Calculate loss for Model 
    # 2. Calculate evaluation metrics
    # (No need to specify loss and metrics in model.compile)
    # 3. Return a vector of probabilities for each class. tf.nn.softmax
    def __init__(self, name=None):
        super(LogisticEndpoint, self).__init__(name= name)
        self.loss_fn = tfk.losses.SparseCategoricalCrossentropy(from_logits= True)
        self.acc = tfk.metrics.BinaryAccuracy()
        
    def call(self, target, logits, sample_weis=None):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        loss = self.loss_fn(target, logits, sample_weis)
        self.add_loss(loss)
        
        # Log accuracy as a metric and add it
        # to the layer using `self.add_metric()`
        acc = self.acc(target, logits, sample_weis)
        self.add_metric(acc, name= 'acc')
        
        # Return the inference-time prediction tensor (for `.predict()`).
        return tf.nn.softmax(logits)
    
    

# !!<==..==>!! I'll do this part at home !! !! !!


### **Automatically setting apart a validation holdout set**
> **`model.fit(..., validation_split= 0.xxxx)`  when your data is passed as NumPy arrays.**

## **Training and Evaluation from tf.Datasets**


### **TF.Datasets overview**
**Handling data that comes in shape of tf.dataset object**
> The tf.data API is a set of utilities in TensorFlow 2.0 for loading and preprocessing data in a way that's fast and scalable.

In [33]:
model = compile_model(get_model())

# For basic level, we'll use the same mnist data instance to create tf.dataset object

# convert numpy data into tf.data.Dataset object (training data)
training_dataset = tf.data.Dataset.from_tensor_slices((tr_data, tr_lbls))
training_dataset = training_dataset.shuffle(buffer_size=1024).batch(64)  # Shuffle and slice


# convert numpy data into tf.data.Dataset object (test data)
test_dataset = tf.data.Dataset.from_tensor_slices((ts_data, ts_lbls))
test_dataset = test_dataset.batch(64)

training_dataset

model.fit(
    training_dataset, 
    epochs= EP,
    verbose=VB,
)

Model: "model_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_17 (InputLayer)       [(None, 784)]             0         
                                                                 
 dense_62 (Dense)            (None, 64)                50240     
                                                                 
 dense_63 (Dense)            (None, 64)                4160      
                                                                 
 dense_64 (Dense)            (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x217c370afd0>

### **Using a Validation dataset**

## **Other supported input formats**

## **Using tfk.utils.Sequence object as input**

## **Using sample weighting and class weighting**

### **Class Weights**

### **Sample Weights**

## **Passing data to multi-input/output models**

## **Using Callbacks**

### **Built-in callbacks**


### **Writing custom callbacks**


## **Model Checkpoints**

## **Usign learning-rate schedules**

### **Passing a schedule to an optimizer**

### **Using callbacks to implement a dynamic learning rate schedule**

## **Visualizing loss and metrics during training**

### **Using Tensorboard callback**

# **Wrapup**