## **Setup + Introduction**

In [5]:
import numpy as np

import tensorflow as tf
import tensorflow.keras as tfk
from tensorflow.keras import layers as lyrs, optimizers as opts, activations as acts, losses as ls

import matplotlib.pyplot as plt


## **Training, evaluation, and inference**

>> **The main idea is that a deep learning model is usually a directed acyclic graph (DAG) of layers. So the functional API is a way to build `graphs of layers`.**

In [None]:
# Linear (sequential-style) topology expressed with the functional API.
# Topology:
#     Input([784]) => Dense(128, 'relu') => Dense(64, 'relu') => Dense(10, 'softmax')

inp = tfk.Input(shape=[784], name='Input_Layer')

# Each layer object is called on the tensor produced by the previous one,
# which is what wires the nodes of the layer graph together.
d1 = lyrs.Dense(units=128, activation='relu', name='Dense_1')(inp)
d2 = lyrs.Dense(units=64, activation='relu', name='Dense_2')(d1)
out = lyrs.Dense(units=10, activation='softmax', name='OutputLayer')(d2)

# A Model is defined purely by its input and output tensors.
model = tfk.Model(name='Classification_Model', inputs=inp, outputs=out)
model.summary()



In [None]:
# Render the layer graph (with tensor shapes) to an image file.
tfk.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
)

In [None]:
# Workflow for this part of the notebook:
#     1. Prepare the data
#     2. Build the model
#     3. Train and evaluate

dataset = tfk.datasets.mnist
(tr_data, tr_lbls), (ts_data, ts_lbls) = dataset.load_data()

# Shape / dtype of the raw images as loaded.
print(tr_data.shape)
print(tr_data.dtype)

# Flatten every 28 x 28 image into a 784-vector, cast to float32 and scale the
# pixel values by 1/255. Both splits are plain NumPy arrays, so a NumPy reshape
# is used for both. Routing the training split through a Keras `Flatten` layer
# turned it into a tensor and needed the private
# `tensorflow.python.ops.numpy_ops.np_config` switch just to call `.astype`.
tr_data = tr_data.reshape(-1, 784).astype('float32') / 255.0
ts_data = ts_data.reshape(-1, 784).astype('float32') / 255.0

print(ts_data.shape)
print(tr_data.dtype)


In [None]:
# Integer class labels + softmax output => sparse categorical cross-entropy.
adam = opts.Adam(learning_rate=0.005)
model.compile(optimizer=adam, loss=ls.SparseCategoricalCrossentropy(), metrics=['accuracy'])

# 25% of the training data is held out for validation; the returned History
# object is kept so the curves can be plotted afterwards.
fit_options = dict(validation_split=0.25, batch_size=64, epochs=20, verbose=1)
his = model.fit(tr_data, tr_lbls, **fit_options)

In [None]:


def plot_history(history, metric):
    """Plot the training and validation curves of one metric.

    Args:
        history: mapping from metric name to a sequence of per-epoch values.
            It must contain both `metric` and `'val_' + metric`.
        metric: name of the metric to plot, e.g. 'loss' or 'accuracy'.

    The curves are drawn on the current matplotlib figure. The y-axis is
    clipped to [0, 1] for 'loss' and to [0.9, 1.0] for every other metric.
    """
    plt.plot(history[metric], label='Training')
    plt.plot(history['val_' + metric], label='Validation')
    plt.title(metric)
    plt.xlabel('epoch')
    plt.ylabel(metric)

    # Fixed y-ranges keep the loss and non-loss plots comparable between runs.
    plt.ylim([0, 1] if metric == 'loss' else [0.9, 1.0])
    plt.legend()


# `his` is the History object returned by `model.fit`; its `.history`
# attribute holds the recorded per-epoch values keyed by metric name.
history = his.history
print(history.keys())


In [None]:

# Training vs. validation loss per epoch.
plot_history(history=history, metric='loss')


In [None]:

# Training vs. validation accuracy per epoch.
plot_history(history=history, metric='accuracy')


## **Save and serialize**
**The standard way to save a functional model is to call model.save() to save the entire model as a single file. You can later recreate the same model from this file, even if the code that built the model is no longer available.**

> **`The Saved Model includes:`**
1. **Model Architecture**
2. **Model Weights**
3. **Model training config**
4. **Optimizer and its state**

In [None]:
# Save the whole model under the path 'SavedModel'.
model.save('SavedModel')

# The in-memory model could be deleted at this point (`del model`); the
# reloaded copy below does not depend on it.

# Reload the model from disk and run it on the first ten test images.
model_loaded = tfk.models.load_model('SavedModel')
pred = model_loaded(ts_data[:10])

# The output layer of this model already applies softmax, so `pred` holds
# class probabilities. The previous extra `tf.nn.softmax(pred)` call discarded
# its result and would have applied softmax a second time, so it is removed.
for probs in pred:
    print(np.argmax(probs))

    

## **Reusing a graph of layers to define multiple models**

In [None]:
# Step 1: build an encoder by stacking layers on an image input.
# Step 2 (next cell): reuse this same layer graph to build a second model that
# decodes the encoder output.

encoder_inp = lyrs.Input((28, 28, 1), name='InputLayer')

# Convolutional trunk, applied in order.
encoder_stack = [
    lyrs.Conv2D(128, 3, activation='relu'),
    lyrs.Conv2D(128, 3, activation='relu'),
    lyrs.MaxPooling2D(3),
    lyrs.Conv2D(64, 3, activation='relu'),
    lyrs.Conv2D(64, 3, activation='relu'),
]
x = encoder_inp
for encoder_layer in encoder_stack:
    x = encoder_layer(x)

# Global max pooling collapses the spatial dimensions, leaving one value per channel.
encoder_out = lyrs.GlobalMaxPooling2D()(x)

model_encoder = tfk.Model(inputs=encoder_inp, outputs=encoder_out, name='Encoder')
model_encoder.summary()



In [None]:
# Decoder, built directly on top of `encoder_out` so the encoder's layer graph
# is reused by the autoencoder.
#
# `encoder_out` has 64 features (one per channel of the last Conv2D). They are
# reshaped to 4 x 4 x 4 so that the transposed convolutions and the upsampling
# below grow the feature map back to the 28 x 28 x 1 size of the input:
#     4 -> 6 -> 8 -> (x3) 24 -> 26 -> 28
# The previous (8, 8, 1) reshape produced a 40 x 40 x 1 output, which cannot be
# compared against the 28 x 28 x 1 input image.
autoencoder_inp = lyrs.Reshape((4, 4, 4))(encoder_out)
x = lyrs.Conv2DTranspose(128, 3, activation='relu')(autoencoder_inp)  # 6 x 6
x = lyrs.Conv2DTranspose(64, 3, activation='relu')(x)                 # 8 x 8
x = lyrs.UpSampling2D(3)(x)                                           # 24 x 24
x = lyrs.Conv2DTranspose(32, 3, activation='relu')(x)                 # 26 x 26
autoencoder_out = lyrs.Conv2DTranspose(1, 3, activation='relu')(x)    # 28 x 28

# Same input tensor as the encoder model: both models share the encoder layers.
model_autoencoder = tfk.Model(encoder_inp, autoencoder_out, name='autoencoder')
model_autoencoder.summary()


## **Callable Models, just like layers**

> **Treating `NeuralNet-Models` like `KerasLayers` allows us to design modular models: a model can be called on any compatible input tensor to produce an output, and models can be combined or stacked like layers to form pipelines (larger model architectures).**

In [None]:
"""
    In this part, we are going to form an autoencoder by
    first creating separate encoder and decoder models, then chaining them together
    to form the autoencoder architecture
"""

# Encoder: 150 x 150 x 1 image -> 16-dimensional code.
# The input layer is named 'Original_Image'. It was previously named
# 'Encoded_Image', which described the wrong tensor and duplicated the name of
# the decoder's input layer.
Encoder_Inp = tfk.Input(shape=(150, 150, 1), name='Original_Image')

# Stage 1: 150 -> 148 -> 146 -> 144, pooled by 3 -> 48
lyr = lyrs.Conv2D(128, 3, activation='relu')(Encoder_Inp)
lyr = lyrs.Conv2D(128, 3, activation='relu')(lyr)
lyr = lyrs.Conv2D(128, 3, activation='relu')(lyr)
lyr = lyrs.MaxPooling2D(3)(lyr)

# Stage 2: 48 -> 46 -> 44 -> 42, pooled by 3 -> 14
lyr = lyrs.Conv2D(64, 3, activation='relu')(lyr)
lyr = lyrs.Conv2D(64, 3, activation='relu')(lyr)
lyr = lyrs.Conv2D(32, 3, activation='relu')(lyr)
lyr = lyrs.MaxPooling2D(3)(lyr)

# Stage 3: 14 -> 12 -> 10 -> 8, pooled by 2 -> 4
lyr = lyrs.Conv2D(32, 3, activation='relu')(lyr)
lyr = lyrs.Conv2D(32, 3, activation='relu')(lyr)
lyr = lyrs.Conv2D(16, 3, activation='relu')(lyr)
lyr = lyrs.MaxPooling2D(2)(lyr)

# One value per channel of the last convolution -> vector of length 16.
Encoder_Out = lyrs.GlobalMaxPooling2D()(lyr)

ModelEncoder = tfk.Model(Encoder_Inp, Encoder_Out, name='Encoder')
ModelEncoder.summary()


# Decoder: 16-dimensional code -> 150 x 150 x 1 image.
# The stages mirror the encoder (pool 3, pool 3, pool 2 with three 3 x 3
# convolutions each), so the output has the same spatial size as the
# 150 x 150 x 1 image being encoded. The previous layout ended at
# 132 x 132 x 1, which cannot be compared against the input image.
Decoder_Inp = tfk.Input(shape=(16, ), name='Encoded_Image')
lyr = lyrs.Reshape((4, 4, 1))(Decoder_Inp)                     # 4 x 4
lyr = lyrs.UpSampling2D(2)(lyr)                                # 8 x 8

lyr = lyrs.Conv2DTranspose(32, 3, activation='relu')(lyr)      # 10 x 10
lyr = lyrs.Conv2DTranspose(32, 3, activation='relu')(lyr)      # 12 x 12
lyr = lyrs.Conv2DTranspose(64, 3, activation='relu')(lyr)      # 14 x 14
lyr = lyrs.UpSampling2D(3)(lyr)                                # 42 x 42

lyr = lyrs.Conv2DTranspose(64, 3, activation='relu')(lyr)      # 44 x 44
lyr = lyrs.Conv2DTranspose(128, 3, activation='relu')(lyr)     # 46 x 46
lyr = lyrs.Conv2DTranspose(128, 3, activation='relu')(lyr)     # 48 x 48
lyr = lyrs.UpSampling2D(3)(lyr)                                # 144 x 144

lyr = lyrs.Conv2DTranspose(64, 3, activation='relu')(lyr)      # 146 x 146
lyr = lyrs.Conv2DTranspose(64, 3, activation='relu')(lyr)      # 148 x 148
output = lyrs.Conv2DTranspose(1, 3, activation='relu')(lyr)    # 150 x 150

ModelDecoder = tfk.Model(Decoder_Inp, output, name='Decoder')
ModelDecoder.summary()

# Chain the two models: each model is called on a tensor exactly like a layer.
autoenc_inp = tfk.Input(shape=(150, 150, 1), name='AutoEncoder_Input')

encoded_image = ModelEncoder(autoenc_inp)
decoded_image = ModelDecoder(encoded_image)

autoencoder = tfk.Model(
    inputs=autoenc_inp,
    outputs=decoded_image,
    name='AutoEncoder',
)
autoencoder.summary()

In [None]:
# Encoder: 28 x 28 x 1 image -> one value per channel of the last convolution.
encoder_input = tfk.Input(shape=(28, 28, 1), name="original_img")

x = lyrs.Conv2D(filters=16, kernel_size=3, activation="relu")(encoder_input)
x = lyrs.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = lyrs.MaxPooling2D(pool_size=3)(x)
x = lyrs.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = lyrs.Conv2D(filters=16, kernel_size=3, activation="relu")(x)
encoder_output = lyrs.GlobalMaxPooling2D()(x)

encoder = tfk.Model(inputs=encoder_input, outputs=encoder_output, name="encoder")
encoder.summary()

# Decoder: 16-dimensional code -> single-channel image.
decoder_input = tfk.Input(shape=(16,), name="encoded_img")

# Layers applied in order; the last one emits the single output channel.
decoder_stack = [
    lyrs.Reshape((4, 4, 1)),
    lyrs.Conv2DTranspose(16, 3, activation="relu"),
    lyrs.Conv2DTranspose(32, 3, activation="relu"),
    lyrs.UpSampling2D(3),
    lyrs.Conv2DTranspose(16, 3, activation="relu"),
    lyrs.Conv2DTranspose(1, 3, activation="relu"),
]
decoder_output = decoder_input
for decoder_layer in decoder_stack:
    # Keep `x` pointing at the input of the layer being applied, so after the
    # loop it holds the tensor that feeds the final transposed convolution.
    x = decoder_output
    decoder_output = decoder_layer(x)

decoder = tfk.Model(inputs=decoder_input, outputs=decoder_output, name="decoder")
decoder.summary()

# Compose encoder and decoder by calling each model like a layer.
autoencoder_input = tfk.Input(name="img", shape=(28, 28, 1))

encoded_img = encoder(autoencoder_input)
decoded_img = decoder(encoded_img)

autoencoder = tfk.Model(
    inputs=autoencoder_input,
    outputs=decoded_img,
    name="autoencoder",
)
autoencoder.summary()


### **Ensemble**

> **As you can see, the model can be nested: a model can contain sub-models (since a model is just like a layer). A common use case for model nesting is `ensembling`. For example, here's how to ensemble a set of models into a single model that `averages their predictions`**

In [None]:
def get_Model(name):
    """Build a small dense network: 256 inputs -> 128 -> 64 -> 16 (all ReLU).

    Args:
        name: name given to the returned Keras model.

    Returns:
        A new `tfk.Model` with freshly created layers.
    """
    features_in = tfk.Input(shape=(256, ))
    hidden = lyrs.Dense(units=128, activation=acts.relu)(features_in)
    hidden = lyrs.Dense(units=64, activation=acts.relu)(hidden)
    features_out = lyrs.Dense(units=16, activation=acts.relu)(hidden)
    return tfk.Model(inputs=features_in, outputs=features_out, name=name)

# Three structurally identical models, each with its own layers.
m1, m2, m3 = (get_Model(model_name) for model_name in ('Model_1', 'Model_2', 'Model_3'))

# One shared input tensor is fed to every member model.
inp = tfk.Input(shape=(256, ))
y1, y2, y3 = (member(inp) for member in (m1, m2, m3))

# Element-wise mean of the three member outputs.
out = lyrs.average([y1, y2, y3])

# The ensemble is itself a functional model whose sub-models act as layers.
EnsmebleModel = tfk.Model(name='Ensemble_Model', inputs=inp, outputs=out)
EnsmebleModel.summary()



## **Manipulate complex graph topologies**

> #### **Multi Inputs / Outputs**

**Ranking customer issue tickets by priority and routing them to the right department**

**`Inputs:`**
1. The title of the ticket (text input)
2. The text body of the ticket (text input)
3. Tags added by the user (categorical input)

**`Outputs:`**
1. Priority between 0-1
2. The department that should handle the ticket(softmax over the set of departments)


In [None]:
# Sizes used by the ticket example, in order:
#   num_tags  - number of unique issue tags
#   num_words - vocabulary size used when preprocessing the text
#   num_depts - number of departments to predict over
num_tags, num_words, num_depts = 12, 10000, 4

In [None]:
# Problem sizes.
num_tags = 12          # number of unique issue tags
num_words = 10000      # vocabulary size obtained when preprocessing the text
num_departments = 4    # number of departments for predictions

# Title and body: variable-length sequences of ints (length axis left as None).
title_input = tfk.Input(name="title", shape=(None,))
body_input = tfk.Input(name="body", shape=(None,))

# Tags: binary vectors of size `num_tags`.
tags_input = tfk.Input(name="tags", shape=(num_tags,))

In [None]:


# Separate embedding tables for title and body; each maps a word index to a
# 64-dimensional vector.
title_embedding = lyrs.Embedding(num_words, 64)
body_embedding = lyrs.Embedding(num_words, 64)
title_features = title_embedding(title_input)
body_features = body_embedding(body_input)

# Each embedded sequence is reduced to a single vector by a bidirectional
# LSTM with 128 units per direction (256 features with the default
# concatenating merge mode).
title_encoder = lyrs.Bidirectional(lyrs.LSTM(128))
title_features = title_encoder(title_features)
body_encoder = lyrs.Bidirectional(lyrs.LSTM(128))
body_features = body_encoder(body_features)

# All features are merged into one large vector by concatenation.
x = lyrs.concatenate([title_features, body_features, tags_input])

# Two heads on the shared features. Neither Dense layer has an activation:
#   priority   - a single value per ticket
#   department - one score per department
priority_pred = lyrs.Dense(1, name="priority")(x)
department_pred = lyrs.Dense(num_departments, name="department")(x)

# End-to-end model with three inputs and two outputs.
ticket_inputs = [title_input, body_input, tags_input]
ticket_outputs = [priority_pred, department_pred]
model = tfk.Model(inputs=ticket_inputs, outputs=ticket_outputs)

tfk.utils.plot_model(model, 'Model_TicketClassifier.png', show_shapes=True)

In [None]:
# One loss per output, in output order (priority, department). Both are
# configured with from_logits=True. The weights scale each loss's contribution
# to the total.
priority_loss = tfk.losses.BinaryCrossentropy(from_logits=True)
department_loss = tfk.losses.CategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer=tfk.optimizers.RMSprop(1e-3),
    loss=[priority_loss, department_loss],
    loss_weights=[1.0, 0.2],
)

# Dummy input data: 1280 samples.
title_data = np.random.randint(num_words, size=(1280, 10))
body_data = np.random.randint(num_words, size=(1280, 100))
tags_data = np.random.randint(2, size=(1280, num_tags)).astype("float32")

# Dummy target data.
priority_targets = np.random.random(size=(1280, 1))
dept_targets = np.random.randint(2, size=(1280, num_departments))

# Inputs and targets are passed as dicts keyed by the input / output layer names.
train_inputs = {"title": title_data, "body": body_data, "tags": tags_data}
train_targets = {"priority": priority_targets, "department": dept_targets}
model.fit(train_inputs, train_targets, epochs=2, batch_size=32)



> #### **Mini Resnet**

In [None]:
# def res_block():
#   pass

In [None]:
inp = tfk.Input(shape=(32, 32, 3), name='Img')

# Block 1: two convolutions, then a pooling step; its output feeds the first skip connection.
ten = lyrs.Conv2D(32, 3, activation='relu', padding='same')(inp)
ten = lyrs.Conv2D(64, 3, activation='relu', padding='same')(ten)
bl_1 = lyrs.MaxPooling2D(3)(ten)

# Block 2: residual block. padding='same' and 64 output channels keep the
# shape equal to `bl_1`, so the two tensors can be added.
ten = lyrs.Conv2D(32, 3, activation='relu', padding='same')(bl_1)
ten = lyrs.Conv2D(64, 3, activation='relu', padding='same')(ten)
bl_2 = lyrs.add([ten, bl_1])

# Block 3: second residual block, skip connection from `bl_2`.
ten = lyrs.Conv2D(32, 3, activation='relu', padding='same')(bl_2)
ten = lyrs.Conv2D(64, 3, activation='relu', padding='same')(ten)
bl_3 = lyrs.add([ten, bl_2])

# Classification head.
ten = lyrs.Conv2D(32, 3, activation='relu', padding='same')(bl_3)
ten = lyrs.GlobalAveragePooling2D()(ten)
ten = lyrs.Dense(256, activation=acts.elu)(ten)
ten = lyrs.Dropout(0.4)(ten)

# No activation on the output: the model is compiled with
# CategoricalCrossentropy(from_logits=True), which expects raw logits. A ReLU
# here would clip every negative logit to zero before the loss sees it.
output = lyrs.Dense(10)(ten)

# The model name no longer carries a '.png' suffix; only the plot file needs one.
Model_MiniResnet = tfk.Model(inputs= inp, outputs= output, name='ToyResnet')
tfk.utils.plot_model(Model_MiniResnet, 'Mini_Resnet.png', show_shapes=True)

In [None]:
# Print the layer-by-layer summary of the Mini ResNet model.
Model_MiniResnet.summary()

In [None]:
(xtrain, ytrain), (xtest, ytest) = tfk.datasets.cifar10.load_data()

# Cast the images to float32 and scale the pixel values by 1/255.
xtrain = xtrain.astype('float32') / 255.0
xtest = xtest.astype('float32') / 255.0

# One-hot encode the labels over all 10 classes. The training labels were
# previously encoded with num_classes=1, which cannot represent labels 0-9
# and does not match the 10-unit output layer or the test-label encoding.
ytrain = tfk.utils.to_categorical(ytrain, 10)
ytest = tfk.utils.to_categorical(ytest, 10)

Model_MiniResnet.compile(
    optimizer= opts.RMSprop(0.001),
    loss= ls.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

# 20% of the training set is held out for validation.
Model_MiniResnet.fit(
    xtrain, ytrain, epochs= 20, verbose= 2, batch_size= 32, validation_split= 0.2
)

## **Shared Layers**



In [None]:
# Shared layers are often used to encode inputs from similar spaces
# (say, two different pieces of text that feature similar vocabulary).
# They enable sharing of information across these different inputs,
# and they make it possible to train such a model on less data.
# If a given word is seen in one of the inputs, that will benefit the
# processing of all inputs that pass through the shared layer.

max_f = 10000

# One embedding table: `max_f` (10000) word indices -> 128-dimensional vectors.
emb_lyr = lyrs.Embedding(max_f, 128, 'uniform')

# Two variable-length integer-sequence inputs.
inp_a, inp_b = tfk.Input(shape=(None, )), tfk.Input(shape=(None, ))

# The same layer instance (and therefore the same weights) encodes both inputs.
inp_a_encoded, inp_b_encoded = emb_lyr(inp_a), emb_lyr(inp_b)

## **Extract and reuse Nodes in graph of layers**

> **Because the graph of layers you are manipulating is a static data structure, it can be accessed and inspected. This also means that you can access the activations of intermediate layers ("nodes" in the graph) and reuse them elsewhere -- which is very useful for something like feature extraction.**

**This comes in handy for tasks like `neural style transfer`, among other things.**

In [8]:
# VGG19 with its default ImageNet weights serves as the feature extractor.
vgg19 = tfk.applications.VGG19(weights='imagenet')

# Collect the output tensor of every layer (every intermediate node of the graph).
fea_list = [vgg_layer.output for vgg_layer in vgg19.layers]

# Feature-extraction model: same input as VGG19, one output per layer.
fem = tfk.Model(outputs=fea_list, inputs=vgg19.input)
fem

<keras.engine.functional.Functional at 0x261f24f5130>

## **Extend the Func-API using custom-layers**

You can use the func api to create custom dense layers or create blocks of Models(reusable) using this technique. 

## **Practical Use**

1. **In general, the functional API is higher-level, easier and safer, and has a number of features that subclassed models do not support.**
2. **Supports multi-input/output, complex graph topologies compared to Sequential API.**

> However, model subclassing provides greater flexibility when building models that are not easily expressible as directed acyclic graphs of layers. For example, you could not implement a Tree-RNN with the functional API and would have to subclass Model directly.

**For an in-depth look at the differences between the `Functional API and Model-Subclassing`, read `What are Symbolic and Imperative APIs in TensorFlow 2.0?`.**


> ### **Strengths**

1. **Less verbose**
2. **Supports complex graph-based model topologies.**
3. **Model validation while defining its connectivity graph.(Defining the Input shape at the beginning ensures that any Model `built with functional API` will run).**
4. **Plottable and Inspectable.**
5. **Functional Model can be serialized or cloned.**

> #### **Weaknesses**

1. **Does not support dynamic structures(outputs a static graph datastructure.)**
> **The functional API treats models as DAG `(Directed Acyclic Graph)` of layers. This is true for most deep learning architectures, but not all -- for example, recursive networks or Tree RNNs do not follow this assumption and cannot be implemented in the functional API.**

## **Mix n Match API styles**

**Using Functional Model with Subclassed Models**

In [10]:
# Hyper-parameters shared by the functional model and the subclassed layer.
units, timesteps, inp_dim, bs = 32, 10, 5, 16

# Functional part: average a (batch, time, units) sequence over the time axis
# and map the result to a single value.
Input = tfk.Input(shape=(None, units))
lyr = lyrs.GlobalAveragePooling1D()(Input)
Output = lyrs.Dense(units=1)(lyr)

Model_Func = tfk.Model(name='Functional_Protion', inputs=Input, outputs=Output)



In [None]:
class CustomRNN(lyrs.Layer):
    """Hand-written recurrent layer that ends in the functional `Model_Func`.

    At every timestep the current input slice goes through `p1` and the
    previous state through `p2`; their sum is both the step output and the
    next state. The per-step outputs are stacked into a sequence and passed
    to `Model_Func`, which this layer uses as its classifier.
    """

    def __init__(self):
        super(CustomRNN, self).__init__()
        self.units = units
        self.p1 = lyrs.Dense(units= units, activation=acts.tanh) # Projects the input slice
        self.p2 = lyrs.Dense(units= units, activation=acts.tanh) # Projects the previous state

        # A functional model used as a sub-component of a subclassed layer.
        self.classifier = Model_Func

    def call(self, input_ten):
        """Run the recurrence over axis 1 of `input_ten` and classify the result.

        Args:
            input_ten: tensor indexed as (batch, time, features). The batch and
                time sizes are read from its static shape, so both must be known.

        Returns:
            The output of `self.classifier` applied to the stacked step outputs.
        """
        outputs = []
        state = tf.zeros(shape=(input_ten.shape[0], self.units))

        for t in range(input_ten.shape[1]):
            # Slice out timestep t for the whole batch.
            inp = input_ten[ : , t, : ]
            ten = self.p1(inp)
            ten = self.p2(state) + ten

            state = ten
            outputs.append(ten)

        # Stack along axis 1 so the result is (batch, timesteps, units), which
        # is the (None, units) per-sample layout `Model_Func` was built for.
        # Stacking along the last axis produced (batch, units, timesteps) and
        # triggered an input-shape error inside the classifier.
        features = tf.stack(outputs, axis= 1)
        return self.classifier(features)

rnnModel = CustomRNN()
# With the stacking axis fixed, the layer can be called eagerly on a dummy batch.
_ = rnnModel(tf.zeros(shape=(1, timesteps, inp_dim)))

In [None]:
# `CustomRNN.call` creates its initial `state` with a shape taken from the
# input's static batch size, so the batch size has to be known up front. When
# the layer is used inside a functional model, declare the input with
# `batch_shape` instead of `shape`:
#
#     inputs = tfk.Input(batch_shape=(bs, timesteps, inp_dim))
#     x = lyrs.Conv1D(32, 3)(inputs)
#     outputs = CustomRNN()(x)
#     model = tfk.Model(inputs, outputs)

# Eager call on a dummy batch. This uses `inp_dim`, the name defined in the
# hyper-parameter cell above; `input_dim` is not defined until a later cell and
# raised a NameError here on a fresh kernel.
rnn_model = CustomRNN()
_ = rnn_model(tf.zeros((1, timesteps, inp_dim)))


In [16]:
# Hyper-parameters, in order: state size, sequence length, feature size, batch size.
units, timesteps, input_dim, batch_size = 32, 10, 5, 16


class CustomRNN(lyrs.Layer):
    """Hand-written recurrent layer followed by a single-unit Dense classifier.

    Each step adds the projected input slice to the projected previous state;
    the sum is the step output and the next state. The step outputs are
    stacked along axis 1 and passed to the classifier.
    """

    def __init__(self):
        super().__init__()
        self.units = units
        self.projection_1 = lyrs.Dense(units=units, activation="tanh")
        self.projection_2 = lyrs.Dense(units=units, activation="tanh")
        self.classifier = lyrs.Dense(1)

    def call(self, inputs):
        """Apply the recurrence over axis 1 of `inputs`, then the classifier.

        The batch size and the number of steps are read from the static shape
        of `inputs`.
        """
        n_batch, n_steps = inputs.shape[0], inputs.shape[1]
        state = tf.zeros(shape=(n_batch, self.units))

        step_outputs = []
        for step in range(n_steps):
            projected_input = self.projection_1(inputs[:, step, :])
            state = projected_input + self.projection_2(state)
            step_outputs.append(state)

        return self.classifier(tf.stack(step_outputs, axis=1))


# `CustomRNN.call` builds its zero `state` from the input's static batch size,
# so the functional input is declared with `batch_shape` (batch size included)
# rather than `shape`.
inputs = tfk.Input(batch_shape=(batch_size, timesteps, input_dim))
x = lyrs.Conv1D(filters=32, kernel_size=3)(inputs)
outputs = CustomRNN()(x)

model = tfk.Model(inputs=inputs, outputs=outputs)

# The same layer class can also be called eagerly on a concrete tensor.
rnn_model = CustomRNN()
_ = rnn_model(tf.zeros(shape=(1, 10, 5)))

