# Introduction to Tensorflow : Set training pipeline
### By: Diego Coello de Portugal Mecke

This notebook aims to create model trainning pipeline without using standard tensorflow functionalities.

The data is synthetic (sinuosoidal function) with some outliers for test robustness of the model.
A numoy array and a generator will be used to prove the generality of the implemented functionalities.

The usage of regularization and gradient clipping will be tested for the case with outliers.

----

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model, regularizers
import numpy as np
import pandas as pd
import pickle as pk
import matplotlib.pyplot as plt
#from google.colab import drive

print(tf.__version__)
print(np.__version__)
print(pd.__version__)

ImportError: cannot import name 'dtensor' from 'tensorflow.compat.v2.experimental' (C:\Users\omen1\anaconda3\envs\dllab\lib\site-packages\tensorflow\_api\v2\compat\v2\experimental\__init__.py)

In [None]:
#Set seeds for reproducibility
tf.random.set_seed(24)

random_state = 24
np.random.state = random_state
np.random.seed = random_state

## Data Loader

In [None]:
def loadData(n=16000, outliers=False):
    
    # Get data
    x = np.random.rand(n,1)*2*np.pi-np.pi 
    y = np.sin(x)
    
    # Add outliers
    if outliers:
        y[np.random.randint(0,n,3)]=1000.
    
    return x , y

In [None]:
x,y = loadData()
plt.scatter(x,y)

## Create a class to define and use model

In [None]:
# Implementation of simple feedforward network
class FCNet(Model):
    def __init__(self, neurons=[12,12,3], reg=None,activation="relu"):
        super(FCNet, self).__init__()

        self.denseLayers=[]
        for idx,neuron in enumerate(neurons):
            self.denseLayers.append(Dense(neuron, activation="relu"))

        self.outputLayer = Dense(1, activation=None)

    def call(self, input_x):
        output = input_x

        for layer in self.denseLayers:
            output = layer(output)

        return self.outputLayer(output)

## Optimization routine

In [None]:
class TrainModel:

    def __init__(self, model, batch_size = 8, lr = 0.001, loss = tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.Adam):

        self.model      = model
        self.loss       = loss()
        self.optimizer  = opt(learning_rate = lr)
        self.batch_size = batch_size

        self.train_loss = tf.keras.metrics.Mean(name='train_loss')

        self.test_loss  = tf.keras.metrics.Mean(name='test_loss')


    @tf.function
    def train_step(self, x , y):
        with tf.GradientTape() as tape:
            predictions = self.model(x) #Update
            loss = self.loss(y, predictions)

        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        self.train_loss.update_state(loss)
        return loss

    @tf.function
    def test_step(self, x , y):
        predictions = self.model(x)
        loss = self.loss(y, predictions)
        self.test_loss.update_state(loss)
        return loss

    def train(self):
        loss = []
        for bX, bY in self.train_ds:
            loss.append(self.train_step(bX, bY))
        return loss

    def test(self):
        loss = []
        for bX, bY in self.test_ds:
            loss.append(self.test_step(bX, bY))  
        return loss 

    def run(self, dataX, dataY, testX, testY, epochs, verbose=1):
        history = []

        self.train_ds = tf.data.Dataset.from_tensor_slices((dataX, dataY)).shuffle(16000).batch(self.batch_size)
        self.test_ds  = tf.data.Dataset.from_tensor_slices((testX,testY)).batch(self.batch_size)

        for i in range(epochs):

            train_loss = self.train()
            test_loss  = self.test()

            history.append([train_loss,test_loss])

            if verbose > 0 and (i==0 or (i+1)%5==0):
                
                print(f"epoch: {i+1}, TRAIN LOSS: {self.train_loss.result()}, TEST LOSS: {self.test_loss.result()}")

                self.train_loss.reset_states()
                self.test_loss.reset_states()

        return history

In [None]:
x_train, y_train = loadData()
x_test, y_test  = loadData()

model = FCNet()
opt   = TrainModel(model, batch_size=8, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.SGD)

hist = opt.run(x_train, y_train, x_test, y_test, epochs=20, verbose=1)
hist = np.array(hist)

In [None]:
plt.plot(np.mean(hist,-1)[:,0],label="train")
plt.plot(np.mean(hist,-1)[:,1],label="test")
plt.xlabel("Epochs")
plt.ylabel("MSE")
plt.title("Training/Test Loss Convergence")
plt.legend()

## Data generator

In [None]:
def genData(batch_size=100):
    while True:
        x = np.random.rand(batch_size,1)*2*np.pi-np.pi 
        y = np.sin(x)
        yield x , y

In [None]:
#Definition of RunModel class (variation of TrainClass with new run method)
class RunModel:

    def __init__(self, model, batch_size=8, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.Adam):

        self.model      = model
        self.loss       = loss()
        self.optimizer  = opt(learning_rate = lr)
        self.batch_size = batch_size

        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.test_loss  = tf.keras.metrics.Mean(name='test_loss')


    @tf.function
    def train_step(self, x , y):
        with tf.GradientTape() as tape:
            inp = tf.convert_to_tensor([x], dtype=tf.float64)
            out = tf.convert_to_tensor([y], dtype=tf.float64)
            predictions = model(inp)
            loss = self.loss(out, predictions)

        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        self.train_loss.update_state(loss)
        return loss

    @tf.function
    def test_step(self, x , y):
        predictions = self.model(x)
        loss = self.loss(y, predictions)
        self.test_loss.update_state(loss)
        return loss

    def train(self):
        loss = []
        for bX, bY in self.train_ds:
            loss.append(self.train_step(bX, bY))
        return loss

    def test(self):
        loss = []
        for bX, bY in self.test_ds:
            loss.append(self.test_step(bX, bY))  
        return loss 

    def run(self, trainGen, testGen, epochs, verbose=2): #Update
        history = []
        batch_size = (next(trainGen)[0]).shape[0] #Update

        for i in range(epochs):
            self.train_ds = tf.data.Dataset.from_tensor_slices(next(train_gen)).batch(batch_size) #Update
            self.test_ds  = tf.data.Dataset.from_tensor_slices(next(test_gen)).batch(batch_size) #Update

#             self.train_ds = tf.data.Dataset.from_generator(train_gen)
#             self.test_ds  = tf.data.Dataset.from_generator(test_gen)

            train_loss = self.train()
            test_loss  = self.test()

            history.append([train_loss,test_loss])

            if verbose > 0 and (i==0 or (i+1)%5==0):
                print(f"epoch: {i+1}, TRAIN LOSS: {self.train_loss.result()}, TEST LOSS: {self.test_loss.result()}")

                self.train_loss.reset_states()
                self.test_loss.reset_states()

        return history

In [None]:
train_gen = genData(batch_size=16000)
test_gen  = genData(batch_size=16000)

# Running this requires to update RunModel
model  = FCNet()
opt    = RunModel(model, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.SGD)

hist = opt.run(train_gen, test_gen, 30, verbose=1)
hist = np.array(hist)

In [None]:
plt.plot(np.mean(hist,-1)[:,0],label="train")
plt.plot(np.mean(hist,-1)[:,1],label="test")
plt.xlabel("Epochs")
plt.ylabel("MSE")
plt.title("Training/Test Loss Convergence")
plt.legend()

The result difference between the generator and the numpy case are due to the difference in data. One epoch of the generator has 8 instances, while the generator has 16000 instances.

## Test L2 regularization and gradient clipping by norm to the model.

Test implementations by running the original code with outliers in the data.

In [None]:
xn_train, yn_train = loadData(outliers=True)
x_test, y_test   = loadData()

In [None]:
plt.scatter(xn_train, yn_train)

Add regularization in the layers

In [None]:
# Implementation of simple feedforward network with regularization
class FCNet2(Model):
    def __init__(self, neurons=[12,12,3], reg=0.001, activation="relu"): #Update
        super(FCNet2, self).__init__()

        self.denseLayers=[]
        for idx,neuron in enumerate(neurons):
            self.denseLayers.append(Dense(neuron, kernel_regularizer=regularizers.l2(reg), activation="relu")) #Update

        self.outputLayer = Dense(1, kernel_regularizer=regularizers.l2(reg), activation=None)

    def call(self, input_x):
        output = input_x

        for layer in self.denseLayers:
            output = layer(output)

        return self.outputLayer(output)


Adding clipvalue to the optimizer.

In [None]:
class TrainModel2:

    def __init__(self, model, batch_size=8, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.Adam,\
                clip_value=1.0):

        self.model      = model
        self.loss       = loss()
        self.optimizer  = opt(learning_rate=lr, clipvalue=clip_value) #Update
        self.batch_size = batch_size

        self.train_loss = tf.keras.metrics.Mean(name='train_loss')

        self.test_loss  = tf.keras.metrics.Mean(name='test_loss')


    @tf.function
    def train_step(self, x , y):
        with tf.GradientTape() as tape:
            predictions = self.model(x)
            loss = self.loss(y, predictions) + sum(self.model.losses)

        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients,self.model.trainable_variables))
        self.train_loss.update_state(loss)
        return loss

    @tf.function
    def test_step(self, x , y):
        predictions = self.model(x)
        loss = self.loss(y, predictions)
        self.test_loss.update_state(loss)
        return loss

    def train(self):
        loss = []
        for bX, bY in self.train_ds:
            loss.append(self.train_step(bX, bY))
        return loss

    def test(self):
        loss = []
        for bX, bY in self.test_ds:
            loss.append(self.test_step(bX, bY))  
        return loss 

    def run(self, dataX, dataY, testX, testY, epochs, verbose=2):
        history = []

        self.train_ds = tf.data.Dataset.from_tensor_slices((dataX, dataY)).shuffle(16000).batch(self.batch_size)
        self.test_ds  = tf.data.Dataset.from_tensor_slices((testX,testY)).batch(self.batch_size)

        for i in range(epochs):

            train_loss = self.train()
            test_loss  = self.test()

            history.append([train_loss,test_loss])

            if verbose > 0 and (i==0 or (i+1)%5==0):
                print(f"epoch: {i+1}, TRAIN LOSS: {self.train_loss.result()}, TEST LOSS: {self.test_loss.result()}")


                self.train_loss.reset_states()
                self.test_loss.reset_states()

        return history

## Base model

In [None]:
model = FCNet()

opt = TrainModel(model, batch_size=8, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.Adam)

hist = opt.run(xn_train, yn_train, x_test, y_test, 30, verbose=1)
hist = np.array(hist)

In [None]:
fig, axs = plt.subplots(1,3,figsize=(15,5))

print('------Base model for outliers------')

axs[0].plot(np.mean(hist,-1)[:,0],label="train")
axs[0].set_title("Training Loss Convergence")
axs.flat[0].set(xlabel='Epochs', ylabel='MSE')

axs[1].plot(np.mean(hist,-1)[:,1],label="test")
axs[1].set_title("Test Loss Convergence")
axs.flat[1].set(xlabel='Epochs', ylabel='MSE')

axs[2].scatter(x_test,y_test,label="true")
axs[2].scatter(x_test, model(x_test),label="pred")

## Model result with regularization

In [None]:
model = FCNet2(reg=0.001)

opt = TrainModel(model, batch_size=8, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.Adam)

hist = opt.run(xn_train, yn_train, x_test, y_test, 30, verbose=1)
hist = np.array(hist)

In [None]:
fig, axs = plt.subplots(1,3,figsize=(15,5))

print('------Model result with regularization for outliers------')

axs[0].plot(np.mean(hist,-1)[:,0],label="train")
axs[0].set_title("Training Loss Convergence")
axs.flat[0].set(xlabel='Epochs', ylabel='MSE')

axs[1].plot(np.mean(hist,-1)[:,1],label="test")
axs[1].set_title("Test Loss Convergence")
axs.flat[1].set(xlabel='Epochs', ylabel='MSE')

axs[2].scatter(x_test,y_test,label="true")
axs[2].scatter(x_test, model(x_test),label="pred")

This specific seed doesn't return very good results for this setting, but in general it should be a little bit better than the base model. The test loss plot can be seen as a prove of this, since there are previous epochs with better performance than the last iteration (from which we take the final loss).

## Model trainning with gradient clipping

In [None]:
model = FCNet()

opt = TrainModel2(model, batch_size=8, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.Adam,\
                clip_value=2.0)

hist = opt.run(xn_train, yn_train, x_test, y_test, 30, verbose=1)
hist = np.array(hist)

In [None]:
fig, axs = plt.subplots(1,3,figsize=(15,5))

print('------Model result with gradient clipping for outliers------')

axs[0].plot(np.mean(hist,-1)[:,0],label="train")
axs[0].set_title("Training Loss Convergence")
axs.flat[0].set(xlabel='Epochs', ylabel='MSE')

axs[1].plot(np.mean(hist,-1)[:,1],label="test")
axs[1].set_title("Test Loss Convergence")
axs.flat[1].set(xlabel='Epochs', ylabel='MSE')

axs[2].scatter(x_test,y_test,label="true")
axs[2].scatter(x_test, model(x_test),label="pred")

## Model trainning with gradient clipping and regularization

In [None]:
model = FCNet2(reg=0.001)

opt = TrainModel2(model, batch_size=8, lr=0.001, loss=tf.keras.losses.MeanSquaredError, opt=tf.keras.optimizers.Adam,\
                clip_value=2.0)

hist = opt.run(xn_train, yn_train, x_test, y_test, 30, verbose=1)
hist = np.array(hist)

In [None]:
fig, axs = plt.subplots(1,3,figsize=(15,5))

print('------Model result with regularization and gradient clipping for outliers------')

axs[0].plot(np.mean(hist,-1)[:,0],label="train")
axs[0].set_title("Training Loss Convergence")
axs.flat[0].set(xlabel='Epochs', ylabel='MSE')

axs[1].plot(np.mean(hist,-1)[:,1],label="test")
axs[1].set_title("Test Loss Convergence")
axs.flat[1].set(xlabel='Epochs', ylabel='MSE')

axs[2].scatter(x_test,y_test,label="true")
axs[2].scatter(x_test, model(x_test), label="pred")

## Results and conclusions:

| *Loss* | Clipping | No clipping |
| --- | --- | --- |
| Regularization | **0.0004** | 0.03885 |
| No Regularization | 0.00148 | 0.05 |

After some quick testing for hyperparameter tunning, regularization doesn't seem to have such a big influence for performance compared to gradient clipping.

This is consistent to the idea that regularization aims to avoid overfitting, while gradient clipping is more suitable to avoid the model diverging due to poissoned data.

## Rewriting the code to different files according to their names and purpose:

- data_loading.py
- model.py
- train.py
- run.py

In [None]:
raise Exception('STOP CELL')

In [None]:
#When using google drive
from google.colab import drive
import sys
drive.mount('/content/gdrive', force_remount=True)
sys.path.append('/content/gdrive/My Drive/') # CHANGE THIS LINE DEPENDING OF WHERE YOU PUT YOUR FILES IN GOOGLE DRIVE

from run import run_experiment
run_experiment()

In [None]:
#When loading from local folder
from python_files.run import run_experiment
run_experiment()