# Training deep learning model using TensorFlow

This example demonstrates TensorFlow based model training.

Its main focus is to show how the components described in the `Implementing and training a model in TensorFlow` section can be put together.

Therefore, the model architecture and the data processing are deliberately simplified, so the trained model is not expected to perform well.

In [138]:
import pyspark
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [139]:
print(tf.__version__)

2.8.1


In [140]:
import pandas as pd
import numpy as np

## Data loading logic

For this example, we will use the Google Scholar data that we crawled in Chapter 2.

In [141]:
data = pd.read_csv("sample_google_scholar.csv")

In [142]:
# Drop the rows that contain missing values, then preview the first rows of what is left
data = data.dropna()
data.head()

Unnamed: 0,author_name,email,affiliation,coauthors_names,research_interest
0,Lawrence Holder,wsu.edu,Washington State University,Diane J Cook##William Eberle,artificial_intelligence##machine_learning##dat...
3,Diane J Cook,eecs.wsu.edu,Washington State University,Lawrence Holder##Parisa Rashidi##Sajal K. Das#...,artificial_intelligence##machine_learning##sma...
4,Sumi Helal IEEE Fellow AAAS Fellow IET Fellow ...,cise.ufl.edu,University of Florida,Raja Bose##Darrell Woelk##Diane J Cook##Yousse...,digital_health##smart_homes##internet_of_thing...
5,Hani Hagras,essex.ac.uk,University of Essex,Christian Wagner,explainable_artificial_intelligence##ambient_i...
6,Anupam Joshi,umbc.edu,UMBC,Tim Finin##Yelena Yesha##Lalana Kagal##Dipanja...,data_management##mobile_computing##security##s...


In [143]:
# for features, we convert the first 10 characters of the affiliation into a vector of floats
# by dividing each character's code point by 256 (the number of extended-ASCII values)

def convert_first_ten_characters_into_tensor(data):
    """Encode the first ten characters of ``data`` as a fixed-length float vector.

    Every character is mapped to ``ord(char) / 256``. Inputs shorter than ten
    characters leave the remaining positions at ``0.0``.

    Returns a NumPy array with exactly ten entries.
    """
    width = 10
    # Start from an all-zero vector so that short inputs are padded implicitly
    encoded = np.zeros(width)
    for position, symbol in enumerate(data[:width]):
        encoded[position] = ord(symbol) / 256
    return encoded

# Encode every affiliation string, then stack the per-row vectors into a single 2-D array
converted_affiliation = data['affiliation'].map(convert_first_ten_characters_into_tensor)
affiliation = np.vstack(converted_affiliation.values)
# Preview the first five encoded rows
print(affiliation[:5])

[[0.33984375 0.37890625 0.44921875 0.40625    0.41015625 0.4296875
  0.40234375 0.453125   0.43359375 0.4296875 ]
 [0.33984375 0.37890625 0.44921875 0.40625    0.41015625 0.4296875
  0.40234375 0.453125   0.43359375 0.4296875 ]
 [0.33203125 0.4296875  0.41015625 0.4609375  0.39453125 0.4453125
  0.44921875 0.41015625 0.453125   0.47265625]
 [0.33203125 0.4296875  0.41015625 0.4609375  0.39453125 0.4453125
  0.44921875 0.41015625 0.453125   0.47265625]
 [0.33203125 0.30078125 0.2578125  0.26171875 0.         0.
  0.         0.         0.         0.        ]]


In [144]:
# for labels, we use a boolean value: True if the email contains the literal substring '.edu', False otherwise
# regex=False matters here: by default the pattern is a regular expression, in which '.' matches ANY character
converted_email = data['email'].str.contains('.edu', regex=False)
labels = converted_email.values
labels[:5]

array([ True,  True,  True, False,  True])

### This concludes this simple preprocessing step. As a result, we have features (called affiliation) and labels
In real life, this could be a separate processing job executed, for example, as a Spark job. 
Let's save those results as a csv file. 

In [145]:
# Put the feature columns and the label column side by side; ignore_index=True renumbers the columns 0..10
full_df = pd.concat([pd.DataFrame(affiliation),pd.DataFrame(labels)],axis=1, ignore_index=True)
# Persist the table with a header row and without the row index
full_df.to_csv('data.csv', index=False, header=True)
full_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.339844,0.378906,0.449219,0.40625,0.410156,0.429688,0.402344,0.453125,0.433594,0.429688,True
1,0.339844,0.378906,0.449219,0.40625,0.410156,0.429688,0.402344,0.453125,0.433594,0.429688,True
2,0.332031,0.429688,0.410156,0.460938,0.394531,0.445312,0.449219,0.410156,0.453125,0.472656,True
3,0.332031,0.429688,0.410156,0.460938,0.394531,0.445312,0.449219,0.410156,0.453125,0.472656,False
4,0.332031,0.300781,0.257812,0.261719,0.0,0.0,0.0,0.0,0.0,0.0,True


We can also transform this pandas dataframe into a two-column layout: one column holding the label and one holding the whole feature vector. 

In [146]:
# Read both pieces from the untouched frame first: the last column is the label,
# everything before it is the feature vector
label_column = full_df.iloc[:, -1].values
feature_rows = full_df.iloc[:, :-1].values.tolist()
full_df['labels'] = label_column
full_df['features'] = feature_rows
# Remove the original numbered columns (0..10) so only 'labels' and 'features' remain
for column in range(11):
    del full_df[column]
full_df.to_csv('data_2c.csv', index=False, header=True)

In [147]:
full_df.head()

Unnamed: 0,labels,features
0,True,"[0.33984375, 0.37890625, 0.44921875, 0.40625, ..."
1,True,"[0.33984375, 0.37890625, 0.44921875, 0.40625, ..."
2,True,"[0.33203125, 0.4296875, 0.41015625, 0.4609375,..."
3,False,"[0.33203125, 0.4296875, 0.41015625, 0.4609375,..."
4,True,"[0.33203125, 0.30078125, 0.2578125, 0.26171875..."


In [148]:
import tensorflow_datasets as tfds 
import json

In [149]:
# Locations of the two csv files written above and the name of the tfds dataset to load
config = {
    "path": "data.csv",
    "path_2c": "data_2c.csv",
    "tfds_dataset": "mnist",
}

In [150]:
print(config.get('path'))

data.csv


In [151]:
class DataLoader:
    """Static helpers that build ``tf.data.Dataset`` objects from different sources."""

    @staticmethod
    def load_data_tfds(config):
        """Load the training split of a predefined TensorFlow Datasets dataset.

        Args:
            config: Mapping whose ``'tfds_dataset'`` entry holds the dataset name.

        Returns:
            The result of ``tfds.load`` for the ``TRAIN`` split, requested in
            supervised form.
        """
        dataset_name = config.get('tfds_dataset')
        print(dataset_name)
        # as_supervised must be a real boolean; the string "True" only worked
        # because any non-empty string is truthy (so would "False" have been).
        return tfds.load(dataset_name, split=tfds.Split.TRAIN, as_supervised=True)

    @staticmethod
    def load_data_csv(config, tag, batch_size, label_name, select_columns):
        """Load a csv file as a batched dataset.

        Args:
            config: Mapping from tags to csv file paths.
            tag: Key in ``config`` whose value is the csv path to read.
            batch_size: Forwarded to ``make_csv_dataset`` as ``batch_size``.
            label_name: Forwarded to ``make_csv_dataset`` as ``label_name``.
            select_columns: Forwarded to ``make_csv_dataset`` as ``select_columns``.

        Returns:
            The dataset produced by ``tf.data.experimental.make_csv_dataset``.
        """
        return tf.data.experimental.make_csv_dataset(
            config.get(tag),
            batch_size=batch_size,
            label_name=label_name,
            select_columns=select_columns,
        )

    @staticmethod
    def load_data_from_nump_arrays(feature, label):
        """Build a dataset of ``(feature, label)`` pairs by slicing the two arrays row by row."""
        return tf.data.Dataset.from_tensor_slices((feature, label))

    @staticmethod
    def load_data_from_pandas(df):
        """Build a dataset from a dataframe whose last column is the label.

        All columns except the last one are used as features.
        """
        features = df.iloc[:, :-1].values
        labels = df.iloc[:, -1].values
        return tf.data.Dataset.from_tensor_slices((features, labels))
    
    

### Let's start with loading data from csv
We have prepared two versions: 
1. csv with two columns (`labels` and `features`)
2. csv with 11 columns (10 feature columns plus the label column)

Let's start with the first one, and then look at the second one. 

In [166]:
label_name = 'labels'
select_columns = [0,1]

In [167]:
data = DataLoader().load_data_csv(config, 'path_2c', 1, label_name, select_columns)

In [168]:
print(data)

<PrefetchDataset element_spec=(OrderedDict([('features', TensorSpec(shape=(1,), dtype=tf.string, name=None))]), TensorSpec(shape=(1,), dtype=tf.string, name=None))>


In this case, we see that our feature column is saved as a single tensor of shape (1,) that holds our feature arrays as string representations. Therefore, some additional processing might be required to use that dataset. The `map` method can be used in this situation.  

In [246]:
for feature in data.take(1):
    print('Tensor:',feature)

Tensor: (OrderedDict([('features', <tf.Tensor: shape=(1,), dtype=string, numpy=
array([b'[0.3046875, 0.39453125, 0.46484375, 0.125, 0.34765625, 0.43359375, 0.4453125, 0.41796875, 0.125, 0.28515625]'],
      dtype=object)>)]), <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'True'], dtype=object)>)


Let's move on to the second csv file case. 

In [155]:
label_name = '10'
select_columns = range(11)

In [156]:
data = DataLoader().load_data_csv(config, 'path', 1, label_name, select_columns)

In [157]:
print(data)

<PrefetchDataset element_spec=(OrderedDict([('0', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('1', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('2', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('3', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('4', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('5', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('6', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('7', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('8', TensorSpec(shape=(1,), dtype=tf.float32, name=None)), ('9', TensorSpec(shape=(1,), dtype=tf.float32, name=None))]), TensorSpec(shape=(1,), dtype=tf.string, name=None))>


We can use the `map` method to transform this dataset into the desired format. 

In [271]:
def feature_vector(samples, labels):
    """Collect the per-column values of ``samples`` into a list; labels pass through unchanged.

    Returns a ``(list of column values in mapping order, labels)`` pair.
    """
    columns = [*samples.values()]
    return columns, labels

In [272]:
dataset = data.map(feature_vector)

In [273]:
print(dataset)

<MapDataset element_spec=(TensorSpec(shape=(1, 1), dtype=tf.string, name=None), TensorSpec(shape=(1,), dtype=tf.string, name=None))>


In [274]:
for feature in dataset.take(1):
    print('Tensor:',feature)

Tensor: (<tf.Tensor: shape=(1, 1), dtype=string, numpy=
array([[b'[0.3046875, 0.43359375, 0.4453125, 0.453125, 0.40625, 0.46484375, 0.39453125, 0.44921875, 0.453125, 0.39453125]']],
      dtype=object)>, <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'True'], dtype=object)>)


Now, let's see how to load a tfds dataset. In this example, we will load the MNIST dataset. 

In [275]:
data = DataLoader().load_data_tfds(config)

mnist


In [276]:
print(data)

<PrefetchDataset element_spec=(TensorSpec(shape=(28, 28, 1), dtype=tf.uint8, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>


Let's see how we can create a dataset using numpy arrays:

In [277]:
data = DataLoader().load_data_from_nump_arrays(affiliation, labels)

In [278]:
print(data)

<TensorSliceDataset element_spec=(TensorSpec(shape=(10,), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.bool, name=None))>


Finally, we can create a dataset from a pandas dataframe.

In [279]:
# Rebuild the wide frame (feature columns followed by the label column), since full_df was reshaped to two columns earlier
full_df = pd.concat([pd.DataFrame(affiliation),pd.DataFrame(labels)],axis=1, ignore_index=True)
full_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.339844,0.378906,0.449219,0.40625,0.410156,0.429688,0.402344,0.453125,0.433594,0.429688,True
1,0.339844,0.378906,0.449219,0.40625,0.410156,0.429688,0.402344,0.453125,0.433594,0.429688,True
2,0.332031,0.429688,0.410156,0.460938,0.394531,0.445312,0.449219,0.410156,0.453125,0.472656,True
3,0.332031,0.429688,0.410156,0.460938,0.394531,0.445312,0.449219,0.410156,0.453125,0.472656,False
4,0.332031,0.300781,0.257812,0.261719,0.0,0.0,0.0,0.0,0.0,0.0,True


In [280]:
data = DataLoader().load_data_from_pandas(full_df)

In [373]:
print(data)

<TensorSliceDataset element_spec=(TensorSpec(shape=(10,), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.bool, name=None))>


In [376]:
for feature in data.take(1):
    print('Tensor:',feature)

Tensor: (<tf.Tensor: shape=(10,), dtype=float64, numpy=
array([0.33984375, 0.37890625, 0.44921875, 0.40625   , 0.41015625,
       0.4296875 , 0.40234375, 0.453125  , 0.43359375, 0.4296875 ])>, <tf.Tensor: shape=(), dtype=bool, numpy=True>)


Next, let's have a look at an option that uses a Python generator.

In [298]:
print( affiliation[0] )

[0.33984375 0.37890625 0.44921875 0.40625    0.41015625 0.4296875
 0.40234375 0.453125   0.43359375 0.4296875 ]


In [299]:
print( labels[0] )

True


In [318]:
def data_generator(affiliation, labels):
    """Return a zero-argument generator function that cycles over the examples forever.

    The returned callable yields ``(affiliation[k], labels[k])`` pairs, wrapping
    back to the first example after the last label has been served.
    """
    def fetch_examples():
        cursor = 0
        while True:
            pair = affiliation[cursor], labels[cursor]
            # Advance (with wrap-around) before handing the pair out
            cursor = (cursor + 1) % len(labels)
            yield pair
    return fetch_examples

In [325]:
batch_size = 5
features_shape = 10

# Describe each generated (features, label) example through output_signature:
# the output_types / output_shapes arguments of from_generator are deprecated.
# Features are vectors of length `features_shape`; each label is a scalar boolean.
dataset_gen = tf.data.Dataset.from_generator(
    data_generator(affiliation, labels),
    output_signature=(
        tf.TensorSpec(shape=(features_shape,), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.bool),
    ),
).batch(batch_size)


In [328]:
for feature in dataset_gen.take(1):
    print('Tensor:',feature)

Tensor: (<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[0.33984375, 0.37890625, 0.44921875, 0.40625   , 0.41015625,
        0.4296875 , 0.40234375, 0.453125  , 0.43359375, 0.4296875 ],
       [0.33984375, 0.37890625, 0.44921875, 0.40625   , 0.41015625,
        0.4296875 , 0.40234375, 0.453125  , 0.43359375, 0.4296875 ],
       [0.33203125, 0.4296875 , 0.41015625, 0.4609375 , 0.39453125,
        0.4453125 , 0.44921875, 0.41015625, 0.453125  , 0.47265625],
       [0.33203125, 0.4296875 , 0.41015625, 0.4609375 , 0.39453125,
        0.4453125 , 0.44921875, 0.41015625, 0.453125  , 0.47265625],
       [0.33203125, 0.30078125, 0.2578125 , 0.26171875, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ]],
      dtype=float32)>, <tf.Tensor: shape=(5,), dtype=bool, numpy=array([ True,  True,  True, False,  True])>)


## Model creation 
1. Model definition based on `tf.keras.Sequential` 

In [339]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Per-example input shape (batch dimension excluded). Keras documents `shape`
# as a tuple, so use a 1-tuple instead of relying on a bare int being accepted.
input_shape = (10,)
# Fully connected binary classifier: 10 inputs -> 128 -> 64 -> 1 sigmoid output
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Dense(128, activation="relu", name="layer1"),
        layers.Dense(64, activation="relu", name="layer2"),
        layers.Dense(1, activation="sigmoid", name="layer3"),
    ])

In [340]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer1 (Dense)              (None, 128)               1408      
                                                                 
 layer2 (Dense)              (None, 64)                8256      
                                                                 
 layer3 (Dense)              (None, 1)                 65        
                                                                 
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________


2. Model definition based on `keras.Model` class

In [343]:
# Same architecture as the Sequential version, wired with the functional API.
# The input shape is passed explicitly as a tuple (batch dimension excluded).
input_layer = layers.Input(shape=(10,), name="input_layer")
x_1 = layers.Dense(128, activation="relu", name="layer1")(input_layer)
x_2 = layers.Dense(64, activation="relu", name="layer2")(x_1)
x_3 = layers.Dense(1, activation="sigmoid", name="layer3")(x_2)
model2 = keras.Model( input_layer, x_3 )

In [344]:
model2.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 10)]              0         
                                                                 
 layer1 (Dense)              (None, 128)               1408      
                                                                 
 layer2 (Dense)              (None, 64)                8256      
                                                                 
 layer3 (Dense)              (None, 1)                 65        
                                                                 
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________


3. The third option is to create a class that inherits `keras.Model`

In [355]:
class SimpleANN(keras.Model):
    """Three-layer fully connected binary classifier built by subclassing ``keras.Model``."""

    def __init__(self):
        super().__init__()
        self.dense_1 = layers.Dense(128, activation="relu", name="layer1")
        self.dense_2 = layers.Dense(64, activation="relu", name="layer2")
        self.out = layers.Dense(1, activation="sigmoid", name="output")

    def call(self, inputs):
        """Forward pass: two ReLU layers followed by the sigmoid output layer."""
        x = self.dense_1(inputs)
        x = self.dense_2(x)
        return self.out(x)

    def build_graph(self, raw_shape):
        """Wrap this model's layers in a functional ``keras.Model``.

        Args:
            raw_shape: Per-example input shape without the batch dimension.
                A bare int is treated as a one-dimensional shape.

        Returns:
            A functional ``keras.Model`` going from a fresh ``Input`` to the
            output of ``call``.
        """
        # Normalise a bare int to a 1-tuple; anything else is forwarded untouched
        shape = (raw_shape,) if isinstance(raw_shape, int) else raw_shape
        x = tf.keras.layers.Input(shape=shape)
        return keras.Model(inputs=[x], outputs=self.call(x))

In [356]:
model3 = SimpleANN()

In [359]:
model3 = model3.build_graph(10)

In [360]:
model3.summary()

Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 10)]              0         
                                                                 
 layer1 (Dense)              (None, 128)               1408      
                                                                 
 layer2 (Dense)              (None, 64)                8256      
                                                                 
 output (Dense)              (None, 1)                 65        
                                                                 
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________


## Model Training
First of all, you need to compile your model and provide an optimizer and a loss function. 
In our example, we will use the Adam optimizer and the Binary Cross Entropy loss function, as we are trying to solve a binary classification task.

In [364]:
loss = tf.keras.losses.BinaryCrossentropy()

In [365]:
optimizer = tf.keras.optimizers.Adam()

In [378]:
model.compile(loss=loss, optimizer=optimizer)

In [379]:
# Give this model its own optimizer instance: an optimizer keeps per-variable state
# and a step counter, so one instance should not be shared between different models
model2.compile(loss=loss, optimizer=tf.keras.optimizers.Adam())

In [380]:
# Give this model its own optimizer instance: an optimizer keeps per-variable state
# and a step counter, so one instance should not be shared between different models
model3.compile(loss=loss, optimizer=tf.keras.optimizers.Adam())

### Start training
Using numpy arrays 

In [386]:
model.fit(affiliation, labels, batch_size=16, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f86cee6d060>

In [387]:
model2.fit(affiliation, labels, batch_size=16, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f86cc51bfd0>

In [388]:
model3.fit(affiliation, labels, batch_size=16, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f86ce4e2650>

Train model using `tf.data.Dataset`

In [407]:
data = DataLoader().load_data_from_nump_arrays(affiliation, labels)

In [408]:
data = data.shuffle(buffer_size=len(labels))

In [409]:
data = data.batch(16)

In [410]:
model.fit(data, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f86cde51ed0>

## Custom loss function

In [425]:
def custom_huber_loss(threshold=1.0):
    """Create a Huber loss function with the given switch-over ``threshold``.

    The returned ``huber_fn(y_true, y_pred)`` yields an element-wise loss:
    half the squared error where the absolute error is below ``threshold``,
    and ``threshold * |error| - threshold**2 / 2`` elsewhere.
    """
    def huber_fn(y_true, y_pred):
        # Labels may arrive as booleans/ints, so bring them to float32 first
        residual = tf.cast(y_true, tf.float32) - y_pred
        magnitude = tf.abs(residual)
        quadratic = tf.square(residual) / 2
        linear = threshold * magnitude - threshold**2 / 2
        return tf.where(magnitude < threshold, quadratic, linear)
    return huber_fn

In [426]:
model.compile(loss=custom_huber_loss(2.0), optimizer="adam", metrics=["mae"])

In [427]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer1 (Dense)              (None, 128)               1408      
                                                                 
 layer2 (Dense)              (None, 64)                8256      
                                                                 
 layer3 (Dense)              (None, 1)                 65        
                                                                 
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________


In [428]:
model.fit(affiliation, labels, batch_size=16, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f86bfdf31f0>

Another option is to create a class that inherits `tf.keras.losses.Loss` class

In [443]:
class CustomLoss(tf.keras.losses.Loss):
    """Huber loss implemented as a ``tf.keras.losses.Loss`` subclass.

    Args:
        threshold: Absolute error below which the loss is quadratic; at or
            above it the loss is linear.
        **kwargs: Forwarded to ``tf.keras.losses.Loss`` (e.g. ``name``,
            ``reduction``).
    """

    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold

    def call(self, y_true, y_pred):
        """Return the element-wise Huber loss between ``y_true`` and ``y_pred``."""
        # Labels may arrive as booleans/ints, so bring them to float32 first
        y_true = tf.cast(y_true, tf.float32)
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    def get_config(self):
        """Include ``threshold`` in the config so the loss can be re-created from it."""
        config = super().get_config()
        config["threshold"] = self.threshold
        return config

In [444]:
loss_custom = CustomLoss()

In [445]:
model.compile(optimizer="adam", loss=loss_custom, metrics=['mae'])

In [446]:
model.fit(affiliation, labels, batch_size=16, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f86bbd9ab00>

## TensorFlow Callbacks 

In [447]:
# Early-stopping callback that watches the validation loss ('val_loss', lower is better).
# It is configured with min_delta=0.1 and patience=2, and does not restore the best weights.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0.1, patience=2, verbose=0,
    mode='min', baseline=None, restore_best_weights=False)

In [448]:
# `callbacks` is documented as a list of callback instances, so wrap the single callback in a list
model.fit(affiliation, labels, batch_size=16, epochs=5, validation_split=0.2, callbacks=[callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5


<keras.callbacks.History at 0x7f86b82e01f0>

## Custom training loop

In [473]:
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.BinaryCrossentropy()

BinaryAccuracy = tf.keras.metrics.BinaryAccuracy()

epochs = 5

for epoch in range(epochs):
    # `data` is expected to be the shuffled, batched tf.data.Dataset built earlier
    for x_batch_train, y_batch_train in data:
        # Record the forward pass so gradients of the loss can be computed
        with tf.GradientTape() as tape:
            # The model ends in a sigmoid layer, so these are probabilities (not logits),
            # which matches BinaryCrossentropy's default from_logits=False
            predictions = model(x_batch_train, training=True)
            loss_ = loss_fn(y_batch_train, predictions)
        grads = tape.gradient(loss_, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        # Accumulate accuracy over all batches of the epoch
        BinaryAccuracy.update_state(y_batch_train, predictions)
    # NOTE: loss_ is the loss of the LAST batch of the epoch, not an epoch average
    print("epoch : " + str(epoch+1) + " loss: " + str(loss_.numpy()) )
    Acc = BinaryAccuracy.result()
    print( "Binary Accuracy: ", float(Acc) )
    # reset_state() replaces the deprecated reset_states(); clear the metric after each epoch
    BinaryAccuracy.reset_state()

epoch : 1 loss: 0.6113289
Binary Accuracy:  0.7135416865348816
epoch : 2 loss: 0.49480128
Binary Accuracy:  0.734375
epoch : 3 loss: 0.46449468
Binary Accuracy:  0.7239583134651184
epoch : 4 loss: 0.37761664
Binary Accuracy:  0.7239583134651184
epoch : 5 loss: 0.6014573
Binary Accuracy:  0.7135416865348816
