<a href="https://colab.research.google.com/github/JacopoBartoli/vas_regression/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#1) Install packages and organize imports.
In this section we install the needed packages and import them.
We set some variables for the used paths, and mount GDrive.

In [1]:
!pip install tensorflow-addons

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.14.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 4.9 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.14.0


In [2]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
import tensorboard
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
import datetime
import math
import sklearn.preprocessing

Save some useful paths.

In [3]:
DATASET_DIR = '/content/gdrive/My Drive/IVA/data/'
LOGS_DIR = '/content/gdrive/My Drive/IVA/logs'
CHECKPOINT_DIR = '/content/gdrive/My Drive/IVA/checkpoint/train'
MODEL_DIR = '/content/gdrive/My Drive/IVA/model'

Mount the drive.

In [None]:
# Mount your drive to access the dataset.
from google.colab import drive
drive.mount('/content/gdrive')
#!ls -l "/content/gdrive/My Drive/"
#!rm -rf './IVA/logs/gradient_tape/'

#2) Define our transformer.
In this section we implement our transformer model.

##2.1) Utility functions.
Define some utilities functions, for the positional encodings and the feed forward network.

In [5]:
# Define the positional encoding function.
def get_angles(pos, i, d_model):
  angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
  return pos * angle_rates


def positional_encoding(position, d_model):
  angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                          np.arange(d_model)[np.newaxis, :],
                          d_model)

  # apply sin to even indices in the array; 2i
  angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])

  # apply cos to odd indices in the array; 2i+1
  angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

  pos_encoding = angle_rads[np.newaxis, ...]

  return tf.cast(pos_encoding, dtype=tf.float32)

In [6]:
# Define the feed forward network
def point_wise_feed_forward_network(d_model, dff):
  return tf.keras.Sequential([
                              tf.keras.layers.Dense(dff, activation='relu'),
                              tf.keras.layers.Dense(d_model)
  ])

##2.2) Define the encoder layer.


In [7]:
class EncoderLayer(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(EncoderLayer, self).__init__()

    self.mha = tf.keras.layers.MultiHeadAttention(num_heads, output_shape=d_model, key_dim=24)
    self.ffn = point_wise_feed_forward_network(d_model, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)

  def call(self, x, training, mask):
    attn_output = self.mha(x,x,x,mask)  # (batch_size, input_seq_len, d_model)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

    ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
    ffn_output = self.dropout2(ffn_output, training=training)
    out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)

    return out2

##2.3) Define the encoder.

In [8]:
class Encoder(tf.keras.layers.Layer):
  def __init__(self, num_layers, d_model, num_heads, dff, rate=0.1):
    super(Encoder, self).__init__()
    
    self.d_model = d_model
    self.num_layers = num_layers

    self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate ) for _ in range(num_layers)]

    self.dropout = tf.keras.layers.Dropout(rate)

  def call(self, x, training, mask):

    x = self.dropout(x, training=training)

    for i in range(self.num_layers):
      x = self.enc_layers[i](x, training, mask)

    return x

## 2.4) Define the transformer with the LSTM  layer.

In this section we define our model using the layers defined above. Only the encoder part of a transformed model is used. The output of this encoder is feeded to a LSTM that perform regression.

In [9]:
class EncoderRegressor(tf.keras.Model):
  def __init__(self, feat_dim, max_len, d_model, n_heads, num_layers, dim_feedforward, num_classes=1, dropout=0.1, pos_encoding='fixed', activation='sigmoid', norm='BatchNorm'):

    super().__init__()

    self.max_len = max_len
    self.d_model = d_model
    self.n_heads = n_heads
    
    self.flatten_inp = tf.keras.layers.Flatten()

    self.project_inp = tf.keras.layers.Dense(max_len*d_model)

    self.reshape = tf.keras.layers.Reshape((max_len, d_model))

    self.pos_encoding = positional_encoding(2048, self.d_model)


    self.encoder_layer = Encoder(num_layers = num_layers, d_model = d_model, num_heads = n_heads, dff= dim_feedforward, rate=0.01)

    self.act = tf.keras.activations.get(activation)
    self.dropout = tf.keras.layers.Dropout(dropout)

    self.lstm = tf.keras.layers.LSTM(num_classes, activation=self.act)

    self.feat_dim = feat_dim
    self.num_classes = num_classes

  def call(self, inputs, training):    
      enc_padding_mask = None #create_padding_mask(inputs)
      seq_len = tf.shape(inputs)[1]

      # Flatten the input tensor and map in a different vector space(d_model)
      x = self.flatten_inp(inputs)
      x = self.project_inp(x)

      # Reshape the tensor to adapt the shape [batch_size, sequence_lenght, d_model]
      x = self.reshape(x)

      # Positional encoding.
      x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
      x += self.pos_encoding[:, :seq_len, :]

      
      # Encoder Layer
      x = self.encoder_layer(x, training, enc_padding_mask)
      x = self.dropout(x, training=training)

      # LSTM
      x = self.lstm(x)

      return x


  

#3) Manage the data.
In this section we manipulate and extract the data.

##3.1) Load the train set.

Define the name of the dataset used for 
training.

The data in the .csv can have a variable number of features. But three column are always needed. They are 'Sequenza', 'Frame' and 'Label'.
The first represent the id of a sequence, the second the id of a frame. The third represent the label of each frame.


Each row of the file need to represent a frame, and each frame of the same sequence need to have the same label.



In [10]:
# Name of the dataset used.
TRAIN_SET = 'train-velocity-66-sampled.csv'

Load the train set from a .csv file.

In [11]:
df = pd.read_csv(DATASET_DIR + TRAIN_SET)
print(df.head())

   Sequenza  Frame      Vel0      Vel1  ...     Vel63     Vel64     Vel65  Label
0       0.0    0.0  0.005976  0.015146  ...  0.027066  0.050636  0.048805    0.0
1       0.0    1.0  0.010653  0.011065  ...  0.024913  0.026466  0.027035    0.0
2       0.0    2.0  0.021485  0.022437  ...  0.016834  0.021260  0.022263    0.0
3       0.0    3.0  0.015915  0.016874  ...  0.012734  0.013349  0.013058    0.0
4       0.0    4.0  0.019476  0.019125  ...  0.010596  0.009103  0.009860    0.0

[5 rows x 69 columns]


## 3.2) Divide the data in train and validation set.

The data is splitted in train and validation set. After that we divide the label from the features.


In [12]:
# Dimension of the validation set.
VAL_PERCENTAGE = 10
valid_dim = math.floor(df['Sequenza'].max()*VAL_PERCENTAGE / 100)
train_dim = df['Sequenza'].max() - valid_dim
max_seq = df['Sequenza'].max()


# Divide by train and validation set.
df_valid = df.loc[df['Sequenza'] >= (max_seq - valid_dim)]
df_train = df.loc[df['Sequenza'] < train_dim]
df_valid = df_valid.drop(['Frame'], axis=1)
df_train = df_train.drop(['Frame'], axis=1)
# Extract the labels.
lbl_valid = df_valid['Label']
lbl_train = df_train['Label']
# Remove the labels from the data.
df_valid = df_valid.drop(['Label'], axis = 1)
df_train = df_train.drop(['Label'], axis = 1)

In [13]:
print(df_train.head())

   Sequenza      Vel0      Vel1  ...     Vel63     Vel64     Vel65
0       0.0  0.005976  0.015146  ...  0.027066  0.050636  0.048805
1       0.0  0.010653  0.011065  ...  0.024913  0.026466  0.027035
2       0.0  0.021485  0.022437  ...  0.016834  0.021260  0.022263
3       0.0  0.015915  0.016874  ...  0.012734  0.013349  0.013058
4       0.0  0.019476  0.019125  ...  0.010596  0.009103  0.009860

[5 rows x 67 columns]


##3.3) Prerocessing of the sequences.

In the dataset each row represent a frame of the sequence. Each frame in a sequence has the same label. We want to make some preprocessing for having a dataset that has a single label for each sequence (not one for each frame). We want that each item of the dataset represent a whole sequence and not a frame.


In [14]:
# Need to pass different in a separate ways lbl and data.
def preprocessing_sequences(data, lbl):
  # Preprocess the labels.
  # The label and the ids of the sequence are concatenated together.
  seq_ids = data['Sequenza']

  tmp = pd.concat([seq_ids, lbl], axis=1)
  tmp = tmp.set_index('Sequenza')
  # Then they are gourped by sequence id so we can have a single label for each
  # sequence.
  tmp = tmp.groupby(level='Sequenza').mean()

  labels = tmp['Label'].values

  min_seq = data['Sequenza'].min()
  num_seqs = data['Sequenza'].max() - data['Sequenza'].min() + 1
  min_seq = int(min_seq)
  num_seqs = int(num_seqs)

  # Create the new dataset.
  temp = []
  for id in tqdm(range(min_seq, min_seq + num_seqs)):
    # Extract sequences one by one.
    seq = data.loc[data['Sequenza'] == id]

    # Remove the unused columns.
    seq = seq.drop(['Sequenza'], axis=1)
    num_col = len(seq.columns)

    # Iterate over each row of the selected sequence  
    temp_row = []
    for index, row in seq.iterrows():
      temp_row = np.append(temp_row, row)
    temp_row = np.reshape(temp_row, (-1, num_col))

    temp.append(temp_row[:])

  return temp, labels


In [16]:
df_train, lbl_train = preprocessing_sequences(df_train, lbl_train)
df_valid, lbl_valid = preprocessing_sequences(df_valid, lbl_valid)

100%|██████████| 190/190 [00:03<00:00, 49.95it/s]
100%|██████████| 22/22 [00:00<00:00, 47.50it/s]


## 3.3) Create and manage the train and validation set.


In [17]:
ds_train = tf.data.Dataset.from_tensor_slices((df_train, lbl_train))
ds_valid = tf.data.Dataset.from_tensor_slices((df_valid, lbl_valid))

In [18]:
BATCH_SIZE = 8
BUFFER_SIZE = 5000
random_seed = 1337

Function to apply some preprocessing when making batches.

In [19]:
def make_batches(ds):
  return (
      ds
      .cache()
      .shuffle(BUFFER_SIZE,seed=random_seed)
      .batch(BATCH_SIZE)
      .prefetch(tf.data.AUTOTUNE))

Now we divide in batches the validation and training sets.

In [20]:
train_batches = make_batches(ds_train)
val_batches = make_batches(ds_valid)

#4) Training Phase.

In this section we organize all the operation needed to perform the train of the model and evaluate its performance.

## 4.1)Set the hyperparameters.
Set the transformer hyperparameter, define the learning rate, optimizer and loss type.

In [21]:
# Model hyperparameters
d_model = 128 # Dimension of the hidden representation.
dim_feedforward = 256
n_heads = 6
num_layers = 3
feat_dim = len(df.columns) - 3 # Number of feature inside each item of the sequence.

# Changed.
max_len=  len(df_train[0]) # Lenght of each sequence.

# Parameter needed for separate classification from regression.
# For now just regression is implemented.
num_classes = 1
is_classification = False


# Network hyperparameter
learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam()
# tfa.optimizers.RectifiedAdam() seems to not work properly.

# This loss and accuracy objects are meant for regression.
# For classifications other metrics will be needed.
loss_object = tf.keras.losses.MeanSquaredError()
accuracy_object = tf.keras.metrics.MeanAbsoluteError()

## 4.2) Custom implementation of the loss and accuracy functions.

Add a way to customize the loss and accuracy functions.

In [22]:
def loss_function(real,pred):

  loss_ = loss_object(real, pred)

  return loss_

def accuracy_function(real, pred):

  accuracies = accuracy_object(real, pred)
  
  return accuracies

Create the metric objects.

In [23]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')
val_accuracy = tf.keras.metrics.Mean(name='validion_accuracy')
val_loss = tf.keras.metrics.Mean(name='validation_loss')

## 4.4) Manage checkpoint and Tensorboard.
Create the model and load last checkpoint if it exist.

In [24]:
# Create the transformer.
transformer = EncoderRegressor(d_model=d_model, dim_feedforward=dim_feedforward, n_heads=n_heads, num_layers=num_layers, feat_dim=feat_dim, max_len=max_len, num_classes = num_classes)

# Checkpoint management.
use_checkpoint = False
if use_checkpoint:
  ckpt = tf.train.Checkpoint(trasformer=transformer,
                           optimizer=optimizer)

  ckpt_manager = tf.train.CheckpointManager(ckpt, CHECKPOINT_DIR, max_to_keep=5)
  if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Restored latest checkpoint")

Set paths for tensorboard visualization.

In [25]:
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = LOGS_DIR + '/gradient_tape/' + current_time + '/train'
valid_log_dir = LOGS_DIR + '/gradient_tape/' + current_time + '/valid'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
valid_summary_writer = tf.summary.create_file_writer(valid_log_dir)

## 4.5) Train the model.
Set the number of epoch and define the train step.



In [26]:
# Number of epochs
EPOCHS = 30

In [27]:
def valid_step(inp,tar):
  tar_real = tar

  
  predictions = transformer(inp, training = False) 
  loss = loss_function(tar_real, predictions)
  accuracy = accuracy_function(tar_real, predictions)


  val_loss(loss)
  val_accuracy(accuracy)
  
  return predictions


In [28]:
def train_step(inp,tar):
  tar_real = tar

  with tf.GradientTape() as tape:
    predictions = transformer(inp, training = True) 
    loss = loss_function(tar_real, predictions)
    accuracy = accuracy_function(tar_real, predictions)
    
    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))


    train_loss(loss)
    train_accuracy(accuracy)
  
  return predictions

Start the training.

In [29]:
for epoch in range(EPOCHS):
  start = time.time()
  # Needed for histogram visualization.
  predictions_histogram = []
  labels_histogram = []
  
  train_loss.reset_states()
  train_accuracy.reset_states()

  for (batch, (inp, tar)) in enumerate(train_batches):
    predictions = train_step(inp, tar)
    # Save the histogram of predictions.
    predictions_histogram = np.hstack((predictions_histogram, tf.reshape(predictions, len(predictions))))    
    labels_histogram = np.hstack((labels_histogram, tar))

  with train_summary_writer.as_default():
    tf.summary.scalar('loss', train_loss.result(), step=epoch)
    tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)
    tf.summary.histogram('predictions distribution', predictions_histogram, step = epoch)
    tf.summary.histogram('ground truth distribution', labels_histogram, step = epoch)
  
  predictions_histogram = []
  labels_histogram = []
  val_loss.reset_states()
  val_accuracy.reset_states()

  for (batch, (inp,tar)) in enumerate(val_batches):
    predictions = valid_step(inp,tar)
    # Save the histogram of predictions.
    predictions_histogram = np.hstack((predictions_histogram, tf.reshape(predictions, len(predictions))))    
    labels_histogram = np.hstack((labels_histogram, tar))

  with valid_summary_writer.as_default():
    tf.summary.scalar('loss', val_loss.result(), step=epoch)
    tf.summary.scalar('accuracy', val_accuracy.result(), step=epoch)
    tf.summary.histogram('predictions distribution', predictions_histogram, step = epoch)
    tf.summary.histogram('ground truth distribution', labels_histogram, step = epoch)
  



  if (epoch + 1) % 5 == 0 and use_checkpoint:
    ckpt_save_path = ckpt_manager.save()
    print(f'Saving checkpoint for epoch {epoch+1} at {ckpt_save_path}')

  print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

  print(f'Time taken for 1 epoch: {time.time() - start:.2f} secs\n')

if use_checkpoint:
  print(ckpt_manager.save())

Epoch 1 Loss 0.1303 Accuracy 0.3056
Time taken for 1 epoch: 84.12 secs

Epoch 2 Loss 0.1015 Accuracy 0.2961
Time taken for 1 epoch: 73.17 secs

Epoch 3 Loss 0.0981 Accuracy 0.2802
Time taken for 1 epoch: 73.01 secs

Epoch 4 Loss 0.1033 Accuracy 0.2791
Time taken for 1 epoch: 84.10 secs

Epoch 5 Loss 0.0977 Accuracy 0.2791
Time taken for 1 epoch: 84.48 secs

Epoch 6 Loss 0.0986 Accuracy 0.2750
Time taken for 1 epoch: 84.12 secs

Epoch 7 Loss 0.1029 Accuracy 0.2761
Time taken for 1 epoch: 84.48 secs

Epoch 8 Loss 0.1036 Accuracy 0.2771
Time taken for 1 epoch: 84.48 secs

Epoch 9 Loss 0.1026 Accuracy 0.2776
Time taken for 1 epoch: 73.27 secs

Epoch 10 Loss 0.1026 Accuracy 0.2785
Time taken for 1 epoch: 84.09 secs

Epoch 11 Loss 0.0946 Accuracy 0.2778
Time taken for 1 epoch: 84.11 secs

Epoch 12 Loss 0.0885 Accuracy 0.2764
Time taken for 1 epoch: 73.10 secs

Epoch 13 Loss 0.0828 Accuracy 0.2733
Time taken for 1 epoch: 72.73 secs

Epoch 14 Loss 0.0915 Accuracy 0.2717
Time taken for 1 epoch:

## 4.6) Save the model.
Save the summary of the model on tensorboard.

In [30]:
def get_summary_str(model):
    lines = []
    model.summary(print_fn=lines.append)
    # Add initial spaces to avoid markdown formatting in TensorBoard
    return '    ' + '\n    '.join(lines)

# Add the summary as text in Tensorboard
with train_summary_writer.as_default():
  tf.summary.text('Model configuration', get_summary_str(transformer), step=0)

In [31]:
class ExportTransformer(tf.Module):
  def __init__(self, transformer):
    self.transformer = transformer
    
  @tf.function()
  def __call__(self, inputs):
    result = self.transformer(inputs, training = False)

    return result

Save the transformer model in .h5 format.

In [32]:
# Calling `save('my_model.h5')` creates a h5 file `my_model.h5`.
# Currently not working
exporter = ExportTransformer(transformer)
tf.saved_model.save(exporter, export_dir=MODEL_DIR + '/' + current_time + '/transformers')



INFO:tensorflow:Assets written to: /content/gdrive/My Drive/IVA/model/20211011-214154/transformers/assets


INFO:tensorflow:Assets written to: /content/gdrive/My Drive/IVA/model/20211011-214154/transformers/assets
