<a href="https://colab.research.google.com/github/JacopoBartoli/vas_regression/blob/main/Transformers_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#1) Install packages and organize imports.
In this section we install the needed packages and import them.
We set some variables for the used paths, and mount GDrive.

In [None]:
!pip install tensorflow-addons



In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
import tensorboard
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
import datetime
import math

Save some useful paths.

In [None]:
# Project folders on the mounted Google Drive, all derived from a single root.
_IVA_ROOT = '/content/gdrive/My Drive/IVA'
DATASET_DIR = _IVA_ROOT + '/data/'  # Trailing slash kept: file names are appended directly.
LOGS_DIR = _IVA_ROOT + '/logs'
CHECKPOINT_DIR = _IVA_ROOT + '/checkpoint/train'
MODEL_DIR = _IVA_ROOT + '/model'

Mount the drive.

In [None]:
# Mount Google Drive at /content/gdrive so the project paths defined above become reachable.
# NOTE(review): the original note said to "link the dataset as explained above", but no such
# instructions appear in this notebook; the CSV files are expected under DATASET_DIR.
from google.colab import drive
drive.mount('/content/gdrive')
# List the Drive root as a quick check that the mount worked.
!ls -l "/content/gdrive/My Drive/"
# Disabled clean-up command (presumably meant to clear old TensorBoard logs -- confirm the path before enabling):
#!rm -rf './IVA/logs/gradient_tape/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
ls: '/content/gdrive/My Drive/DeepFashion2 Dataset': No such file or directory
total 2794
drwx------ 2 root root   4096 Mar 18 14:03  3D_result
-rw------- 1 root root 555998 Dec  3  2020  7036362.pdf
-rw------- 1 root root   6420 Dec  2  2019 '7036362-sol (1).pdf'
-rw------- 1 root root   6669 Dec  3  2020  7036362-sol.pdf
-rw------- 1 root root 281590 Dec  8  2020  bartoli_jacopo_report2.pdf
drwx------ 2 root root   4096 Nov 28  2020 'Colab Notebooks'
drwx------ 2 root root   4096 Dec  1  2020  DDM
lrw------- 1 root root      0 Mar 24 15:34 'DeepFashion2 Dataset' -> '/content/gdrive/.shortcut-targets-by-id/125F48fsMBz2EF0Cpqk6aaHet5VH399Ok/DeepFashion2 Dataset'
drwx------ 6 root root   4096 Sep  3 11:19  IVA
-rw------- 1 root root 109584 Nov 10  2020  Jacopo_Bartoli_report.pdf
lrw------- 1 root root     25 Nov 30  2020 'My Drive' -> '/content/gdrive/My Dri

#2) Define our transformer.
In this section we implement our transformer model.

##2.1) Utility functions.
Define some utility functions for the positional encoding and the feed-forward network.

In [None]:
# Define the positional encoding function.
def get_angles(pos, i, d_model):
  """Compute the raw angles (before sin/cos) of the sinusoidal positional encoding.

  Args:
    pos: Position index, or an array of position indices.
    i: Channel index, or an array of channel indices. Channels `2k` and `2k + 1`
      share the same frequency because of the integer division by two.
    d_model: Size of the encoding vector.

  Returns:
    `pos / 10000 ** (2 * (i // 2) / d_model)`, broadcast over `pos` and `i`.
  """
  exponent = (2 * (i // 2)) / np.float32(d_model)
  inverse_frequency = 1 / np.power(10000, exponent)
  return pos * inverse_frequency


def positional_encoding(position, d_model):
  """Build the fixed sinusoidal positional-encoding table.

  Args:
    position: Number of positions (rows) to encode.
    d_model: Size of the encoding vector of each position.

  Returns:
    A `tf.float32` tensor of shape `(1, position, d_model)`; even channels hold
    sines and odd channels hold cosines.
  """
  positions = np.arange(position)[:, np.newaxis]  # Column vector of positions.
  channels = np.arange(d_model)[np.newaxis, :]    # Row vector of channel indices.
  table = get_angles(positions, channels, d_model)

  # Even channels (2i) take the sine, odd channels (2i+1) the cosine; both are written in place.
  table[:, 0::2] = np.sin(table[:, 0::2])
  table[:, 1::2] = np.cos(table[:, 1::2])

  # Prepend a leading axis of size one so the table broadcasts over the batch.
  return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

In [None]:
# Define the feed forward network
def point_wise_feed_forward_network(d_model, dff):
  """Create the feed-forward sub-network used inside each encoder layer.

  Args:
    d_model: Width of the output layer.
    dff: Width of the hidden (ReLU) layer.

  Returns:
    A `tf.keras.Sequential` made of `Dense(dff, relu)` followed by `Dense(d_model)`.
  """
  hidden_layer = tf.keras.layers.Dense(dff, activation='relu')
  output_layer = tf.keras.layers.Dense(d_model)
  return tf.keras.Sequential([hidden_layer, output_layer])

##2.2) Define the encoder layer.


In [None]:
class EncoderLayer(tf.keras.layers.Layer):
  """One transformer encoder block: self-attention followed by a feed-forward network.

  Both sub-blocks follow the same pattern: `LayerNorm(x + Dropout(sublayer(x)))`.

  Args:
    d_model: Width of the block input and output.
    num_heads: Number of attention heads.
    dff: Hidden width of the feed-forward network.
    rate: Dropout rate applied to the output of each sub-block.
    key_dim: Size of each attention head for query and key. Defaults to 24,
      the value that used to be hard-coded, so existing callers are unaffected.
  """

  def __init__(self, d_model, num_heads, dff, rate=0.1, key_dim=24):
    super().__init__()

    # Attribute names are part of the checkpoint object graph: do not rename them.
    self.mha = tf.keras.layers.MultiHeadAttention(num_heads, key_dim=key_dim, output_shape=d_model)
    self.ffn = point_wise_feed_forward_network(d_model, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)

  def call(self, x, training, mask):
    """Apply the encoder block.

    Args:
      x: Input tensor; the residual connections require its last dimension to be `d_model`.
      training: Whether dropout is active.
      mask: Attention mask forwarded to the attention layer, or None.

    Returns:
      A tensor with the same shape as `x`.
    """
    # Self-attention: query, value and key are all `x`. The mask is passed by keyword
    # so it cannot be confused with another positional argument of the layer.
    attn_output = self.mha(x, x, x, attention_mask=mask)  # (batch_size, input_seq_len, d_model)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

    ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
    ffn_output = self.dropout2(ffn_output, training=training)
    out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)

    return out2

##2.3) Define the encoder.

In [None]:
class Encoder(tf.keras.layers.Layer):
  """A stack of `num_layers` EncoderLayer blocks preceded by an input dropout.

  Args:
    num_layers: Number of stacked encoder blocks.
    d_model: Width of the features flowing through the stack.
    num_heads: Number of attention heads of every block.
    dff: Hidden width of every feed-forward network.
    rate: Dropout rate, used both for the input dropout and inside every block.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, rate=0.1):
    super().__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    blocks = []
    for _ in range(num_layers):
      blocks.append(EncoderLayer(d_model, num_heads, dff, rate))
    self.enc_layers = blocks

    self.dropout = tf.keras.layers.Dropout(rate)

  def call(self, x, training, mask):
    """Run `x` through the input dropout and then through every encoder block in order."""
    hidden = self.dropout(x, training=training)

    for block in self.enc_layers:
      hidden = block(hidden, training, mask)

    return hidden

## 2.4) Define the transformer with the regression layer.

In [None]:
class EncoderRegressor(tf.keras.Model):
  """Transformer-encoder regressor for fixed-length multivariate sequences.

  Forward pass: flatten the whole input sequence -> one Dense projection to
  `max_len * d_model` values -> reshape to `(max_len, d_model)` -> scale by
  `sqrt(d_model)` and add the sinusoidal positional encoding -> encoder stack ->
  dropout -> flatten -> Dense(num_classes).

  Args:
    feat_dim: Number of features of each sequence item (stored, not used in the forward pass).
    max_len: Sequence length the model works with.
    d_model: Width of the per-position representation inside the encoder.
    n_heads: Number of attention heads of each encoder layer.
    num_layers: Number of encoder layers.
    dim_feedforward: Hidden width of the encoder feed-forward networks.
    num_classes: Number of output values per sequence.
    dropout: Rate of the dropout applied after the encoder. The dropout inside
      the encoder stack is fixed at 0.01 and is not controlled by this argument.
    pos_encoding: Accepted but unused; the encoding is always the fixed sinusoidal one.
    activation: Resolved and stored in `self.act`, but not applied in the forward pass.
    norm: Accepted but unused.
  """

  def __init__(self, feat_dim, max_len, d_model, n_heads, num_layers, dim_feedforward, num_classes=1, dropout=0.1, pos_encoding='fixed', activation='gelu', norm='BatchNorm'):

    super().__init__()

    self.max_len = max_len
    self.d_model = d_model
    self.n_heads = n_heads

    # Input embedding: the sequence is flattened, projected with a single Dense layer
    # and reshaped back to one d_model-sized vector per position.
    self.flatten_inp = tf.keras.layers.Flatten()
    self.project_inp = tf.keras.layers.Dense(max_len*d_model)
    self.reshape = tf.keras.layers.Reshape((max_len, d_model))

    # Size the positional table from max_len instead of a fixed 2048 rows, so it always
    # matches the reshaped tensor (a fixed table fails as soon as max_len exceeds it).
    self.pos_encoding = positional_encoding(max_len, d_model)

    self.encoder_layer = Encoder(num_layers = num_layers, d_model = d_model, num_heads = n_heads, dff= dim_feedforward, rate=0.01)

    self.act = tf.keras.activations.get(activation)
    self.dropout = tf.keras.layers.Dropout(dropout)

    self.flatten = tf.keras.layers.Flatten()

    self.reg = tf.keras.layers.Dense(num_classes)

    self.feat_dim = feat_dim
    self.num_classes = num_classes

  def call(self, inputs, training=None):
    """Predict `num_classes` values for every sequence in the batch.

    Args:
      inputs: Batch of sequences; each one is flattened before the projection.
      training: Whether dropout is active.

    Returns:
      The output of the final Dense layer, one row of `num_classes` values per sequence.
    """
    # No padding mask is used: the zero padding is attended to like real data.
    enc_padding_mask = None #create_padding_mask(inputs)

    # Flatten the input tensor and map it into a different vector space (d_model).
    x = self.flatten_inp(inputs)
    x = self.project_inp(x)

    # Reshape the tensor to [batch_size, sequence_length, d_model].
    x = self.reshape(x)

    # Positional encoding. After the reshape the sequence axis is always max_len long,
    # which is exactly the size of the table, so no slicing by the input length is needed.
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x += self.pos_encoding

    # Encoder layers.
    x = self.encoder_layer(x, training, enc_padding_mask)
    x = self.dropout(x, training=training)

    # Regression layers.
    x = self.flatten(x)
    x = self.reg(x)

    return x


  

#3) Manage the data.
In this section we manipulate and extract the data.

##3.1) Load the train set.
Define the name of the dataset used for 
training.


In [None]:
# File name of the training CSV; it is appended to DATASET_DIR when loading.
TRAIN_SET = 'train-vs.csv'

Load the train set from a .csv file.

In [None]:
# Read the training CSV into a dataframe and preview its first rows.
df = pd.read_csv(DATASET_DIR + TRAIN_SET)
print(df.head())

   Sequenza  Frame      Vel1      Vel2  ...     Vel10     Vel11     Vel12  Label
0         0      0  1.107498  0.802499  ...  1.191629  1.168866  1.850598      0
1         0      1  0.091547  0.071286  ...  0.188025  0.507695  0.759535      0
2         0      2  0.433357 -0.129341  ...  0.093921  0.048248  0.238178      0
3         0      3  0.091547 -0.233620  ... -0.146718 -0.063956  0.029616      0
4         0      4 -0.003324 -0.333743  ... -0.393765 -0.303028 -0.271540      0

[5 rows x 15 columns]


## 3.2) Extract the labels from the dataset.
Extract the train set column that contains the labels and group them by sequence id. In this way the output is a list of a single label for each sequence.

In [None]:
# Collapse the per-frame 'Label' column into a single value per sequence by
# averaging over all the frames that share the same 'Sequenza' id.
seq_ids = df['Sequenza']

# groupby returns the sequences sorted by id, one row each.
temp = df[['Sequenza', 'Label']].groupby('Sequenza').mean()

lbl = temp['Label'].values

We need to manipulate the dataframe in order to get the desired input format.
In particular we want an array of the shape [num_sequence, sequence_elements, features_dim].
To do this we will first remove the label column from the dataframe. 
The input sequences in the dataframe need to be numbered from 0 to $num\_seq - 1$.

In [None]:
# Drop the label column: only the ids and the features are needed from here on.
data = df.drop(['Label'], axis = 1)
num_seqs = data['Sequenza'].nunique()

# Build one float64 array of shape (num_frames, num_features) per sequence.
# groupby yields the sequences sorted by id, the same order produced by the groupby used
# for the labels, and converts each group in one step instead of appending row by row.
sequences = []
for _, frames in tqdm(data.groupby('Sequenza'), total=num_seqs):
  # Remove the unused id columns and keep the features in their original frame order.
  features = frames.drop(['Sequenza', 'Frame'], axis=1)
  sequences.append(features.to_numpy(dtype='float64'))

data = sequences

100%|██████████| 180/180 [00:03<00:00, 45.96it/s]


Then we need to pad each sequence in order to make them all the same length.

In [None]:
# Default values used by pad_sequences:
# value = 0 ---> The value added to fill the sequences is 0.
# padding = 'pre' ---> The sequence will be filled inserting the zeros at the start.
# No maxlen is given, so every sequence is padded to the length of the longest one.
data = tf.keras.preprocessing.sequence.pad_sequences(data, dtype='float64')

In [None]:
# Sanity check of the padding: print the length of two sequences and a few of their rows.
# NOTE(review): the indices are hard-coded for this specific dataset.
print(len(data[100]))
print(len(data[160]))
print(data[100][675])
print(data[160][641])
print(data[160][11])

681
681
[5.69299421 3.39554945 2.37205529 4.94836284 0.95650173 1.60686903
 1.93938091 2.44618251 3.04341193 2.94810158 5.81767736 5.8058954 ]
[0.40584004 0.62164306 0.26023285 1.78844872 0.03775138 0.64564219
 0.76591467 1.51821512 0.75974073 0.99427973 0.43091039 0.29837468]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


## 3.3) Create and manage the train set.


In [None]:
# Number of leading sequences held out for validation; the rest is used for training.
NUM_VAL_SEQUENCES = 18

ds = tf.data.Dataset.from_tensor_slices((data, lbl))
val_examples = ds.take(NUM_VAL_SEQUENCES)
train_examples = ds.skip(NUM_VAL_SEQUENCES)
print(val_examples)
print(train_examples)

<TakeDataset shapes: ((681, 12), ()), types: (tf.float64, tf.int64)>
<SkipDataset shapes: ((681, 12), ()), types: (tf.float64, tf.int64)>


In [None]:
# Settings read by make_batches.
BATCH_SIZE = 8  # Examples per batch.
BUFFER_SIZE = 180  # Size of the shuffle buffer.
random_seed = 1337  # Seed of the shuffle.

In [None]:
def make_batches(ds):
  """Cache, shuffle, batch and prefetch a dataset of examples.

  The module-level BUFFER_SIZE, random_seed and BATCH_SIZE are read when the
  function is called.

  Args:
    ds: A `tf.data.Dataset` of single examples.

  Returns:
    The resulting dataset of batches.
  """
  cached = ds.cache()
  shuffled = cached.shuffle(BUFFER_SIZE, seed=random_seed)
  batched = shuffled.batch(BATCH_SIZE)
  return batched.prefetch(tf.data.AUTOTUNE)

Make batches for validation and training sets.

In [None]:
train_batches = make_batches(train_examples)
# NOTE(review): val_batches is built here but is not consumed anywhere else in the visible notebook.
val_batches = make_batches(val_examples)

#4) Training.

## 4.1)Set the hyperparameters.
Set the transformer hyperparameter, define the learning rate, optimizer and loss type.

In [None]:
# Model hyperparameters
d_model = 32  # Width of the per-position representation inside the encoder.
dim_feedforward = 256  # Hidden width of the encoder feed-forward networks.
n_heads = 6  # Attention heads of each encoder layer.
num_layers = 3  # Number of stacked encoder layers.
feat_dim = 12  # Number of features inside each item of the sequence.
max_len = 681  # Length of each (padded) sequence.

# Parameters needed to separate classification from regression.
# For now just regression is implemented.
num_classes = 1
is_classification = False


# Network hyperparameters
learning_rate = 0.001
# Pass the configured rate explicitly: previously `learning_rate` was defined but never
# handed to the optimizer, so editing it had no effect.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# tfa.optimizers.RectifiedAdam() seems to not work properly.

# These loss and accuracy objects are meant for regression.
# For classification other metrics will be needed.
loss_object = tf.keras.losses.MeanSquaredError()
accuracy_object = tf.keras.metrics.MeanAbsoluteError()

## 4.2) Custom implementation of the loss and accuracy functions.

Add a way to customize the loss and accuracy functions.

In [None]:
def loss_function(real, pred):
  """Return the loss of `pred` against `real`, computed by the module-level `loss_object`."""
  return loss_object(real, pred)

def accuracy_function(real, pred):
  """Return the mean absolute error of `pred` against `real` for this call only.

  `accuracy_object` is a stateful Keras metric: calling it accumulates the new
  values and returns the average over every call since its last reset. Without a
  reset the returned number mixes all the previous batches and epochs, so the
  state is cleared first to obtain the error of the current batch.

  Args:
    real: Target values.
    pred: Predicted values.

  Returns:
    The mean absolute error of the current batch.
  """
  accuracy_object.reset_states()
  return accuracy_object(real, pred)

Create the metric objects.

In [None]:
# Running averages of the values passed in by train_step; both are reset at the start of every epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

## 4.4) Manage checkpoint and Tensorboard.
Create the model and load the last checkpoint if it exists.

In [None]:
# Create the transformer.
transformer = EncoderRegressor(d_model=d_model, dim_feedforward=dim_feedforward, n_heads=n_heads, num_layers=num_layers, feat_dim=feat_dim, max_len=max_len, num_classes = num_classes)

# Checkpoint management: both the model and the optimizer are tracked.
# NOTE(review): the keyword 'trasformer' (sic) names the model inside the checkpoint; renaming it
# would presumably stop previously saved checkpoints from matching -- verify before changing it.
ckpt = tf.train.Checkpoint(trasformer=transformer,
                           optimizer=optimizer)

# Keep at most the 5 most recent checkpoints in CHECKPOINT_DIR.
ckpt_manager = tf.train.CheckpointManager(ckpt, CHECKPOINT_DIR, max_to_keep=5)
# Resume from the latest checkpoint when one exists.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print("Restored latest checkpoint")

Restored latest checkpoint


Set paths for tensorboard visualization.

In [None]:
# Timestamp that identifies this run; it is reused for the log and export folder names.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# TensorBoard log folder of the training run.
train_log_dir = LOGS_DIR + '/gradient_tape/' + current_time + '/train'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)

## 4.5) Train the model.
Set the number of epochs and define the train step.



In [None]:
# Number of passes over the training batches performed by the training loop.
EPOCHS = 30

In [None]:
def train_step(inp,tar):
  """Run one optimisation step on a batch and update the running metrics.

  Args:
    inp: Batch of input sequences fed to `transformer`.
    tar: Batch of target values.

  Returns:
    The predictions of the model for `inp`, computed with `training=True`.
  """
  # Only the forward pass and the loss need to be recorded by the tape; the gradient
  # computation, the optimizer step and the metric updates run after it is closed.
  with tf.GradientTape() as tape:
    predictions = transformer(inp, training = True)
    loss = loss_function(tar, predictions)

  gradients = tape.gradient(loss, transformer.trainable_variables)
  optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

  train_loss(loss)
  train_accuracy(accuracy_function(tar, predictions))

  return predictions

Start the training.

In [None]:
for epoch in range(EPOCHS):
  start = time.time()
  # Per-batch predictions and labels, collected for the histogram visualization.
  prediction_chunks = []
  label_chunks = []

  train_loss.reset_states()
  train_accuracy.reset_states()

  for inp, tar in train_batches:
    predictions = train_step(inp, tar)
    # Flatten with -1 so this works for any batch size and number of outputs.
    prediction_chunks.append(tf.reshape(predictions, [-1]).numpy())
    label_chunks.append(tar.numpy())

  # Join the chunks once per epoch instead of growing an array at every batch.
  # The leading empty float64 array keeps the result float64 and makes the
  # concatenation valid even when there are no batches.
  predictions_histogram = np.concatenate([np.empty(0)] + prediction_chunks)
  labels_histogram = np.concatenate([np.empty(0)] + label_chunks)

  with train_summary_writer.as_default():
    tf.summary.scalar('loss', train_loss.result(), step=epoch)
    tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)
    tf.summary.histogram('predictions distribution', predictions_histogram, step = epoch)
    tf.summary.histogram('label distribution', labels_histogram, step = epoch)


  # Save a checkpoint every 5 epochs.
  if (epoch + 1) % 5 == 0:
    ckpt_save_path = ckpt_manager.save()
    print(f'Saving checkpoint for epoch {epoch+1} at {ckpt_save_path}')

  print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

  print(f'Time taken for 1 epoch: {time.time() - start:.2f} secs\n')

# A final checkpoint is always saved once training ends, and its path is printed.
print(ckpt_manager.save())

Epoch 1 Loss 18.8808 Accuracy 3.4882
Time taken for 1 epoch: 89.18 secs

Epoch 2 Loss 7.9235 Accuracy 2.5802
Time taken for 1 epoch: 81.93 secs

Epoch 3 Loss 6.6498 Accuracy 2.4142
Time taken for 1 epoch: 76.12 secs

Epoch 4 Loss 4.7124 Accuracy 2.2737
Time taken for 1 epoch: 69.44 secs

Saving checkpoint for epoch 5 at /content/gdrive/My Drive/IVA/checkpoint/train/ckpt-8
Epoch 5 Loss 4.2108 Accuracy 2.1474
Time taken for 1 epoch: 99.28 secs

Epoch 6 Loss 4.0342 Accuracy 2.0632
Time taken for 1 epoch: 141.92 secs

Epoch 7 Loss 2.4834 Accuracy 1.9575
Time taken for 1 epoch: 80.35 secs

Epoch 8 Loss 3.0392 Accuracy 1.8784
Time taken for 1 epoch: 72.76 secs

Epoch 9 Loss 2.8054 Accuracy 1.8167
Time taken for 1 epoch: 141.93 secs

Saving checkpoint for epoch 10 at /content/gdrive/My Drive/IVA/checkpoint/train/ckpt-9
Epoch 10 Loss 3.8798 Accuracy 1.7882
Time taken for 1 epoch: 94.91 secs

Epoch 11 Loss 2.2984 Accuracy 1.7553
Time taken for 1 epoch: 81.93 secs

Epoch 12 Loss 1.6429 Accuracy 

## 4.6) Save the model.
Save the summary of the model on tensorboard.

In [None]:
def get_summary_str(model):
    """Return the text of `model.summary()` with every line indented by four spaces.

    Args:
        model: Any object whose `summary(print_fn=...)` calls `print_fn` once per line.

    Returns:
        The summary lines joined by newlines, each one prefixed with four spaces.
    """
    summary_lines = []
    model.summary(print_fn=summary_lines.append)
    # The leading spaces avoid markdown formatting of the text in TensorBoard.
    indent = '    '
    return indent + ('\n' + indent).join(summary_lines)

# Log the model summary as a text entry named 'Model configuration' in the training TensorBoard run.
with train_summary_writer.as_default():
  tf.summary.text('Model configuration', get_summary_str(transformer), step=0)

In [None]:
class ExportTransformer(tf.Module):
  """Thin `tf.Module` wrapper that exposes the trained model as an inference-only callable.

  NOTE(review): `__call__` has no `input_signature`; presumably it has to be called
  once before `tf.saved_model.save` for the export to contain a usable function -- confirm.
  """

  def __init__(self, transformer):
    self.transformer = transformer

  @tf.function
  def __call__(self, inputs):
    """Run the wrapped model on `inputs` with `training=False` and return its output."""
    return self.transformer(inputs, training = False)

Save the transformer model in the TensorFlow SavedModel format.

In [None]:
# Export the wrapped model with tf.saved_model.save into a timestamped folder under MODEL_DIR.
# NOTE(review): the previous comments mentioned `save('my_model.h5')` and "Currently not working";
# presumably they referred to a Keras .h5 export, which is not what this cell does -- confirm.
exporter = ExportTransformer(transformer)
tf.saved_model.save(exporter, export_dir=MODEL_DIR + '/' + current_time + '/transformers')



INFO:tensorflow:Assets written to: /content/gdrive/My Drive/IVA/model/20210916-102654/transformers/assets


INFO:tensorflow:Assets written to: /content/gdrive/My Drive/IVA/model/20210916-102654/transformers/assets


# 5) Manage the test data.
In this section we manipulate and extract the data.

## 5.1) Load the test set.
Define the name of the dataset used for testing.



In [None]:
# File name of the test CSV; it is appended to DATASET_DIR when loading.
TEST_SET = 'test-vs.csv'

Load the test set from a .csv file.

In [None]:
# Read the test CSV and preview its first rows.
# Note: this overwrites the `df` variable that previously held the training data.
df = pd.read_csv(DATASET_DIR + TEST_SET)
print(df.head())

   Sequenza  Frame      Vel1      Vel2  ...     Vel10     Vel11     Vel12  Label
0       180      0 -0.385031 -0.398413  ... -0.037925 -0.357710 -0.113936      0
1       180      1 -0.341554 -0.266994  ... -0.428443 -0.271442 -0.195584      0
2       180      2 -0.003324 -0.100123  ...  1.188008  0.627858 -0.336879      0
3       180      3 -0.409479 -0.359790  ... -0.052292 -0.002093 -0.355900      0
4       180      4 -0.061690 -0.019474  ... -0.309205 -0.175785 -0.271540      0

[5 rows x 15 columns]


## 5.2) Extract the labels from the dataset.
Extract the test set column that contains the labels and group them by sequence id. In this way the output is a list of a single label for each sequence.

In [None]:
# Collapse the per-frame test labels into a single value per sequence by
# averaging over all the frames that share the same 'Sequenza' id.
lbl = df['Label']
seq_ids = df['Sequenza']

# groupby returns the sequences sorted by id, one row each.
temp = df[['Sequenza', 'Label']].groupby('Sequenza').mean()

lbl_test = temp['Label'].values

Remove the label column from the dataframe, and transform the data into the correct input format. The sequences need to be numbered from 0 to $num\_seq - 1$.

In [None]:
# Drop the label column: only the ids and the features are needed from here on.
data = df.drop(['Label'], axis = 1)
min_seq = data['Sequenza'].min()
num_seqs = data['Sequenza'].nunique()

# Build one float64 array of shape (num_frames, num_features) per sequence.
# groupby yields the sequences sorted by id, the same order produced by the groupby used
# for the labels, and converts each group in one step instead of appending row by row.
sequences = []
for _, frames in tqdm(data.groupby('Sequenza'), total=num_seqs):
  # Remove the unused id columns and keep the features in their original frame order.
  features = frames.drop(['Sequenza', 'Frame'], axis=1)
  sequences.append(features.to_numpy(dtype='float64'))

data = sequences

100%|██████████| 20/20 [00:00<00:00, 49.67it/s]


In [None]:
# Pad (or truncate) every test sequence to the length the model was built for.
# `max_len` is the same value given to the model, so the two cannot drift apart
# as they could with a separately hard-coded number.
data = tf.keras.preprocessing.sequence.pad_sequences(data, maxlen = max_len, dtype='float64')

In [None]:
# Sanity check of the padding: print the length of two test sequences and one of their rows.
# NOTE(review): the indices are hard-coded for this specific dataset.
print(len(data[19]))
print(len(data[1]))
print(data[19][680])
#print(data[19][641])

681
681
[1.26310275 0.94694645 0.72443641 0.16871895 0.40214158 0.80335244
 0.08617381 0.29341975 1.53734696 1.27304387 1.21974766 1.38796965]


## 5.3) Create and manage the test set.
Transform the dataset into a `tf.data.Dataset`.

In [None]:
# Pair every padded test sequence with its label; this overwrites the `ds` built for training.
ds = tf.data.Dataset.from_tensor_slices((data, lbl_test))

Define batch size and other variables.

In [None]:
# Settings read by make_batches; they replace the values used for training.
BATCH_SIZE = 1  # One sequence per batch.
BUFFER_SIZE = 180  # Size of the shuffle buffer.
random_seed = 1337  # Seed of the shuffle.

In [None]:
def make_batches(ds):
  """Cache, shuffle, batch and prefetch a dataset of examples.

  This repeats the definition given in the training section. The module-level
  BUFFER_SIZE, random_seed and BATCH_SIZE are read when the function is called.

  Args:
    ds: A `tf.data.Dataset` of single examples.

  Returns:
    The resulting dataset of batches.
  """
  cached = ds.cache()
  shuffled = cached.shuffle(BUFFER_SIZE, seed=random_seed)
  batched = shuffled.batch(BATCH_SIZE)
  return batched.prefetch(tf.data.AUTOTUNE)

In [None]:
# Batch the test examples with the settings defined above.
test_batches = make_batches(ds)

# 6) Evaluate the model on the test set.

In [None]:
# Metric used to measure the error on the test set.
error_object = tf.keras.metrics.MeanAbsoluteError()
# NOTE(review): test_accuracy is reset by the evaluation cell but never updated in the visible notebook.
test_accuracy = tf.keras.metrics.Mean(name='test_accuracy')
# TensorBoard log folder of the test run, next to the training one.
test_log_dir = LOGS_DIR + '/gradient_tape/' + current_time + '/test'
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [None]:
def accuracy_function(real, pred):
  """Update the module-level `error_object` with this batch and return its current value.

  This replaces the `accuracy_function` defined in the training section.
  `error_object` is a stateful Keras metric, so the value returned is the mean
  absolute error accumulated over every call since it was created or last reset.
  """
  return error_object(real, pred)

In [None]:
start = time.time()

test_accuracy.reset_states()
# Start from a clean metric so that re-running this cell does not mix in earlier evaluations.
error_object.reset_states()

# Per-batch predictions and labels, collected for the histogram visualization.
prediction_chunks = []
label_chunks = []

with test_summary_writer.as_default():
  for batch, (inp, tar) in enumerate(test_batches):
    predictions = transformer(inp, training = False)

    # Mean absolute error accumulated over the test batches seen so far.
    accuracy = accuracy_function(tar, predictions)
    tf.summary.scalar('error', accuracy, step = batch)

    # Flatten with -1 so this works for any batch size and number of outputs.
    prediction_chunks.append(tf.reshape(predictions, [-1]).numpy())
    label_chunks.append(tar.numpy())

  # Join the chunks once instead of growing an array at every batch. The leading empty
  # float64 array keeps the result float64 and makes the call valid with no batches.
  predictions_histogram = np.concatenate([np.empty(0)] + prediction_chunks)
  labels_histogram = np.concatenate([np.empty(0)] + label_chunks)

  tf.summary.histogram('predictions distribution', predictions_histogram,step = 0)
  tf.summary.histogram('label distribution', labels_histogram, step = 0)

# Report the overall result; previously the elapsed time and the final error were never shown.
print(f'Test mean absolute error: {error_object.result():.4f}')
print(f'Time taken for the evaluation: {time.time() - start:.2f} secs')