# Initial Config (GPU and Location)

In [None]:
import os
# Number GPUs by PCI bus position so the ids below are interpreted in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# GPU id to expose to this process: 0 .. number_of_gpus - 1, or -1 to run on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# True when running on the server; set it to False when running in Colab.
SERVER = True

# Imports

## Libraries

### From python

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
from IPython.display import display, clear_output
from ipywidgets import interact, IntSlider
import numpy as np
import random
import math
%matplotlib inline
import matplotlib.pyplot as plt
from collections import namedtuple
if not SERVER:
    !pip -q install -U nltk==3.4.5
    %tensorflow_version 2.x
import tensorflow as tf
import nltk

In [None]:
# Enable on-demand GPU memory growth and disable device-placement logging.
# Iterating over the visible GPUs (instead of indexing gpus[0]) keeps this cell
# working when no GPU is visible (e.g. CUDA_VISIBLE_DEVICES=-1) and applies the
# same setting to every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
tf.debugging.set_log_device_placement(False)

### Our own and third-party libraries

In [None]:
# Bring the project packages (utils, models, metrics) into the working directory.
if SERVER:
    # On the server the notebook runs inside the repository: just update it.
    !git pull
else:
    # Colab: clone the project repository
    !git clone https://github.com/JefeLitman/SignLanguageTranslation_SLT.git
    # Move the packages imported below next to the notebook (/content)
    !mv /content/SignLanguageTranslation_SLT/utils /content/
    !mv /content/SignLanguageTranslation_SLT/models /content/
    !mv /content/SignLanguageTranslation_SLT/metrics /content/
    # Delete the remaining files of the clone
    !rm -rf /content/SignLanguageTranslation_SLT
# DatasetsLoaderUtils (runs in both modes): download the module from the
# VideoDataGenerator repository and place it inside the utils package.
!wget -q https://raw.githubusercontent.com/JefeLitman/VideoDataGenerator/master/DatasetsLoaderUtils.py -O DatasetsLoaderUtils.py
!mv DatasetsLoaderUtils.py utils/DatasetsLoaderUtils.py

In [None]:
from utils.preprocess_data import preprocessing_paths, preprocessing_sentences, table_paths_dataset
from utils.DatasetsLoaderUtils import flow_from_tablePaths
from utils.results import save_predictions, calculate_metrics_results
from metrics.losses import SparseCategoricalCrossentropy_mask
from metrics.accuracy import real_acc
from models import compute_features, encoder, decoder, reduce_features, self_attentions, st_attentions, output

## Data

In [None]:
# Colab only: fetch the dataset and the word vectors, then mount Google Drive.
if not SERVER:
    # Remove Colab's default sample data folder
    !rm -rf /content/sample_data
    # Download the boston201 archive from Google Drive. The inner wget fetches the
    # download-confirmation token (stored via cookies) that the outer wget needs.
    !wget --quiet --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1Ph_Ys3O_vI93WeTkDqr5h6kTJm0CZ0Ub' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1Ph_Ys3O_vI93WeTkDqr5h6kTJm0CZ0Ub" -O boston201.zip && rm -rf /tmp/cookies.txt
    !unzip -q boston201.zip
    !rm boston201.zip
    # Download and unpack the fastText English word vectors (wiki-news-300d-1M)
    !wget -q https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M.vec.zip -O word_vectors.zip
    !unzip -q word_vectors.zip
    !rm word_vectors.zip
    # Mount drive to save models and results
    from google.colab import drive
    drive.mount('/content/drive')

# Model SLT

## Network parameters

In [None]:
# Immutable container for every hyper-parameter and path used by the notebook.
# The field order is part of the tuple layout, so it is kept exactly as before.
Args = namedtuple('Args', [
    'max_len_sentence', 'data', 'pretrained', 'prefetch_batch_buffer',
    'unitsEmbedding', 'vocab_size', 'nIters', 'videos_path', 'rnnUnits',
    'dropout', 'recurrent_dropout', 'inputShape', 'optimizer', 'type_frames',
    'batchSize', 'epochs', 'lr', 'momentum', 'decay', 'wDecay', 'path2save',
    'name',
])

args = Args(
    max_len_sentence=15,
    data='../DataSets/boston201/data/',  # Colab: '/content/boston201/data/'
    pretrained=None,  # e.g. 'vgg16'
    prefetch_batch_buffer=5,
    unitsEmbedding=300,
    vocab_size=150,
    nIters=10.0,  # used by step_decay as the number of epochs between lr drops
    videos_path='../DataSets/boston201',  # Colab: '/content/boston201'
    rnnUnits=256,
    dropout=0.2,
    recurrent_dropout=0.2,
    inputShape=(32, 112, 112, 3),
    optimizer='adam',
    type_frames='jpg/',
    batchSize=1,
    epochs=20,
    lr=0.001,
    momentum=0.99,
    decay=0.1,  # used by step_decay as the multiplicative lr drop factor
    wDecay=0.0005,
    path2save='../Saved_Models/',  # Colab: '/content/drive/My Drive/Models/SLT/<experiment_folder>'
    name='SLT_Model',
)

## Preparing data

### Setting the seeds for replicability

In [None]:
# Seed Python, NumPy and TensorFlow with the same value, in that order.
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(8128)

### Initial data

In [None]:
# Files (train first, then test) with the target sentences and with the video paths.
paths_translation = [args.data + fname for fname in ('translations.train', 'translations.test')]
paths_videos = [args.data + fname for fname in ('pathsigns.train', 'pathsigns.test')]

# Pre-process the sentences (this also returns the vocabulary) and the video paths.
preprocessed_sentences, vocab = preprocessing_sentences(
    paths_translation,
    max_len=args.max_len_sentence,
)
preprocessed_paths = preprocessing_paths(
    paths_videos,
    path2videos=args.videos_path,
    type_=args.type_frames,
)

# Combine the processed paths and sentences into one table for the data loader.
table_paths = table_paths_dataset(preprocessed_paths, preprocessed_sentences)

### Building tf.data.Dataset

In [None]:
from utils.data_augmentation import frame_sampling

# Loader over the table of paths; frames are resized to the (H, W) of args.inputShape.
raw_data = flow_from_tablePaths(table_paths, lambda x: x, args.inputShape[1:3])


def _make_sampling_generator(split_id):
    """Build a generator function that yields training samples for one split.

    Args:
        split_id: First argument forwarded to ``raw_data.data_generator``
            (1 is used for train and 2 for test in this notebook).

    Returns:
        A zero-argument generator function. Each item it yields has the form
        ``((clip, sentence[:-1]), sentence[1:])``: the decoder input is the
        sentence without its last token and the target is the sentence
        shifted by one position. One item is yielded per clip returned by
        ``frame_sampling``.
    """
    def _generator():
        for video, label in raw_data.data_generator(split_id, args.inputShape[-1]):
            # The class name stores the token ids as a ", "-separated string.
            sentence = np.r_[[int(j) for j in (raw_data.to_class[label]).split(", ")]]
            for clip in frame_sampling(video, args.inputShape[0]):
                yield (clip, sentence[:-1]), sentence[1:]
    return _generator


def _sampling_dataset(generator):
    """Wrap a sampling generator in a tf.data.Dataset with fixed types and shapes."""
    return tf.data.Dataset.from_generator(generator, ((tf.float32, tf.int64), tf.int64),
        ((args.inputShape, args.max_len_sentence-1), args.max_len_sentence-1))


# Public names are kept so the rest of the notebook keeps working unchanged.
train_gen_sampling = _make_sampling_generator(1)
train_data = _sampling_dataset(train_gen_sampling)

test_gen_sampling = _make_sampling_generator(2)
test_data = _sampling_dataset(test_gen_sampling)

## Network instance

In [None]:
# Entradas de la red
input_video = tf.keras.Input(shape=args.inputShape, name="input_video")
input_words = tf.keras.Input(shape=[args.max_len_sentence-1], name="input_words")

# Compute features and reduce features
x = compute_features.compute_features_v1_0(input_video, weight_decay=tf.keras.regularizers.l2(args.wDecay))
x = reduce_features.reduce_features_v1_2(x)

#Encoder module and self attention
x1, rnn1_states, rnn2_states = encoder.encoder_v1_1(x, args.rnnUnits, args.unitsEmbedding, 
    args.dropout, args.recurrent_dropout)
x1 = self_attentions.self_attention_v1_0(x1)

#Decoder module
x2 = decoder.decoder_v1_0(input_words, rnn1_states, rnn2_states, args.rnnUnits, args.unitsEmbedding, 
    args.vocab_size, args.dropout, args.recurrent_dropout)

# Spatio Temporal attention
x3 = st_attentions.st_attention_v1_4_1(x2, x1, x)

# Output of the network
x = output.output_v1_0(x2, x3, args.vocab_size)

model = tf.keras.Model(inputs=(input_video, input_words), outputs=x, name=args.name)

In [None]:
#model.summary()

In [None]:
#tf.keras.utils.plot_model(model, to_file=args.name+'.png', show_shapes=True)

## Training

### Callbacks

In [None]:
def step_decay(epoch):
    """Return the learning rate for ``epoch`` under a step-decay schedule.

    The rate starts at ``args.lr`` and is multiplied by ``args.decay`` once
    for every ``args.nIters`` epochs completed, counting ``epoch + 1``.

    Args:
        epoch: Zero-based epoch index supplied by the Keras scheduler callback.

    Returns:
        The learning rate to use for that epoch, as a float.
    """
    completed_drops = math.floor((1 + epoch) / args.nIters)
    return args.lr * math.pow(args.decay, completed_drops)
# Callbacks handed to model.fit: apply step_decay every epoch and log the new rate.
calls = [
    tf.keras.callbacks.LearningRateScheduler(schedule=step_decay, verbose=1),
]

### Optimizer

In [None]:
# Build the optimizer selected by args.optimizer ("adam", "sgd" or "rmsprop").
# `learning_rate` is the documented argument name; `lr` is only an alias kept by
# Keras for backward compatibility.
if args.optimizer == 'adam':
    opt = tf.keras.optimizers.Adam(
        learning_rate=args.lr,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0,
        clipnorm=1.,
        clipvalue=5)

elif args.optimizer == 'sgd':
    opt = tf.keras.optimizers.SGD(
        learning_rate=args.lr,
        decay=0,
        momentum=args.momentum,
        nesterov=True,
        clipnorm=1.,
        clipvalue=0.5)

# The original branch only matched the misspelling 'rsmprop', so the documented
# value "rmsprop" always fell through to the ValueError below. Both spellings
# are accepted so existing configurations keep working.
elif args.optimizer in ('rmsprop', 'rsmprop'):
    opt = tf.keras.optimizers.RMSprop(learning_rate=args.lr)
                         #clipnorm=1.,
                         #clipvalue=0.5)
else:
    raise ValueError('You must specify a valid optimizer for model. The only optmizers available are: '
                    '"adam", "sgd" or "rmsprop". The optmizer given was: '+str(args.optimizer))

### Data transformations and augmentations

In [None]:
def scale(data, label):
    """Divide the first input by 255 and pass everything else through.

    Args:
        data: Indexable pair whose first element is divided by ``255.`` and
            whose second element is returned untouched.
        label: Target associated with ``data``; returned unchanged.

    Returns:
        ``((data[0] / 255., data[1]), label)``.
    """
    frames, words = data[0], data[1]
    return (frames / 255., words), label

# Training pipeline: cache the raw samples, scale them (24 parallel calls),
# reshuffle every epoch, batch and prefetch.
# NOTE(review): 318 and 84 are presumably the train/test sample counts, confirm.
train_data = (
    train_data
    .cache()
    .map(scale, num_parallel_calls=24)
    .shuffle(buffer_size=318, reshuffle_each_iteration=True)
    .batch(args.batchSize)
    .prefetch(args.prefetch_batch_buffer)
)

# Validation pipeline: same stages with its own shuffle buffer size.
test_data = (
    test_data
    .cache()
    .map(scale, num_parallel_calls=24)
    .shuffle(buffer_size=84, reshuffle_each_iteration=True)
    .batch(args.batchSize)
    .prefetch(args.prefetch_batch_buffer)
)

### Fit and save the model

In [None]:
# Compile with the selected optimizer, the project loss, and two accuracy
# metrics: the project's real_acc and Keras' sparse categorical accuracy ("acc").
model.compile(
    optimizer=opt,
    loss=SparseCategoricalCrossentropy_mask,
    metrics=[
        real_acc,
        tf.keras.metrics.SparseCategoricalAccuracy(name="acc"),
    ],
)

In [None]:
# Train for args.epochs epochs; the test split is used as validation data.
model.fit(
    train_data,
    epochs=args.epochs,
    validation_data=test_data,
    callbacks=calls,
)

In [None]:
# Create the output folder first: saving fails if the directory does not exist yet.
if args.path2save:
    os.makedirs(args.path2save, exist_ok=True)
# Save the trained model without the optimizer state.
model.save(os.path.join(args.path2save, "trained_model.h5"), include_optimizer=False)

### Fine-Tuning (Optional to the data)

In [None]:
# The Boston dataset doesn't have a dev split, so there is no fine-tuning

## Results

In [None]:
def _make_results_generator(split_id, split_name):
    """Build a generator function that yields evaluation samples for one split.

    Args:
        split_id: First argument forwarded to ``raw_data.data_generator``
            (1 is used for train and 2 for test in this notebook).
        split_name: Value of column 1 of ``table_paths`` selecting the rows
            of this split ("train" or "test").

    Returns:
        A zero-argument generator function yielding ``(clip, sentence, path)``:
        the first clip returned by ``frame_sampling``, the full token-id
        sentence, and column 0 of the matching ``table_paths`` row.
    """
    def _generator():
        samples = raw_data.data_generator(split_id, args.inputShape[-1])
        rows = table_paths[table_paths[:,1] == split_name]
        # NOTE(review): pairing by position assumes data_generator yields the
        # samples in the same order as the rows of table_paths, confirm.
        for i, (video, label) in enumerate(samples):
            sentence = np.r_[[int(j) for j in (raw_data.to_class[label]).split(", ")]]
            path = rows[i][0]
            # Only the first sampled clip of each video is used here.
            yield frame_sampling(video, args.inputShape[0])[0], sentence, path
    return _generator


def _results_dataset(generator):
    """Wrap a results generator in a tf.data.Dataset with batches of size 1."""
    return tf.data.Dataset.from_generator(generator, (tf.float32, tf.int64, tf.string)).batch(1)


# These names intentionally replace the training-time generators and datasets,
# as the original cell did.
train_gen_sampling = _make_results_generator(1, "train")
train_data = _results_dataset(train_gen_sampling)

test_gen_sampling = _make_results_generator(2, "test")
test_data = _results_dataset(test_gen_sampling)

In [None]:
# Run the model over the train and test datasets through the project helper and
# keep what it returns for the metrics cell below.
# NOTE(review): presumably the predictions are also written under args.path2save, confirm.
results = save_predictions(model, args.path2save, vocab, args, train_data, test_data)

In [None]:
# Fetch the WordNet corpus before computing the metrics.
# NOTE(review): presumably required by an nltk-based metric inside
# calculate_metrics_results, confirm.
nltk.download('wordnet')
# Compute the metrics over the saved predictions; as the last expression of the
# cell, its return value is displayed.
calculate_metrics_results(results)