In [None]:
import tensorflow as tf
import tensorflow_hub as hub

In [3]:
from transformers import RobertaTokenizer, TFRobertaModel
from transformers import BertTokenizer, TFBertModel
from transformers import XLNetTokenizer, TFXLNetModel

from tensorflow.keras.optimizers import Adam
import os
import numpy as np
import pandas as pd

In [4]:
# Load ASR.csv into a DataFrame; later cells read its "transcription" and
# "label" columns.
transcript = pd.read_csv("ASR.csv")

In [5]:
# Registry of supported back-ends, keyed by a short name.
# Each value is a 3-tuple: (tokenizer class, pretrained checkpoint id, TF model class).
models = {
    'roberta': (RobertaTokenizer, 'roberta-large', TFRobertaModel),
    'bert': (BertTokenizer, 'bert-large-uncased', TFBertModel),
    'xlnet': (XLNetTokenizer, 'xlnet-large-cased', TFXLNetModel),
}

In [6]:
# Unpack the selected registry entry. Note the naming: `tokenizer` is the
# tokenizer *class*, `model_type` is the checkpoint id string (e.g.
# 'roberta-large') and `model_name` is the TF model *class*.
tokenizer, model_type, model_name = models['roberta']

In [7]:
def make_inputs(tokenizer, model_type, serie, max_len=128):
    """Tokenize every text in ``serie`` and return id / mask arrays.

    Parameters
    ----------
    tokenizer : class exposing ``from_pretrained`` (a class, not an instance).
    model_type : checkpoint identifier forwarded to ``from_pretrained``.
    serie : iterable of texts; each element is passed to ``encode_plus``.
    max_len : ``max_length`` passed to ``encode_plus`` (padding and
        truncation are both requested).

    Returns
    -------
    tuple of two ``np.ndarray``
        ``(input_ids, attention_mask)`` built from the ``'input_ids'`` and
        ``'attention_mask'`` entries of each encoding, in ``serie`` order.
    """
    # NOTE(review): `lowercase` does not look like a standard tokenizer option
    # (BERT-style tokenizers use `do_lower_case`) - confirm it has the
    # intended effect before relying on lower-casing.
    encoder = tokenizer.from_pretrained(model_type, lowercase=True)

    ids_rows = []
    mask_rows = []
    for text in serie:
        encoded = encoder.encode_plus(
            text,
            max_length=max_len,
            padding='max_length',
            add_special_tokens=True,
            truncation=True,
        )
        ids_rows.append(encoded['input_ids'])
        mask_rows.append(encoded['attention_mask'])

    return np.array(ids_rows), np.array(mask_rows)

In [None]:
# Encode the "transcription" column into fixed-length id / mask arrays.
input_ids_train, attention_mask_train = make_inputs(
    tokenizer,
    model_type,
    transcript["transcription"],
    max_len=128,
)

In [9]:
def _build_classifier(model_name, model_type, num_labels, max_len):
    """Assemble and compile the pretrained backbone plus a softmax head."""
    backbone = model_name.from_pretrained(model_type)

    # Two integer inputs of length ``max_len``: token ids and attention mask.
    input_ids = tf.keras.Input(shape=(max_len,), dtype='int32')
    attention_masks = tf.keras.Input(shape=(max_len,), dtype='int32')

    outputs = backbone([input_ids, attention_masks])

    if 'xlnet' in model_type:
        # cls is the last token in xlnet tokenization
        sequence_output = outputs[0]
        cls_output = tf.squeeze(sequence_output[:, -1:, :], axis=1)
    else:
        # presumably the pooled sentence-level output of BERT/RoBERTa - TODO confirm
        cls_output = outputs[1]

    final_output = tf.keras.layers.Dense(num_labels, activation='softmax')(cls_output)
    model = tf.keras.Model(inputs=[input_ids, attention_masks], outputs=final_output)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that recent Keras releases no longer honour.
    model.compile(optimizer=Adam(learning_rate=1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


##### TPU or no TPU
def init_model(model_name, model_type, num_labels, Tpu = 'on', max_len = 128):
    """Build and compile a sequence classifier on top of a pretrained model.

    Parameters
    ----------
    model_name : class exposing ``from_pretrained`` (the TF model class).
    model_type : checkpoint identifier string; if it contains ``'xlnet'`` the
        last position of the first output is used as the sentence vector,
        otherwise the second output is used.
    num_labels : number of units in the final softmax layer.
    Tpu : ``'on'`` to connect to the Colab TPU and build the model inside a
        TPU strategy scope; any other value builds it on the default device.
    max_len : sequence length of the two integer inputs.

    Returns
    -------
    tf.keras.Model
        Compiled with Adam (learning rate 1e-5), categorical cross-entropy
        loss and an accuracy metric.

    Raises
    ------
    KeyError
        If ``Tpu == 'on'`` and ``COLAB_TPU_ADDR`` is not set in the environment.
    """
    if Tpu != 'on':
        # ---------------- without TPU ----------------
        return _build_classifier(model_name, model_type, num_labels, max_len)

    # ---------------- with TPU ----------------
    # a few lines of code to get our tpu started and our data distributed on it
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)

    # NOTE(review): newer TensorFlow exposes this as tf.distribute.TPUStrategy;
    # the experimental name is kept for compatibility - confirm target TF version.
    strategy = tf.distribute.experimental.TPUStrategy(resolver)
    with strategy.scope():
        # The model is created and compiled inside the strategy scope.
        model = _build_classifier(model_name, model_type, num_labels, max_len)
    return model

In [None]:
# Build the 2-class classifier on the default device (TPU set-up skipped).
model = init_model(
    model_name,
    model_type,
    num_labels=2,
    Tpu='off',
    max_len=128,
)

In [11]:
# One-hot encode the "label" column into 2 classes.
train_y = tf.keras.utils.to_categorical(transcript["label"], num_classes=2)
# The DataFrame is deleted here, so this cell (and any cell that reads
# `transcript`) cannot be re-run without re-executing the read_csv cell first.
del transcript

In [None]:
# Fine-tune for 10 epochs with batch size 8, holding out 25% for validation.
# NOTE(review): Keras takes the validation_split slice from the end of the
# arrays before shuffling - confirm the rows in ASR.csv are not ordered by label.
model.fit(
    [input_ids_train, attention_mask_train],
    train_y,
    validation_split=0.25,
    epochs=10,
    batch_size=8,
    shuffle=True,
)