# Imports

In [1]:
#######################################
### -------- Load libraries ------- ###
# Load Huggingface transformers
from transformers import TFBertModel,  BertConfig, BertTokenizer
from transformers import TFAlbertModel,  AlbertConfig, AlbertTokenizer
from transformers import TFRobertaModel,  RobertaConfig, RobertaTokenizer
from transformers import TFDistilBertModel, BertTokenizer, DistilBertConfig
from transformers import TFXLMModel, XLMTokenizer, XLMConfig, TFSequenceSummary
from transformers import TFXLMRobertaModel, XLMRobertaTokenizer, XLMRobertaConfig
# tensorflow.
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from official.nlp import optimization  # to create AdamW optimizer
import official.nlp.modeling.layers as layers
import tensorflow_addons as tfa
# others
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import shuffle
import os

# logging
from datetime import datetime

from tensorflow import keras

import numpy as np
import random

#custom functions
from Refactoring.loading import load, load_combined_languages, load_artificial_ood
from Refactoring.model import build_classifier_model, build_classifier_model_last4, load_model
from Refactoring.model import bert_optimizer, get_layers, dlr_optimizer
from Refactoring.evaluation import test_prediction, save_report, save_metrics



2022-01-07 14:12:31.091675: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


# Settings

In [2]:
#####################################
######### Necessary Settings ########

# Set to True to train models without setting seeds for the model.
# Necessary for Deep Ensembles & MCD.
train_part2 = False

# Set to True to train for the artificial OOD setting (a different dataset is
# loaded; there is a separate evaluation script for it).
# If activated, train_part2 must be True as well.
train_part2_ood = False

# The artificial OOD setting is only allowed together with train_part2.
assert (train_part2 == True) or not train_part2_ood

# Number of models trained in the training loop; necessary for deep ensembles.
n_models = 1

#####################################
########## Seeds ####################

# Base seed; 42 is the standard value, other values listed: 0, 21, 99, 365.
seed = 42

# Python's and NumPy's global generators are seeded in every configuration.
random.seed(seed)
np.random.seed(seed)

# TensorFlow is only seeded when train_part2 is off.
if not train_part2:
    tf.random.set_seed(seed)

    # NOTE(review): hash randomization of the running interpreter is fixed at
    # start-up, so this assignment presumably only reaches processes started
    # afterwards -- confirm that this is sufficient.
    os.environ['PYTHONHASHSEED'] = str(seed)

    print("tf seeds set")
else:
    print("tf seeds not set")



#####################################
######### Necessary Settings ########

# Language of the dataset: EN, DE, RO, All
language = "RO"

# Options: specific, multi
language_model_relation = "multi"

# Options: Roberta, Distilbert, BERT, RobertaXLM
bert_type = "BERT"

# Number of training epochs
epochs = 5

# Candidate values: 5e-5, 2e-5, 1e-4, 4e-4
learning_rate = 1e-4

# Options: last, last4
layer_strategy = 'last'

# Options: cls, mean, max
reduce_strategy = 'cls'

# Options: mean, max, concat --> only relevant for the last4 layer strategy
last_layer_strategy = "concat"

# Only takes effect with layer-wise learning rates
decay_factor = 1

# True for discriminative (layer-wise) learning rates
layer_wise_lr = False



#####################################
#########  Default Settings  ########

# These were not changed in the end; some are still used for logging/naming purposes.

# Only "medium" was used in the end
size = "medium"

# The recommended casing was always used for each model, thus do not change
cased = False

# Relic of initial experiments
class_weighting = False

# Only 40 was used
max_length = 40

# Only 32 was used
batch_size = 32

# Only 0.2 was used
val_size = 0.2

# Set to True to train the classification head on its own first. A placeholder
# for this exists in the training loop, but it should be adjusted before use
# (e.g. set different learning rates for the two training phases).
freeze = False

# Set the level of the logger returned by tf.get_logger() to ERROR.
tf.get_logger().setLevel('ERROR')

tf seeds set


In [3]:
# Check device (disabled).
# The snippet below is wrapped in a string literal, so it is not executed; the
# cell only evaluates to that string. Remove the surrounding quotes to run it
# and print the result of device_lib.list_local_devices().
'''
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
'''

'\nfrom tensorflow.python.client import device_lib\nprint(device_lib.list_local_devices())\n'

# Load Dataset

In [4]:
# Load the dataset that matches the chosen setting.
if train_part2_ood != False:
    # Certain classes are deleted (artificial OOD setting).
    print("Some Classes Deleted")
    X_train, X_test, y_train, y_test, n_classes, label_dict = load_artificial_ood(language=language, seed=seed)
elif language == "All":
    # No classes are deleted; train on all languages and test on the individual datasets.
    print("No Classes Deleted")
    (
        X_train, y_train,
        X_train_de, X_test_de, y_train_de, y_test_de,
        X_train_en, X_test_en, y_train_en, y_test_en,
        X_train_ro, X_test_ro, y_train_ro, y_test_ro,
        n_classes, label_dict,
    ) = load_combined_languages(seed=seed)
else:
    # No classes are deleted; only the data for one language is loaded.
    print("No Classes Deleted")
    X_train, X_test, y_train, y_test, n_classes, label_dict = load(language=language, seed=seed)

No Classes Deleted


  X_train, y_train, X_train_de, X_test_de, y_train_de, y_test_de, X_train_en, X_test_en, y_train_en, y_test_en, X_train_ro, X_test_ro, y_train_ro, y_test_ro, n_classes, label_dict = load_combined_languages(seed=seed)


In [5]:
# Number of training samples. Needed for the optimizer, which builds its
# learning rate schedule depending on the number of training steps.
len_train = len(X_train)

# Training Loop

In [None]:
# Training loop: builds, compiles and fits `n_models` classifiers
# (n_models > 1 is meant for deep ensembles).

# Load tokenizer, transformer_model, config
tokenizer, transformer_model, config = load_model(layer_strategy=layer_strategy, bert_type=bert_type, cased=cased, language=language, language_model_relation=language_model_relation)

# The training data is identical for every model trained below, so the
# (slow) input preparation is done once instead of once per model.

# Ready output data for the model
y_train_categorical = to_categorical(y_train)

# Tokenize the input (takes some time)
x = tokenizer(
    text=list(X_train),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=True)

# Arguments shared by every model.fit call in this cell.
fit_kwargs = dict(
    x={'input_ids': x['input_ids'], 'attention_mask': x["attention_mask"]},
    y=y_train_categorical,
    validation_split=val_size,
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True)

for i in range(n_models):
    print("Layer Strategy is:", layer_strategy)
    print("Reduce Strategy is:", reduce_strategy)

    if i > 0:
        # Every further model gets freshly loaded pretrained weights.
        # NOTE(review): presumably the build_* functions wrap the passed
        # transformer_model instance without copying it, in which case reusing
        # it would start this model from the previous model's fine-tuned
        # weights -- verify in Refactoring.model.
        tokenizer, transformer_model, config = load_model(layer_strategy=layer_strategy, bert_type=bert_type, cased=cased, language=language, language_model_relation=language_model_relation)

    # With deep ensembles the model must be built without a seed, otherwise the
    # ensemble members do not find many different local minima.
    model_seed = None if train_part2 else seed
    print("training without seeds" if train_part2 else "training with seeds")

    # Keyword arguments accepted by both model builders.
    builder_kwargs = dict(
        seed=model_seed,
        tokenizer=tokenizer,
        transformer_model=transformer_model,
        max_length=max_length,
        bert_type=bert_type,
        reduce_strategy=reduce_strategy,
        config=config,
        n_classes=n_classes)

    if layer_strategy == 'last':
        model = build_classifier_model(**builder_kwargs)
    elif layer_strategy == 'last4':
        model = build_classifier_model_last4(last_layer_strategy=last_layer_strategy, **builder_kwargs)
    else:
        # Fail early instead of continuing with an unbound or stale `model`.
        raise ValueError(f"Unknown layer_strategy {layer_strategy!r}; expected 'last' or 'last4'")
    model.summary()

    # One run name (with a single timestamp) shared by the TensorBoard
    # directory, the checkpoint path and the CSV log, so that all artefacts of
    # one run carry exactly the same name.
    run_name = "_".join(str(part) for part in (
        language, layer_strategy, last_layer_strategy, reduce_strategy,
        decay_factor, language_model_relation, cased, bert_type,
        learning_rate, epochs, datetime.now().strftime("%Y%m%d-%H%M%S"),
    )) + "_random_state" + str(seed)

    logdir = "logs/fit/" + run_name
    modeldir = "models/fit/" + run_name
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
    # Only the weights with the best validation accuracy are kept.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=modeldir,
        save_weights_only=True,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True)
    csv_logger = tf.keras.callbacks.CSVLogger(run_name + '.log')
    callbacks = [tensorboard_callback, csv_logger, model_checkpoint_callback]

    # Set loss and metrics
    loss = {'output': CategoricalCrossentropy(from_logits=True)}
    metric = {'output': [
        CategoricalAccuracy('accuracy'),
        tf.keras.metrics.TopKCategoricalAccuracy(k=2, name="top2"),
        tf.keras.metrics.TopKCategoricalAccuracy(k=3, name="top3"),
        tf.keras.metrics.TopKCategoricalAccuracy(k=4, name="top4"),
        tf.keras.metrics.TopKCategoricalAccuracy(k=5, name="top5"),
    ]}

    # Please note that one may/should use different learning rates in the two
    # different training steps when using freezing.
    if freeze:
        # Phase 1: train the classifier in isolation. The main model layer is
        # set to non-trainable and the model is fitted with its own optimizer.
        model.layers[2].trainable = False
        model.compile(
            optimizer=bert_optimizer(learning_rate, batch_size, epochs, len_train),
            loss=loss,
            metrics=metric)
        history = model.fit(callbacks=callbacks, **fit_kwargs)

        # Phase 2 is the regular training below: the base model is made
        # trainable again and the model is re-compiled there with a fresh
        # optimizer, so fine-tuning happens exactly once.
        model.layers[2].trainable = True

    #######################################
    ### ------- Train the model ------- ###
    # Set optimizer
    if layer_wise_lr:
        # The layer list and per-layer optimizers are only needed for
        # discriminative learning rates (DLR), so they are only built here.
        layer_list = get_layers(model)
        optimizers_and_layers = dlr_optimizer(learning_rate, layer_list, decay_factor, batch_size, epochs, len_train, val_size)
        optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
    else:
        optimizer = bert_optimizer(learning_rate, batch_size, epochs, len_train)

    print(optimizer)

    # Compile the model
    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=metric)

    # Fit the model
    history = model.fit(callbacks=callbacks, **fit_kwargs)

Downloading:   0%|          | 0.00/1.76G [00:00<?, ?B/s]

2022-01-07 14:14:45.933176: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2022-01-07 14:14:45.943778: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:65:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2022-01-07 14:14:45.943848: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2022-01-07 14:14:45.949132: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2022-01-07 14:14:45.952891: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2022-01-07 14:14:45.954673: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so

PreTrainedTokenizer(name_or_path='jplu/tf-xlm-roberta-base', vocab_size=250002, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True)})
<transformers.models.xlm_roberta.modeling_tf_xlm_roberta.TFXLMRobertaModel object at 0x7fbcc9e82f10>
Layer Strategy is: last
Reduce Strategy is: cls
training with seeds
Model: "Text-Classifier"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
attention_mask (InputLayer)     [(None, 40)]         0                                            
__________________________________________________________________________________________________
inpu

2022-01-07 14:15:01.452929: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2022-01-07 14:15:01.452971: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1391] Profiler found 1 GPUs
2022-01-07 14:15:01.453217: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcupti.so.10.1'; dlerror: libcupti.so.10.1: cannot open shared object file: No such file or directory
2022-01-07 14:15:01.453283: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcupti.so'; dlerror: libcupti.so: cannot open shared object file: No such file or directory
2022-01-07 14:15:01.453292: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.


Epoch 1/5
   1/8651 [..............................] - ETA: 0s - loss: 5.9754 - accuracy: 0.0000e+00 - top2: 0.0000e+00 - top3: 0.0000e+00 - top4: 0.0000e+00 - top5: 0.0000e+00

2022-01-07 14:15:31.431948: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2022-01-07 14:15:31.431991: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.


   2/8651 [..............................] - ETA: 39:28 - loss: 5.9948 - accuracy: 0.0000e+00 - top2: 0.0000e+00 - top3: 0.0000e+00 - top4: 0.0000e+00 - top5: 0.0000e+00

2022-01-07 14:15:31.836145: I tensorflow/core/profiler/internal/gpu/device_tracer.cc:223]  GpuTracer has collected 0 callback api events and 0 activity events. 
2022-01-07 14:15:31.865165: I tensorflow/core/profiler/rpc/client/save_profile.cc:176] Creating directory: logs/fit/All_last_concat_cls_1_multi_False_RobertaXLM_0.0001_5_20220107-141501_random_state42/train/plugins/profile/2022_01_07_14_15_31
2022-01-07 14:15:31.875433: I tensorflow/core/profiler/rpc/client/save_profile.cc:182] Dumped gzipped tool data for trace.json.gz to logs/fit/All_last_concat_cls_1_multi_False_RobertaXLM_0.0001_5_20220107-141501_random_state42/train/plugins/profile/2022_01_07_14_15_31/zwinge.trace.json.gz
2022-01-07 14:15:31.920673: I tensorflow/core/profiler/rpc/client/save_profile.cc:176] Creating directory: logs/fit/All_last_concat_cls_1_multi_False_RobertaXLM_0.0001_5_20220107-141501_random_state42/train/plugins/profile/2022_01_07_14_15_31
2022-01-07 14:15:31.931897: I tensorflow/core/profiler/rpc/clie

  62/8651 [..............................] - ETA: 52:31 - loss: 5.9755 - accuracy: 0.0116 - top2: 0.0212 - top3: 0.0282 - top4: 0.0338 - top5: 0.0378

## Evaluation on Test Set - only after hyperparameter tuning & only for in-domain

In [None]:
# Load tokenizer, transformer model and config for the current settings.
tokenizer, transformer_model, config = load_model(
    layer_strategy=layer_strategy,
    bert_type=bert_type,
    cased=cased,
    language=language,
    language_model_relation=language_model_relation,
)

In [None]:
print("Layer Strategy is:", layer_strategy)
print("Reduce Strategy is:", reduce_strategy)

# The model is built without a seed when train_part2 is active, with `seed` otherwise.
build_seed = None if train_part2 else seed
print("training without seeds" if train_part2 else "training with seeds")

# Keyword arguments accepted by both model builders.
common_build_args = dict(
    seed=build_seed,
    tokenizer=tokenizer,
    transformer_model=transformer_model,
    max_length=max_length,
    bert_type=bert_type,
    reduce_strategy=reduce_strategy,
    config=config,
    n_classes=n_classes,
)

# Pick the builder that corresponds to the layer strategy.
if layer_strategy == 'last':
    model = build_classifier_model(**common_build_args)
elif layer_strategy == 'last4':
    model = build_classifier_model_last4(last_layer_strategy=last_layer_strategy, **common_build_args)

# Take a look at the model
model.summary()

In [None]:
# Name of the saved run whose weights are evaluated; the weights are loaded
# from 'models/fit/' + modeldir.
# NOTE(review): this run name is hard-coded (EN / last4 / Distilbert) -- make
# sure it matches the settings the model above was built with.
modeldir= "EN_last4_concat_cls_1_specific_False_Distilbert_0.0001_5_20220104-185410_random_state42"

In [None]:
# Load the saved model weights (the ones considered the best) into the model.
model.load_weights(f"models/fit/{modeldir}")



In [None]:
# Reverse the mapping of label_dict: its values become keys and its keys become values.
inv_label_dict = dict(zip(label_dict.values(), label_dict.keys()))

In [None]:
# This evaluation must not be run in the artificial OOD setting.
assert train_part2_ood == False

# Positional arguments passed identically to save_report and save_metrics.
run_settings = (
    n_classes, label_dict, inv_label_dict, language, layer_strategy,
    last_layer_strategy, reduce_strategy, decay_factor,
    language_model_relation, cased, bert_type, learning_rate, epochs, seed,
)

if language == "All":
    # The multilingual model needs to be evaluated on each language.
    for X_test_lang, y_test_lang, lang_code in (
        (X_test_en, y_test_en, "en"),
        (X_test_de, y_test_de, "de"),
        (X_test_ro, y_test_ro, "ro"),
    ):
        print("Evaluating the following language: " + lang_code)
        y_pred, predictions_logit, y_test_categorical = test_prediction(model, tokenizer, X_test_lang, y_test_lang, n_classes, max_length)
        save_report(y_test_lang, y_pred, *run_settings, test_lang=lang_code)
        save_metrics(y_test_categorical, predictions_logit, *run_settings, test_lang=lang_code)
else:
    # Single-language model: evaluate on the test set of that language.
    y_pred, predictions_logit, y_test_categorical = test_prediction(model, tokenizer, X_test, y_test, n_classes, max_length)
    save_report(y_test, y_pred, *run_settings, test_lang=language)
    save_metrics(y_test_categorical, predictions_logit, *run_settings, test_lang=language)