In [8]:
!pip install tensorflow==2.12.0


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [7]:
# Matplotlib Inline
%matplotlib inline

# Import Modules
import gc
import random
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
# import seaborn as sns
import tensorflow as tf
from typing import Tuple
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Install Transformers
!pip install transformers==4.28.1
from transformers import (TFGPT2Model, 
                          TFMBartModel,
                          TFBertForSequenceClassification,
                          TFDistilBertForSequenceClassification,
                          TFXLMRobertaForSequenceClassification,
                          TFMT5ForConditionalGeneration,
                          TFT5ForConditionalGeneration,
                          T5Tokenizer,
                          AutoTokenizer,
                          AutoConfig,
                         TFBertModel)

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [9]:
# Configure Strategy. Assume TPU...if not set default for GPU/CPU
# `tpu` doubles as the "running on TPU" flag for the rest of the notebook: it
# selects the per-replica batch size and gates the TPU re-initialisation that
# precedes every model run.
tpu = None
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    # A ValueError from any step above is treated as "no usable TPU". Reset the
    # flag as well, so a resolver that was created before a later step failed
    # does not make downstream cells believe a TPU is active.
    tpu = None
    strategy = tf.distribute.get_strategy()
    
# Seeds
def set_seeds(seed: int)->None:
    """Seed Python's `random`, NumPy and TensorFlow with the same value."""
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)
    
# Generic Constants
SEED = 1000                 # one seed shared by every model run
set_seeds(SEED)
MAX_LEN = 512               # token length every example is padded/truncated to
TEST_SIZE = 0.2             # fraction of the rows held out for validation
LR = 2e-5                   # learning rate handed to the Adam optimizer
VERBOSE = 1                 # verbosity passed to Keras fit()

# Set Autotune
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Set Batch Size: the per-replica size is 4 on GPU/CPU (modify to match your
# GPU card) or 8 on TPU v2 or up; the global batch scales with the replicas.
BASE_BATCH_SIZE = 8 if tpu is not None else 4
BATCH_SIZE = BASE_BATCH_SIZE * strategy.num_replicas_in_sync

INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.
INFO:tensorflow:Initializing the TPU system: local
INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU

To maximize the reproducibility for each model run we will use the same Seed, Batch Size and Learning Rate.

In [10]:
# Summary of the settings shared by all model runs (one line per setting)
print(
    f'Seed: {SEED}',
    f'Replica Count: {strategy.num_replicas_in_sync}',
    f'Batch Size: {BATCH_SIZE}',
    f'Learning Rate: {LR}',
    sep='\n',
)

Seed: 1000
Replica Count: 8
Batch Size: 64
Learning Rate: 2e-05


The next section contains some plumbing code that turns the dataset, held in a Pandas DataFrame, into tokenized model inputs.

Also the necessary code to create the Tensorflow Datasets is provided.

In [11]:
def create_dataset(df, max_len, tokenizer, batch_size, shuffle=False):
    """Tokenize the tweets in ``df`` and wrap them in a batched ``tf.data.Dataset``.

    Args:
        df: DataFrame with a 'Tweet' column (the input text) and a
            'Type of Claim' column holding 'Simple', 'Composite' or 'Compound'.
        max_len: Every example is truncated or padded to exactly this many tokens.
        tokenizer: Hugging Face tokenizer exposing ``encode_plus``.
        batch_size: Number of examples per batch. The last, incomplete batch is
            dropped (``drop_remainder=True``), so up to ``batch_size - 1`` rows
            are not served.
        shuffle: When True, the examples are reshuffled on every pass.

    Returns:
        A prefetched dataset yielding
        ``({'input_ids': ..., 'attention_mask': ...}, labels)`` batches, with the
        labels encoded as Simple=0, Composite=1, Compound=2.

    Raises:
        ValueError: If a row has a claim type outside the three known classes.
    """
    label_ids = {'Simple': 0, 'Composite': 1, 'Compound': 2}
    total_samples = df.shape[0]

    input_ids, input_masks, labels = [], [], []

    # Tokenize row by row, showing a progress bar.
    for text, claim_type in tqdm(zip(df['Tweet'], df['Type of Claim']), total=total_samples):
        # Fail here with a clear message instead of appending None and letting
        # the tensor conversion below fail with an unrelated-looking error.
        if claim_type not in label_ids:
            raise ValueError(
                f"Unknown 'Type of Claim' value {claim_type!r}; "
                f"expected one of {sorted(label_ids)}"
            )

        input_encoded = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=max_len,
            truncation=True,
            padding='max_length'
        )
        input_ids.append(input_encoded['input_ids'])
        input_masks.append(input_encoded['attention_mask'])
        labels.append(label_ids[claim_type])

    # Immutable int32 arrays: the data is read-only, so there is no need to
    # allocate trainable tf.Variable objects for it.
    all_input_ids = np.asarray(input_ids, dtype=np.int32)
    all_input_masks = np.asarray(input_masks, dtype=np.int32)
    all_labels = np.asarray(labels, dtype=np.int32)

    dataset = tf.data.Dataset.from_tensor_slices(
        (
            {
                'input_ids': all_input_ids,
                'attention_mask': all_input_masks
            },
            all_labels
        )
    )

    if shuffle:
        # A buffer covering the whole dataset gives a uniform shuffle; a smaller
        # buffer only mixes examples within a sliding window. The buffer size
        # must be at least 1.
        dataset = dataset.shuffle(max(total_samples, 1), reshuffle_each_iteration=True)

    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return dataset

## Models Code

The following section contains the code for setting up the different models, saving the model files and a custom accuracy metric implementation for the mT5 and ByT5 models.

In [12]:
def ModelCheckpoint(model_name):
    """Build a Keras checkpoint callback that keeps only the best weights.

    Args:
        model_name: File path the weights are written to.

    Returns:
        A ``tf.keras.callbacks.ModelCheckpoint`` that, at the end of each epoch,
        saves the weights (not the full model) whenever ``val_accuracy`` reaches
        a new maximum.
    """
    return tf.keras.callbacks.ModelCheckpoint(model_name, 
                                              monitor = 'val_accuracy', 
                                              verbose = 1, 
                                              save_best_only = True, 
                                              save_weights_only = True, 
                                              mode = 'max', 
                                              # Replaces the deprecated `period = 1`:
                                              # check once per epoch.
                                              save_freq = 'epoch')

def create_distilmbert_model(model_type, strategy, config, lr):
    """Load and compile a DistilBERT sequence classifier under ``strategy``.

    Args:
        model_type: Name or path of the pretrained checkpoint.
        strategy: ``tf.distribute`` strategy whose scope the model is built in.
        config: Model configuration passed to ``from_pretrained``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The compiled model: sparse categorical cross-entropy on logits, with a
        sparse categorical accuracy metric named 'accuracy'.
    """
    with strategy.scope():
        classifier = TFDistilBertForSequenceClassification.from_pretrained(model_type, config = config)
        classifier.compile(
            optimizer = tf.keras.optimizers.Adam(learning_rate = lr),
            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
            metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy')],
        )
    return classifier

def create_gpt2_model(model_type, max_len, strategy, config, lr, tokenizer):
    """Build and compile a GPT-2 based sequence classifier under ``strategy``.

    Args:
        model_type: Name or path of the pretrained GPT-2 checkpoint.
        max_len: Fixed token length of the ``input_ids``/``attention_mask`` inputs.
        strategy: ``tf.distribute`` strategy whose scope the model is built in.
        config: Model configuration; ``config.num_labels`` sets the number of
            output classes.
        lr: Learning rate for the Adam optimizer.
        tokenizer: Tokenizer used for the data; the embedding matrix is resized
            to ``len(tokenizer)``.

    Returns:
        A compiled ``tf.keras.Model`` mapping the two inputs to class logits.
    """
    # NOTE! There is a TFGPT2ForSequenceClassification class available
    # When using it I ran into some issues which were similar to some open issues on the
    # Huggingface site. I will give this some more effort when I have the time available.
    # Creating a Custom Model with TFGPT2Model just works.
    with strategy.scope():   
        input_ids = tf.keras.layers.Input(shape = (max_len,), dtype = tf.int32, name = 'input_ids')
        input_masks = tf.keras.layers.Input(shape = (max_len,), dtype = tf.int32, name = 'attention_mask')
        
        gpt2_model = TFGPT2Model.from_pretrained(model_type, config = config, from_pt = True)
        gpt2_model.resize_token_embeddings(len(tokenizer))
        gpt2_model.config.pad_token_id = gpt2_model.config.eos_token_id 
        
        hidden_states = gpt2_model({'input_ids': input_ids, 'attention_mask': input_masks})[0]

        # GPT-2 attends left-to-right, so position 0 only ever sees the first
        # token. Classify from the last real (non-padding) token instead, which
        # has attended to the whole text.
        # Assumes the tokenizer pads on the right, so that the real tokens occupy
        # positions 0 .. sum(mask) - 1 -- TODO confirm tokenizer.padding_side.
        last_token_index = tf.maximum(tf.reduce_sum(input_masks, axis = 1) - 1, 0)
        x = tf.gather(hidden_states, last_token_index, batch_dims = 1)
        x = tf.keras.layers.Dropout(0.2)(x)
        # One logit per class, taken from the config instead of a hard-coded 6.
        outputs = tf.keras.layers.Dense(config.num_labels)(x)
        model = tf.keras.Model(inputs = [input_ids, input_masks], outputs = outputs) 

        optimizer = tf.keras.optimizers.Adam(learning_rate = lr)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

        # Compile
        model.compile(optimizer = optimizer, loss = loss, metrics = [metric])        
        
        return model
    
def create_mbart_model(model_type, strategy, config, max_len, lr):
    """Build and compile an MBart based sequence classifier under ``strategy``.

    Args:
        model_type: Name or path of the pretrained MBart checkpoint.
        strategy: ``tf.distribute`` strategy whose scope the model is built in.
        config: Model configuration; ``config.num_labels`` sets the number of
            output classes.
        max_len: Fixed token length of the ``input_ids``/``attention_mask`` inputs.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Model`` mapping the two inputs to class logits.
    """
    # Create 'Custom' Classification Model as we only have TFMBartModel
    with strategy.scope():   
        input_ids = tf.keras.layers.Input(shape = (max_len,), dtype = tf.int32, name = 'input_ids')
        input_masks = tf.keras.layers.Input(shape = (max_len,), dtype = tf.int32, name = 'attention_mask')
        
        mbart_model = TFMBartModel.from_pretrained(model_type, config = config, from_pt = True)
        
        last_hidden_states = mbart_model({'input_ids': input_ids, 'attention_mask': input_masks})
        # Classify from the hidden state at sequence position 0 of the first output.
        x = last_hidden_states[0][:, 0, :]
        x = tf.keras.layers.Dropout(0.2)(x)
        # One logit per class, taken from the config. A hard-coded 2 cannot
        # represent the label ids 0, 1 and 2 produced for this dataset.
        outputs = tf.keras.layers.Dense(config.num_labels)(x)
        model = tf.keras.Model(inputs = [input_ids, input_masks], outputs = outputs) 

        optimizer = tf.keras.optimizers.Adam(learning_rate = lr)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

        # Compile
        model.compile(optimizer = optimizer, loss = loss, metrics = [metric])        
        
        return model
    
def create_mbert_model(model_type, strategy, config, lr):
    """Load and compile a BERT sequence classifier under ``strategy``.

    Args:
        model_type: Name or path of the pretrained checkpoint.
        strategy: ``tf.distribute`` strategy whose scope the model is built in.
        config: Model configuration passed to ``from_pretrained``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The compiled model: sparse categorical cross-entropy on logits, with a
        sparse categorical accuracy metric named 'accuracy'.
    """
    with strategy.scope():
        classifier = TFBertForSequenceClassification.from_pretrained(model_type, config = config)
        classifier.compile(
            optimizer = tf.keras.optimizers.Adam(learning_rate = lr),
            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
            metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy')],
        )
    return classifier
    
def create_xlm_roberta_model(model_type, strategy, config, lr):
    """Load and compile an XLM-RoBERTa sequence classifier under ``strategy``.

    Args:
        model_type: Name or path of the pretrained checkpoint.
        strategy: ``tf.distribute`` strategy whose scope the model is built in.
        config: Model configuration passed to ``from_pretrained``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The compiled model: sparse categorical cross-entropy on logits, with a
        sparse categorical accuracy metric named 'accuracy'.
    """
    with strategy.scope():
        classifier = TFXLMRobertaForSequenceClassification.from_pretrained(model_type, config = config)
        classifier.compile(
            optimizer = tf.keras.optimizers.Adam(learning_rate = lr),
            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
            metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy')],
        )
    return classifier



## Get Twitter Dataset

Next it is time to create the Twitter dataset that will be used for training and validation of the 4 models.

In the dataframe sample output below you can see the 'Tweet' column that will be used as input text for each model. Also visible is the column 'Type of Claim', which is used as the label that the models learn to predict for the input text.

In [13]:
pip install openpyxl


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [14]:
import pandas as pd

# Read the annotated tweets from the Kaggle input directory into a DataFrame.
dpgnews_df = pd.read_excel(io='/kaggle/input/data-train/English Anotated data.xlsx')

In [15]:
# Preview the first two rows to check the column layout.
dpgnews_df.head(n=2)

Unnamed: 0,ID,Tweet,Selected Claim,Type of Claim,Entities,Remark
0,1669782054240689920,#IntelBrief: In addition to #AlQaeda &amp; ISK...,"In addition to #AlQaeda &amp; ISK, there are a...",Composite,"AlQaeda, terrorist, groups, active, Afghanista...",
1,1669781942504389888,Afghanistan-based extremists spark terror fear...,Afghanistan-based extremists spark terror fear...,Simple,"Afghanistan, spark, fears, Central, Asia terro...",


In [16]:
# Distinct claim types present in the data, in order of first appearance.
labels = pd.unique(dpgnews_df['Type of Claim'])
print(labels)

['Composite' 'Simple' 'Compound']


In [17]:
# Create Train Test Split
# Stratifying on the claim type keeps the class proportions the same in both
# parts; the fixed random_state makes the split repeatable.
train_df, val_df = train_test_split(
    dpgnews_df,
    test_size = TEST_SIZE,
    random_state = SEED,
    stratify = dpgnews_df['Type of Claim'].to_numpy(),
)

## Multi-Lingual BERT

The first model we will put to the test is Multi-Lingual BERT. When released in 2018, BERT caused a small revolution by drastically improving the scores achieved on multiple NLP tasks. To review the paper use the following [link](https://arxiv.org/abs/1810.04805).

Multi-Lingual BERT is the same model...however pre-trained on a large multi-lingual Wikipedia dataset containing the top 104 languages. The model was pre-trained on 2 objectives: Masked Language Modelling and Next Sentence Prediction.

Note that we will train the model for 12 epochs only. With the size of the used dataset this is more than sufficient to make sure the model converges.

In [23]:
# Multi-Lingual BERT Constants
EPOCHS = 12
model_type = 'bert-base-multilingual-cased'

# Set Config: three output classes (Simple, Composite, Compound)
config = AutoConfig.from_pretrained(model_type, num_labels = 3) 
# Set Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_type, add_prefix_space = False, do_lower_case = False)

# Cleanup: reset Keras state and, when running on TPU, re-initialize the TPU
# system before building a new model.
tf.keras.backend.clear_session()    
if tpu is not None:
    tf.tpu.experimental.initialize_tpu_system(tpu)
gc.collect()

# Create Train and Validation Datasets
train_dataset = create_dataset(train_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = True)
validation_dataset = create_dataset(val_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = False)

# Steps: whole batches only, matching the drop_remainder batching of the datasets
train_steps = train_df.shape[0] // BATCH_SIZE
val_steps = val_df.shape[0] // BATCH_SIZE
print(f'Train Steps: {train_steps}')
print(f'Val Steps: {val_steps}')

# Create Model
model_BERT = create_mbert_model(model_type, strategy, config, LR)

# Model Summary: summary() prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line to the output.
model_BERT.summary()

# Fit Model
history = model_BERT.fit(train_dataset,
                    steps_per_epoch = train_steps,
                    validation_data = validation_dataset,
                    validation_steps = val_steps,
                    epochs = EPOCHS, 
                    verbose = VERBOSE)

INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.


INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.






INFO:tensorflow:Initializing the TPU system: local


INFO:tensorflow:Initializing the TPU system: local


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.
100%|██████████| 412/412 [00:00<00:00, 1619.94it/s]
100%|██████████| 104/104 [00:00<00:00, 1566.31it/s]


Train Steps: 6
Val Steps: 1


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  177853440 
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 classifier (Dense)          multiple                  2307      
                                                                 
Total params: 177,855,747
Trainable params: 177,855,747
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/12


2023-08-14 17:12:04.049430: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2023-08-14 17:12:04.903969: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.




2023-08-14 17:12:10.712750: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2023-08-14 17:12:10.867931: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.


Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [19]:
pip install sentencepiece


Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
# # Make predictions on the test dataset
# y_true_RoBERT = []
# y_pred_RoBERT = []
# # outputs_XLM_logit = []

# for batch in test_dataset:
#     inputs = batch[0]
#     labels = batch[1]
#     outputs_XLM = model_XLMRoBERTa.predict(inputs)
#     predicted_labels = np.argmax(outputs_XLM.logits, axis=1)  # Modify this line
    
#     y_true_RoBERT.extend(labels.numpy().tolist())
#     y_pred_RoBERT.extend(predicted_labels.tolist())


In [None]:
# from sklearn.metrics import classification_report

# # Assuming you have true labels y_true and predicted labels y_pred
# report = classification_report(y_true_RoBERT,y_pred_RoBERT)

# print(report)

# # https://mathweb.ucsd.edu/~bdriver/286-Spring2008/Lecture%20Notes/SDE20080401.pdf

In [None]:
# from sklearn.metrics import confusion_matrix

# # Calculate confusion matrix
# confusion_mat = confusion_matrix(true_labels, predicted_labels)

# # Print the confusion matrix
# print("Confusion Matrix:")
# print(confusion_mat)


After training the model we can view the performance on the validation set. Let's see what classification accuracy has been achieved.

In [2]:
from transformers import AutoConfig, AutoTokenizer


# Rest of your code


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# XLM-RoBERTa Constants
EPOCHS = 3
# NOTE(review): 'roberta-base' is loaded through the XLM-RoBERTa model class
# below, while this section is titled XLM-RoBERTa -- confirm whether
# 'xlm-roberta-base' was intended.
model_type = 'roberta-base'

# Set Config: three output classes (Simple, Composite, Compound)
config = AutoConfig.from_pretrained(model_type, num_labels = 3)

# Set Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_type, add_prefix_space = False, do_lower_case = False)

# Cleanup: reset Keras state and, when running on TPU, re-initialize the TPU
# system before building a new model.
tf.keras.backend.clear_session()    
if tpu is not None:
    tf.tpu.experimental.initialize_tpu_system(tpu)
gc.collect()

# Create Train and Validation Datasets
train_dataset = create_dataset(train_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = True)
validation_dataset = create_dataset(val_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = False)

# Steps: whole batches only, matching the drop_remainder batching of the datasets
train_steps = train_df.shape[0] // BATCH_SIZE
val_steps = val_df.shape[0] // BATCH_SIZE
print(f'Train Steps: {train_steps}')
print(f'Val Steps: {val_steps}')

# Create Model
model_XLMRoBERTa = create_xlm_roberta_model(model_type, strategy, config, LR)

# Model Summary: summary() prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line to the output.
model_XLMRoBERTa.summary()

# Fit Model
history = model_XLMRoBERTa.fit(train_dataset,
                    steps_per_epoch = train_steps,
                    validation_data = validation_dataset,
                    validation_steps = val_steps,
                    epochs = EPOCHS, 
                    verbose = VERBOSE)

INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.
INFO:tensorflow:Initializing the TPU system: local


free(): corrupted unsorted chunks
https://symbolize.stripped_domain/r/?trace=79d9c0641ccc,79d9c05f2f8f,56949ce5248f&map= 
*** SIGABRT received by PID 9864 (TID 10680) on cpu 78 from PID 9864; stack trace: ***
PC: @     0x79d9c0641ccc  (unknown)  (unknown)
    @     0x79d8c4a087fa       1152  (unknown)
    @     0x79d9c05f2f90      16528  (unknown)
    @     0x56949ce52490  (unknown)  (unknown)
https://symbolize.stripped_domain/r/?trace=79d9c0641ccc,79d8c4a087f9,79d9c05f2f8f,56949ce5248f&map=a5742613da17b2beb1178f2ea3f818c3:79d8b9600000-79d8c4c20be0 
E0814 17:28:51.181303   10680 coredump_hook.cc:409] RAW: Remote crash data gathering hook invoked.
E0814 17:28:51.181321   10680 client.cc:278] RAW: Coroner client retries enabled (b/136286901), will retry for up to 30 sec.
E0814 17:28:51.181324   10680 coredump_hook.cc:507] RAW: Sending fingerprint to remote end.
E0814 17:28:51.181332   10680 coredump_socket.cc:120] RAW: Stat failed errno=2 on socket /var/google/services/logmanagerd/remote

In [17]:
# Validation Performance
# Reports the best (maximum) per-epoch validation accuracy stored in `history`,
# i.e. the result of whichever fit() call last assigned that name.
# NOTE(review): in top-to-bottom order the closest preceding `history = ...` is
# the RoBERTa training cell, not the Multi-Lingual BERT one -- confirm the
# intended cell order.
print(f'\n===== Multi-Lingual BERT Classification Accuracy: {np.max(history.history["val_accuracy"])*100:.3f}%')


===== Multi-Lingual BERT Classification Accuracy: 93.980%


## Multi-Lingual DistilBERT

The second model we will put to the test is Multi-Lingual DistilBERT. To review the paper use the following [link](https://arxiv.org/abs/1910.01108).

Multi-Lingual DistilBERT is 40% smaller in size than BERT, 60% faster and retained 97% of the language understanding capabilities according to the paper summary.

Note that we will train the model for 3 epochs only. With the size of the used dataset this is more than sufficient to make sure the model converges.

In [3]:
# Multi-Lingual DistilBERT Constants
EPOCHS = 3
model_type = 'distilbert-base-multilingual-cased'

# Set Config: three output classes (Simple, Composite, Compound)
config = AutoConfig.from_pretrained(model_type, num_labels = 3) 
# Set Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_type, add_prefix_space = False, do_lower_case = False)

# Cleanup: reset Keras state and, when running on TPU, re-initialize the TPU
# system before building a new model.
tf.keras.backend.clear_session()    
if tpu is not None:
    tf.tpu.experimental.initialize_tpu_system(tpu)
gc.collect()

# Create Train and Validation Datasets
train_dataset = create_dataset(train_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = True)
validation_dataset = create_dataset(val_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = False)

# Steps: whole batches only, matching the drop_remainder batching of the datasets
train_steps = train_df.shape[0] // BATCH_SIZE
val_steps = val_df.shape[0] // BATCH_SIZE
print(f'Train Steps: {train_steps}')
print(f'Val Steps: {val_steps}')

# Create Model
model = create_distilmbert_model(model_type, strategy, config, LR)

# Model Summary: summary() prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line to the output.
model.summary()

# Fit Model
history = model.fit(train_dataset,
                    steps_per_epoch = train_steps,
                    validation_data = validation_dataset,
                    validation_steps = val_steps,
                    epochs = EPOCHS, 
                    verbose = VERBOSE)

Downloading (…)lve/main/config.json: 100%|██████████| 466/466 [00:00<00:00, 54.5kB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 29.0/29.0 [00:00<00:00, 4.04kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 996k/996k [00:00<00:00, 10.9MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 1.96M/1.96M [00:00<00:00, 8.67MB/s]


NameError: name 'tf' is not defined

After training the model we can view the performance on the validation set. Let's see what classification accuracy has been achieved.

In [None]:
# Validation Performance
# Reports the best (maximum) per-epoch validation accuracy stored in `history`,
# i.e. the result of whichever fit() call last assigned that name.
print(f'\n===== Multi-Lingual DistilBERT Classification Accuracy: {np.max(history.history["val_accuracy"])*100:.3f}%')

In [None]:
# Cleanup
# Drop the references to the finished run so the memory can be reclaimed.
# Popping from globals() (instead of `del`) keeps the cell re-runnable: a name
# that is already gone is skipped rather than raising NameError.
for _stale_name in ('model', 'train_dataset', 'validation_dataset'):
    globals().pop(_stale_name, None)
del _stale_name
gc.collect()

## XLM-RoBERTa

The third model we will put to the test is XLM-RoBERTa. It is based on the earlier released RoBERTa model. In the paper it is mentioned that XLM-RoBERTa outperforms Multi-Lingual BERT on various tasks. To review the paper use the following [link](https://arxiv.org/abs/1911.02116).

XLM-RoBERTa was pre-trained on 2.5TB of filtered text from the Common Crawl dataset. The dataset contains text for the top 100 languages. The model was pre-trained on 1 objective: Masked Language Modelling.

Note that we will train the model for 3 epochs only. With the size of the used dataset this is more than sufficient to make sure the model converges.

After training the model we can view the performance on the validation set. Let's see what classification accuracy has been achieved.

In [None]:
# Validation Performance
# Reports the best (maximum) per-epoch validation accuracy stored in `history`,
# i.e. the result of whichever fit() call last assigned that name.
# NOTE(review): in top-to-bottom order the closest preceding `history = ...` is
# the Multi-Lingual DistilBERT training cell -- confirm that the XLM-RoBERTa
# training cell is run immediately before this one.
print(f'\n===== XLM-RoBERTa Classification Accuracy: {np.max(history.history["val_accuracy"])*100:.3f}%')

In [None]:
# Cleanup
# Drop the references to the finished run so the memory can be reclaimed.
# The XLM-RoBERTa training cell binds its model to `model_XLMRoBERTa`, and an
# earlier cleanup cell may already have removed `model` and the datasets, so a
# plain `del` raises NameError here. Popping from globals() skips missing names.
for _stale_name in ('model', 'model_XLMRoBERTa', 'train_dataset', 'validation_dataset'):
    globals().pop(_stale_name, None)
del _stale_name
gc.collect()

## MBart

The fourth model we will put to the test is MBart. To review the paper use the following [link](https://arxiv.org/abs/2001.08210).

Note that we will train the model for 3 epochs only. With the size of the used dataset this is more than sufficient to make sure the model converges.

In [None]:
# MBart Constants
EPOCHS = 3
model_type = 'facebook/mbart-large-cc25'

# Set Config: three output classes (Simple, Composite, Compound). The datasets
# encode them as label ids 0, 1 and 2, so num_labels = 2 would not cover id 2.
config = AutoConfig.from_pretrained(model_type, num_labels = 3)

# Set Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_type, add_prefix_space = False, do_lower_case = False)

# Cleanup: reset Keras state and, when running on TPU, re-initialize the TPU
# system before building a new model.
tf.keras.backend.clear_session()    
if tpu is not None:
    tf.tpu.experimental.initialize_tpu_system(tpu)
gc.collect()

# Create Train and Validation Datasets
train_dataset = create_dataset(train_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = True)
validation_dataset = create_dataset(val_df, MAX_LEN, tokenizer, BATCH_SIZE, shuffle = False)

# Steps: whole batches only, matching the drop_remainder batching of the datasets
train_steps = train_df.shape[0] // BATCH_SIZE
val_steps = val_df.shape[0] // BATCH_SIZE
print(f'Train Steps: {train_steps}')
print(f'Val Steps: {val_steps}')

# Create Model
model = create_mbart_model(model_type, strategy, config, MAX_LEN, LR)

# Model Summary: summary() prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line to the output.
model.summary()

# Fit Model
history = model.fit(train_dataset,
                    steps_per_epoch = train_steps,
                    validation_data = validation_dataset,
                    validation_steps = val_steps,
                    epochs = EPOCHS, 
                    verbose = VERBOSE)

After training the model we can view the performance on the validation set. Let's see what classification accuracy has been achieved.

In [None]:
# Validation Performance
# Reports the best (maximum) per-epoch validation accuracy stored in `history`,
# i.e. the result of whichever fit() call last assigned that name.
print(f'\n===== MBart Classification Accuracy: {np.max(history.history["val_accuracy"])*100:.3f}%')

One 'issue' that we have with mT5 (and for ByT5...) is that it is a generative model. It generates text and doesn't have a Dense output layer as Multi-Lingual BERT or XLM-RoBERTa where we output a probability between 0 and 1 to predict the partisan label.

What we can do, however, is generate 'text-labels' that represent the classification label.

So we will train the mT5 and ByT5 models to predict the following 'text-labels':
* Partisan label: True ==> mT5/ByT5 label to generate/classify as 'politiek'
* Partisan label: False ==> mT5/ByT5 label to generate/classify as 'neutraal'

Below you can see how the labels are encoded and what their token values are.

In [None]:
# Cleanup
# Drop the references to the finished run so the memory can be reclaimed.
# Popping from globals() (instead of `del`) keeps the cell re-runnable: a name
# that is already gone is skipped rather than raising NameError.
for _stale_name in ('model', 'train_dataset', 'validation_dataset'):
    globals().pop(_stale_name, None)
del _stale_name
gc.collect()

After training the model we can view the performance on the validation set. Let's see what classification accuracy has been achieved.

## Summary and Results

After training all the models and running the evaluation on the validation set we can see the achieved accuracy for each of the models.

Below is an overview of the achieved accuracy scores (these scores are based on the previous version, so they can vary slightly because of the randomness involved):
1. MBart: xx%
2. XLM-RoBERTa: xx%
3. Multi-Lingual BERT: 67%
4. Multi-Lingual DistilBERT: xx%


