# Humor Classification using Multimodal Transformers

In [2]:
from humor_recognition.models import *
from humor_recognition.tasks import train, predict

from datasets import Dataset
from pathlib import Path
from transformers import (AutoModelForSequenceClassification,
                          AutoTokenizer,
                          DataCollatorWithPadding,
                          Trainer,
                          TrainingArguments,
                          pipeline)

import pandas as pd


In [3]:
# Fold-0 inputs: corpus JSON files plus the matching humor-feature CSV files.
corpus_dir = Path('../../../BRUM/data/cross_validation/fold_0')
features_dir = '../../data/humor_features/fold_0'

train_corpus_path = corpus_dir / 'train.json'
test_corpus_path = corpus_dir / 'test.json'
# Note: the train features path is a plain str, the test features path is a Path.
train_features_path = features_dir + '/train.csv'
test_features_path = Path(features_dir + '/test.csv')

## Bertimbau Base

In [3]:
# Pretrained encoder identifier and the architecture tag used with it by the cells below.
checkpoint, checkpoint_type = 'neuralmind/bert-base-portuguese-cased', 'bert'

### Base model: $x$

In [4]:
train_output = Path('results/models/bertimbau/fold_0')
test_output = Path('results/predictions/bertimbau/fold_0.csv')

###### TRAINING ######
# Text-only baseline: fine-tune a sequence classifier on the `Text` column.
train_corpus = pd.read_json(train_corpus_path)

# Sort the distinct labels so the label <-> id mapping is the same no matter
# how the rows of the training file are ordered; .tolist() converts the
# values to plain Python objects before they are passed to from_pretrained.
labels = sorted(train_corpus['Label'].unique().tolist())
num_labels = len(labels)
label2id = {label: id_ for id_, label in enumerate(labels)}
id2label = {id_: label for label, id_ in label2id.items()}
train_corpus['Label'] = train_corpus['Label'].map(label2id)

data = Dataset.from_pandas(train_corpus[['Text', 'Label']])
data = data.rename_column('Label', 'label')

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize a batch of rows from the `Text` column.

    Truncation is requested so that a text longer than the tokenizer's
    configured maximum length does not reach the model untruncated.
    NOTE(review): this relies on the checkpoint's tokenizer declaring a
    maximum length -- confirm for each checkpoint used.
    """
    return tokenizer(examples['Text'], truncation=True)


tokenized_data = data.map(tokenize_function, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(checkpoint,
                                                           num_labels=num_labels,
                                                           label2id=label2id,
                                                           id2label=id2label)

# Checkpoint once per epoch and keep at most one checkpoint on disk.
training_args = TrainingArguments(train_output,
                                  save_strategy='epoch',
                                  save_total_limit=1,
                                  learning_rate=5e-5,
                                  num_train_epochs=5)
trainer = Trainer(model, training_args,
                  train_dataset=tokenized_data,
                  data_collator=data_collator,
                  tokenizer=tokenizer)
trainer.train()

###### PREDICTION ######
test_corpus = pd.read_json(test_corpus_path)
test_data = test_corpus['Text'].to_list()

# Run inference on the device the fine-tuned model is already on instead of
# hard-coding GPU 0, so the cell also works on a machine without CUDA.
text_classification = pipeline('text-classification',
                               model=model,
                               tokenizer=tokenizer,
                               device=model.device)
# Same truncation policy as during training.
predictions = text_classification(test_data, truncation=True)

# One row per test example: predicted label name next to the gold label.
results = pd.DataFrame(predictions, index=test_corpus.index)
results = results.drop(columns='score')
results = results.rename(columns={'label': 'Prediction'})
results['Label'] = test_corpus['Label']

test_output.parent.mkdir(parents=True, exist_ok=True)
results.to_csv(test_output, encoding='utf-8')


Map:   0%|          | 0/8825 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/5520 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.6195, 'learning_rate': 4.547101449275363e-05, 'epoch': 0.45}
{'loss': 0.5876, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.478, 'learning_rate': 3.641304347826087e-05, 'epoch': 1.36}
{'loss': 0.4663, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.4057, 'learning_rate': 2.7355072463768118e-05, 'epoch': 2.26}
{'loss': 0.3578, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.3342, 'learning_rate': 1.8297101449275363e-05, 'epoch': 3.17}
{'loss': 0.2873, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.2902, 'learning_rate': 9.239130434782608e-06, 'epoch': 4.08}
{'loss': 0.2208, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'loss': 0.2204, 'learning_rate': 1.8115942028985507e-07, 'epoch': 4.98}
{'train_runtime': 457.7651, 'train_samples_per_second': 96.392, 'train_steps_per_second': 12.059, 'train_loss': 0.387457694782727, 'epoch': 5.0}


### Concatenation: $x\|n$

In [5]:
# Output locations for the concatenation variant (fold 0).
variant = 'bertimbau_concatenation'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelConcatenation, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.5917, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.4472, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.3392, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.2661, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.2177, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 415.9902, 'train_samples_per_second': 106.072, 'train_steps_per_second': 6.635, 'train_loss': 0.35486069278440613, 'epoch': 5.0}


### Features pooling: $x\|\mathrm{MLP}(n)$

In [6]:
# Output locations for the features-pooling variant (fold 0).
variant = 'bertimbau_features_pooling'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelFeaturesPooling, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.5898, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.4449, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.3316, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.2714, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.2067, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 416.9316, 'train_samples_per_second': 105.833, 'train_steps_per_second': 6.62, 'train_loss': 0.35074825286865235, 'epoch': 5.0}


### Shared representation: $\mathrm{MLP}(x\|\mathrm{MLP}(n))$

In [7]:
# Output locations for the shared-representation variant (fold 0).
variant = 'bertimbau_shared_representation'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelSharedRepresentation, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.5914, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.446, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.3407, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.2807, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.2283, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 408.4961, 'train_samples_per_second': 108.018, 'train_steps_per_second': 6.756, 'train_loss': 0.36211222358371903, 'epoch': 5.0}


## Bertimbau Large

In [4]:
# Pretrained encoder identifier and its architecture tag for the large model.
checkpoint, checkpoint_type = 'neuralmind/bert-large-portuguese-cased', 'bert'

In [5]:
train_output = Path('results/models/bertimbau_large/fold_0')
test_output = Path('results/predictions/bertimbau_large/fold_0.csv')

###### TRAINING ######
# Text-only baseline: fine-tune a sequence classifier on the `Text` column.
train_corpus = pd.read_json(train_corpus_path)

# Sort the distinct labels so the label <-> id mapping is the same no matter
# how the rows of the training file are ordered; .tolist() converts the
# values to plain Python objects before they are passed to from_pretrained.
labels = sorted(train_corpus['Label'].unique().tolist())
num_labels = len(labels)
label2id = {label: id_ for id_, label in enumerate(labels)}
id2label = {id_: label for label, id_ in label2id.items()}
train_corpus['Label'] = train_corpus['Label'].map(label2id)

data = Dataset.from_pandas(train_corpus[['Text', 'Label']])
data = data.rename_column('Label', 'label')

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize a batch of rows from the `Text` column.

    Truncation is requested so that a text longer than the tokenizer's
    configured maximum length does not reach the model untruncated.
    NOTE(review): this relies on the checkpoint's tokenizer declaring a
    maximum length -- confirm for each checkpoint used.
    """
    return tokenizer(examples['Text'], truncation=True)


tokenized_data = data.map(tokenize_function, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(checkpoint,
                                                           num_labels=num_labels,
                                                           label2id=label2id,
                                                           id2label=id2label)

# Checkpoint once per epoch and keep at most one checkpoint on disk.
training_args = TrainingArguments(train_output,
                                  save_strategy='epoch',
                                  save_total_limit=1,
                                  learning_rate=5e-5,
                                  num_train_epochs=5)
trainer = Trainer(model, training_args,
                  train_dataset=tokenized_data,
                  data_collator=data_collator,
                  tokenizer=tokenizer)
trainer.train()

###### PREDICTION ######
test_corpus = pd.read_json(test_corpus_path)
test_data = test_corpus['Text'].to_list()

# Run inference on the device the fine-tuned model is already on instead of
# hard-coding GPU 0, so the cell also works on a machine without CUDA.
text_classification = pipeline('text-classification',
                               model=model,
                               tokenizer=tokenizer,
                               device=model.device)
# Same truncation policy as during training.
predictions = text_classification(test_data, truncation=True)

# One row per test example: predicted label name next to the gold label.
results = pd.DataFrame(predictions, index=test_corpus.index)
results = results.drop(columns='score')
results = results.rename(columns={'label': 'Prediction'})
results['Label'] = test_corpus['Label']

test_output.parent.mkdir(parents=True, exist_ok=True)
results.to_csv(test_output, encoding='utf-8')


Downloading (…)okenizer_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)lve/main/config.json:   0%|          | 0.00/648 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/210k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/8825 [00:00<?, ? examples/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-large-portuguese-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/5520 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


KeyboardInterrupt: 

## Albertina PT-BR Base

In [3]:
# Local Hugging Face cache snapshot directory (`models--PORTULAN--albertina-ptbr-base`),
# built from the current user's home directory rather than a hard-coded per-user
# absolute path. as_posix() keeps forward slashes on every platform.
# NOTE(review): assumes the default cache location (~/.cache/huggingface/hub) and that
# this snapshot is already present there -- confirm on a fresh machine.
checkpoint = (Path.home() / '.cache' / 'huggingface' / 'hub'
              / 'models--PORTULAN--albertina-ptbr-base'
              / 'snapshots' / 'b6fb59d5f833001988d393a0137c64b4ec641777').as_posix()
checkpoint_type = 'deberta'

### Base model: $x$

In [9]:
train_output = Path('results/models/albertina_ptbr_base/fold_0')
test_output = Path('results/predictions/albertina_ptbr_base/fold_0.csv')

###### TRAINING ######
# Text-only baseline: fine-tune a sequence classifier on the `Text` column.
train_corpus = pd.read_json(train_corpus_path)

# Sort the distinct labels so the label <-> id mapping is the same no matter
# how the rows of the training file are ordered; .tolist() converts the
# values to plain Python objects before they are passed to from_pretrained.
labels = sorted(train_corpus['Label'].unique().tolist())
num_labels = len(labels)
label2id = {label: id_ for id_, label in enumerate(labels)}
id2label = {id_: label for label, id_ in label2id.items()}
train_corpus['Label'] = train_corpus['Label'].map(label2id)

data = Dataset.from_pandas(train_corpus[['Text', 'Label']])
data = data.rename_column('Label', 'label')

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize a batch of rows from the `Text` column.

    Truncation is requested so that a text longer than the tokenizer's
    configured maximum length does not reach the model untruncated.
    NOTE(review): this relies on the checkpoint's tokenizer declaring a
    maximum length -- confirm for each checkpoint used.
    """
    return tokenizer(examples['Text'], truncation=True)


tokenized_data = data.map(tokenize_function, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(checkpoint,
                                                           num_labels=num_labels,
                                                           label2id=label2id,
                                                           id2label=id2label)

# Checkpoint once per epoch and keep at most one checkpoint on disk.
training_args = TrainingArguments(train_output,
                                  save_strategy='epoch',
                                  save_total_limit=1,
                                  learning_rate=5e-5,
                                  num_train_epochs=5)
trainer = Trainer(model, training_args,
                  train_dataset=tokenized_data,
                  data_collator=data_collator,
                  tokenizer=tokenizer)
trainer.train()

###### PREDICTION ######
test_corpus = pd.read_json(test_corpus_path)
test_data = test_corpus['Text'].to_list()

# Run inference on the device the fine-tuned model is already on instead of
# hard-coding GPU 0, so the cell also works on a machine without CUDA.
text_classification = pipeline('text-classification',
                               model=model,
                               tokenizer=tokenizer,
                               device=model.device)
# Same truncation policy as during training.
predictions = text_classification(test_data, truncation=True)

# One row per test example: predicted label name next to the gold label.
results = pd.DataFrame(predictions, index=test_corpus.index)
results = results.drop(columns='score')
results = results.rename(columns={'label': 'Prediction'})
results['Label'] = test_corpus['Label']

test_output.parent.mkdir(parents=True, exist_ok=True)
results.to_csv(test_output, encoding='utf-8')


Map:   0%|          | 0/8825 [00:00<?, ? examples/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at C:/Users/Márcio Lima/.cache/huggingface/hub/models--PORTULAN--albertina-ptbr-base/snapshots/b6fb59d5f833001988d393a0137c64b4ec641777 and are newly initialized: ['pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/5520 [00:00<?, ?it/s]

You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.7024, 'learning_rate': 4.547101449275363e-05, 'epoch': 0.45}
{'loss': 0.6947, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.6949, 'learning_rate': 3.641304347826087e-05, 'epoch': 1.36}
{'loss': 0.6959, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.695, 'learning_rate': 2.7355072463768118e-05, 'epoch': 2.26}
{'loss': 0.6938, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.6939, 'learning_rate': 1.8297101449275363e-05, 'epoch': 3.17}
{'loss': 0.6945, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.6937, 'learning_rate': 9.239130434782608e-06, 'epoch': 4.08}
{'loss': 0.694, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'loss': 0.6934, 'learning_rate': 1.8115942028985507e-07, 'epoch': 4.98}
{'train_runtime': 830.6053, 'train_samples_per_second': 53.124, 'train_steps_per_second': 6.646, 'train_loss': 0.6951023893079896, 'epoch': 5.0}


### Concatenation: $x\|n$

In [4]:
# Output locations for the concatenation variant (fold 0).
variant = 'albertina_ptbr_base_concatenation'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelConcatenation, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.7058, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.695, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.6935, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.6933, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.6925, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 752.2847, 'train_samples_per_second': 58.655, 'train_steps_per_second': 3.669, 'train_loss': 0.6955817733985791, 'epoch': 5.0}


### Features pooling: $x\|\mathrm{MLP}(n)$

In [5]:
# Output locations for the features-pooling variant (fold 0).
variant = 'albertina_ptbr_base_features_pooling'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelFeaturesPooling, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.7013, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.6956, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.6946, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.6939, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.6932, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 992.6211, 'train_samples_per_second': 44.453, 'train_steps_per_second': 2.781, 'train_loss': 0.6955286882925724, 'epoch': 5.0}


### Shared representation: $\mathrm{MLP}(x\|\mathrm{MLP}(n))$

In [6]:
# Output locations for the shared-representation variant (fold 0).
variant = 'albertina_ptbr_base_shared_representation'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelSharedRepresentation, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.6941, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.6926, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.6913, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.6911, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.6901, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 970.4675, 'train_samples_per_second': 45.468, 'train_steps_per_second': 2.844, 'train_loss': 0.6917266126992045, 'epoch': 5.0}


## Albertina PT-PT Base

In [3]:
# Local Hugging Face cache snapshot directory (`models--PORTULAN--albertina-ptpt-base`),
# built from the current user's home directory rather than a hard-coded per-user
# absolute path. as_posix() keeps forward slashes on every platform.
# NOTE(review): assumes the default cache location (~/.cache/huggingface/hub) and that
# this snapshot is already present there -- confirm on a fresh machine.
checkpoint = (Path.home() / '.cache' / 'huggingface' / 'hub'
              / 'models--PORTULAN--albertina-ptpt-base'
              / 'snapshots' / 'b0a8b33132b56c09e6a67fbc24db8c655cae8f76').as_posix()
checkpoint_type = 'deberta'

### Base model: $x$

In [4]:
train_output = Path('results/models/albertina_ptpt_base/fold_0')
test_output = Path('results/predictions/albertina_ptpt_base/fold_0.csv')

###### TRAINING ######
# Text-only baseline: fine-tune a sequence classifier on the `Text` column.
train_corpus = pd.read_json(train_corpus_path)

# Sort the distinct labels so the label <-> id mapping is the same no matter
# how the rows of the training file are ordered; .tolist() converts the
# values to plain Python objects before they are passed to from_pretrained.
labels = sorted(train_corpus['Label'].unique().tolist())
num_labels = len(labels)
label2id = {label: id_ for id_, label in enumerate(labels)}
id2label = {id_: label for label, id_ in label2id.items()}
train_corpus['Label'] = train_corpus['Label'].map(label2id)

data = Dataset.from_pandas(train_corpus[['Text', 'Label']])
data = data.rename_column('Label', 'label')

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize a batch of rows from the `Text` column.

    Truncation is requested so that a text longer than the tokenizer's
    configured maximum length does not reach the model untruncated.
    NOTE(review): this relies on the checkpoint's tokenizer declaring a
    maximum length -- confirm for each checkpoint used.
    """
    return tokenizer(examples['Text'], truncation=True)


tokenized_data = data.map(tokenize_function, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(checkpoint,
                                                           num_labels=num_labels,
                                                           label2id=label2id,
                                                           id2label=id2label)

# Checkpoint once per epoch and keep at most one checkpoint on disk.
training_args = TrainingArguments(train_output,
                                  save_strategy='epoch',
                                  save_total_limit=1,
                                  learning_rate=5e-5,
                                  num_train_epochs=5)
trainer = Trainer(model, training_args,
                  train_dataset=tokenized_data,
                  data_collator=data_collator,
                  tokenizer=tokenizer)
trainer.train()

###### PREDICTION ######
test_corpus = pd.read_json(test_corpus_path)
test_data = test_corpus['Text'].to_list()

# Run inference on the device the fine-tuned model is already on instead of
# hard-coding GPU 0, so the cell also works on a machine without CUDA.
text_classification = pipeline('text-classification',
                               model=model,
                               tokenizer=tokenizer,
                               device=model.device)
# Same truncation policy as during training.
predictions = text_classification(test_data, truncation=True)

# One row per test example: predicted label name next to the gold label.
results = pd.DataFrame(predictions, index=test_corpus.index)
results = results.drop(columns='score')
results = results.rename(columns={'label': 'Prediction'})
results['Label'] = test_corpus['Label']

test_output.parent.mkdir(parents=True, exist_ok=True)
results.to_csv(test_output, encoding='utf-8')


Map:   0%|          | 0/8825 [00:00<?, ? examples/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at C:/Users/Márcio Lima/.cache/huggingface/hub/models--PORTULAN--albertina-ptpt-base/snapshots/b0a8b33132b56c09e6a67fbc24db8c655cae8f76 and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/5520 [00:00<?, ?it/s]

You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.702, 'learning_rate': 4.547101449275363e-05, 'epoch': 0.45}
{'loss': 0.6968, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.6973, 'learning_rate': 3.641304347826087e-05, 'epoch': 1.36}
{'loss': 0.697, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.6956, 'learning_rate': 2.7355072463768118e-05, 'epoch': 2.26}
{'loss': 0.6949, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.6947, 'learning_rate': 1.8297101449275363e-05, 'epoch': 3.17}
{'loss': 0.695, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.6945, 'learning_rate': 9.239130434782608e-06, 'epoch': 4.08}
{'loss': 0.6941, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'loss': 0.6936, 'learning_rate': 1.8115942028985507e-07, 'epoch': 4.98}
{'train_runtime': 838.9497, 'train_samples_per_second': 52.596, 'train_steps_per_second': 6.58, 'train_loss': 0.6959295789400737, 'epoch': 5.0}


### Concatenation: $x\|n$

In [4]:
# Output locations for the concatenation variant (fold 0).
variant = 'albertina_ptpt_base_concatenation'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelConcatenation, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.5879, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.4554, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.3538, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.2672, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.2058, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 744.3634, 'train_samples_per_second': 59.279, 'train_steps_per_second': 3.708, 'train_loss': 0.35547824389692667, 'epoch': 5.0}


### Features pooling: $x\|\mathrm{MLP}(n)$

In [5]:
# Output locations for the features-pooling variant (fold 0).
variant = 'albertina_ptpt_base_features_pooling'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelFeaturesPooling, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.5881, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.4583, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.3607, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.2814, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.217, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 740.2392, 'train_samples_per_second': 59.609, 'train_steps_per_second': 3.729, 'train_loss': 0.3624687775321629, 'epoch': 5.0}


### Shared representation: $\mathrm{MLP}(x\|\mathrm{MLP}(n))$

In [4]:
# Output locations for the shared-representation variant (fold 0).
variant = 'albertina_ptpt_base_shared_representation'
train_output = Path('results/models') / variant / 'fold_0'
test_output = Path('results/predictions') / variant / 'fold_0.csv'

# Train with the corpus and feature table, then hand the returned model to predict().
trained_model = train(train_corpus_path, train_features_path,
                      checkpoint, checkpoint_type,
                      ClassificationModelSharedRepresentation, train_output)
predict(test_corpus_path, test_features_path, trained_model, test_output)

  0%|          | 0/2760 [00:00<?, ?it/s]

{'loss': 0.6731, 'learning_rate': 4.094202898550725e-05, 'epoch': 0.91}
{'loss': 0.6503, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.81}
{'loss': 0.6462, 'learning_rate': 2.282608695652174e-05, 'epoch': 2.72}
{'loss': 0.6426, 'learning_rate': 1.3768115942028985e-05, 'epoch': 3.62}
{'loss': 0.6375, 'learning_rate': 4.710144927536232e-06, 'epoch': 4.53}
{'train_runtime': 749.9174, 'train_samples_per_second': 58.84, 'train_steps_per_second': 3.68, 'train_loss': 0.6492790719737177, 'epoch': 5.0}
