This notebook compares two identical models trained on different data: one with the Anger and Joy classes from XED and neutrals also from XED, and the other with the same classes from XED but neutrals from TDT. The trained models are then compared on in-domain data as well as out-of-domain data.

In [1]:
# Set the file paths here
# All files are read as tab-separated text by load_fields.
# TDT training split: columns 1 and 2 are used as text and label.
tdt_train_fn = '/content/tdt-sentiment-151020-train-clean.tsv'
# TDT evaluation split: same column layout as the training split.
tdt_eval_fn = '/content/tdt-sentiment-151020-dev.tsv'
# XED annotated (non-neutral) sentences: columns 0 and 1 are used as text and comma-separated labels.
xed_nonneutrals_fn = '/content/fi-annotated.tsv'
# XED neutral sentences: only column 1 (the text) is used.
xed_neutrals_fn = '/content/neu_fi.txt'
# Out-of-domain sentences: column 1 is used and the first row is skipped.
twitter_and_s24_fn = '/content/twitter-and-s24-sentences.tsv'

In [None]:
# NOTE(review): the version is not pinned, so a fresh run may install a different
# transformers release than the one that produced the recorded outputs — TODO pin it.
!pip install transformers

In [3]:
# Choose model and set up input

from sklearn.model_selection import train_test_split
from transformers import BertTokenizerFast
import tensorflow as tf
import random

def transpose(l):
  """Swap the rows and columns of a list of sequences.

  Returns a list of lists. As with zip, the result is cut to the length of
  the shortest row, and an empty input yields an empty list.
  """
  return list(map(list, zip(*l)))

def load_fields(fn):
  """Read a tab-separated text file and return its columns.

  fn: path of the file to read.

  Returns a list with one list per column (the transpose of the rows).
  Because the transpose is zip-based, columns are cut to the width of the
  shortest row.
  """
  # The context manager closes the handle, which the bare open() never did.
  # The encoding is explicit so that reading does not depend on the platform
  # default; this assumes the data files are UTF-8 — TODO confirm.
  with open(fn, encoding='utf-8') as f:
    rows = [line.rstrip('\n').split('\t') for line in f]
  return transpose(rows)

def label_vector(num_labels, labels):
  """Turn lists of label positions into multi-hot vectors.

  num_labels: length of every output vector.
  labels: iterable of iterables of integer positions to switch on.

  Returns one freshly built list of 0/1 values per entry of `labels`.
  """
  def multi_hot(active):
    row = [0] * num_labels
    for position in active:
      row[position] = 1
    return row

  return [multi_hot(active) for active in labels]

# TDT data: columns 1 and 2 of each file are taken as the text and its raw label string.
tdt_train_texts, tdt_train_labels_raw = load_fields(tdt_train_fn)[1:3]
tdt_eval_texts, tdt_eval_labels_raw = load_fields(tdt_eval_fn)[1:3]

# XED non-neutral data: columns 0 and 1 are the text and a comma-separated label string.
nonneutral_texts, nonneutral_labels_raw = load_fields(xed_nonneutrals_fn)[:2]
# XED neutral data: only column 1 (the text) is kept.
xed_neutral_texts_full = load_fields(xed_neutrals_fn)[1]

# Parse every label string into a list of ints. Each '8' character is rewritten to '0'
# first — presumably so that label 8 lands on index 0 ('trust') of index_to_name.
# NOTE(review): this is a character-level replace, so it assumes single-digit labels.
nonneutral_labels = [[int(s) for s in l.replace('8', '0').split(',')] for l in nonneutral_labels_raw]

# Emotion classes kept for this experiment, given as keys of index_to_name.
nonneutral_classes = {1, 5}
# Position in the model's label vector -> original class index. The classes are
# sorted so that the column order does not depend on set iteration order.
encoded_to_index = dict(enumerate(sorted(nonneutral_classes)))
# Original class index -> position in the model's label vector.
index_to_encoded = {v: k for k, v in encoded_to_index.items()}
# Original class index -> human-readable emotion name.
index_to_name = dict(enumerate(['trust', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'sadness', 'surprise']))
# Width of the multi-hot label vectors (neutral is the all-zero vector).
num_labels = len(nonneutral_classes)

# Keep only the XED examples whose labels all fall within the chosen classes.
chosen_nonneutral_texts, chosen_nonneutral_labels = transpose(
    [[t, l] for t, l in zip(nonneutral_texts, nonneutral_labels) if set(l) <= nonneutral_classes])

# TDT neutrals: every text labelled 'neutral' gets an all-zero label vector.
train_neutral_texts, train_neutral_label_vectors = transpose(
    [[t, [0]*num_labels] for t, l in zip(tdt_train_texts, tdt_train_labels_raw) if l == 'neutral'])
eval_neutral_texts, eval_neutral_label_vectors = transpose(
    [[t, [0]*num_labels] for t, l in zip(tdt_eval_texts, tdt_eval_labels_raw) if l == 'neutral'])

# Draw as many XED neutrals as there are TDT neutrals (train + eval together),
# so both variants see the same number of neutral examples.
seed = 0
random.seed(seed)
xed_neutral_texts = random.sample(xed_neutral_texts_full, len(train_neutral_texts) + len(eval_neutral_texts))
# One independent zero vector per text; `[[0]*n]*k` would repeat a single shared
# list object k times, so mutating one row would silently change them all.
xed_neutral_label_vectors = [[0]*num_labels for _ in xed_neutral_texts]

# Re-index the chosen XED labels to label-vector positions and build multi-hot vectors.
chosen_nonneutral_label_vectors = label_vector(num_labels, [[index_to_encoded[e] for e in l] for l in chosen_nonneutral_labels])

# 90/10 split of the non-neutral data; it is shared by both training variants.
train_nonneutral_texts, eval_nonneutral_texts, train_nonneutral_label_vectors, eval_nonneutral_label_vectors = train_test_split(
    chosen_nonneutral_texts, chosen_nonneutral_label_vectors, test_size=0.1, random_state=seed)
# Split the XED neutrals so that their eval part is as large as the TDT neutral eval set.
xed_train_neutral_texts, xed_eval_neutral_texts, xed_train_neutral_label_vectors, xed_eval_neutral_label_vectors = train_test_split(
    xed_neutral_texts, xed_neutral_label_vectors, test_size=len(eval_neutral_texts), random_state=seed)

# TDT-neutral variant of the data: TDT neutrals followed by the XED non-neutrals.
train_texts = train_neutral_texts + train_nonneutral_texts
train_label_vectors = tf.constant(train_neutral_label_vectors + train_nonneutral_label_vectors, dtype='float32')

eval_texts = eval_neutral_texts + eval_nonneutral_texts
eval_label_vectors = tf.constant(eval_neutral_label_vectors + eval_nonneutral_label_vectors, dtype='float32')

# Pretrained checkpoint used for both the tokenizer and the classifier.
#model_name = "TurkuNLP/bert-base-finnish-cased-v1"
model_name = "TurkuNLP/bert-base-finnish-uncased-v1"

tokenizer = BertTokenizerFast.from_pretrained(model_name)

# Maximum sequence length passed to the tokenizer; truncation is enabled for longer inputs.
input_size = 128

# Each set is tokenized separately with padding='longest', so the train and eval
# encodings may end up with different sequence widths.
train_encodings = tokenizer(train_texts, truncation=True, padding='longest', max_length=input_size)
eval_encodings = tokenizer(eval_texts, truncation=True, padding='longest', max_length=input_size)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=427471.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=818655.0, style=ProgressStyle(descripti…




In [4]:
# Set up training
from transformers import TFBertForSequenceClassification, optimization_tf
from math import inf

# Initial learning rate handed to create_optimizer in train().
init_lr = 2e-5

# Number of epochs passed to model.fit; also used to size the learning-rate schedule.
epochs = 2
# Batch size passed to model.fit; also used to size the learning-rate schedule.
batch_size_train = 16
# NOTE(review): not referenced by any of the visible cells.
batch_size_eval = 16

def train(model, t, train_labels, eval):
  """Compile `model` with a warmup schedule optimizer and fit it.

  model: Keras model to train; it is compiled here and returned after fitting.
  t: model inputs passed to model.fit as x.
  train_labels: training targets; their count sizes the learning-rate schedule.
  eval: value passed to model.fit as validation_data (a tuple or None).

  Uses the module-level init_lr, epochs and batch_size_train settings. The
  loss is sigmoid cross-entropy on logits, i.e. each label is scored as an
  independent binary decision.
  """
  size_train = len(train_labels)
  # Round up: Keras also runs the final partial batch of every epoch. With a
  # floored count the schedule ended a few steps before training did.
  steps_per_epoch = (size_train + batch_size_train - 1) // batch_size_train
  steps_train = steps_per_epoch*epochs
  # Warm up over roughly the first 10% of all optimizer steps.
  steps_warmup = int(epochs * size_train * 0.1 / batch_size_train)
  optimizer, _ = optimization_tf.create_optimizer(init_lr=init_lr,
                                                  num_train_steps=steps_train,
                                                  num_warmup_steps=steps_warmup,
                                                  weight_decay_rate=0.01)
  model.compile(optimizer=optimizer, loss=tf.nn.sigmoid_cross_entropy_with_logits, metrics=[])
  model.fit(t,
            train_labels,
            validation_data=eval,
            batch_size=batch_size_train,
            epochs=epochs)
  return model

In [5]:
# Set up evaluation
from sklearn.metrics import classification_report, accuracy_score, f1_score
import numpy as np

def train_evaluate(train_x, train_y, eval_x, eval_y, num_labels, run_count):
  """Fine-tune a fresh classifier `run_count` times and collect its predictions.

  train_x, train_y: training inputs and targets handed to train().
  eval_x: inputs to predict on after each run.
  eval_y: targets for eval_x, or None when no gold labels exist.
  num_labels: size of the classification head.
  run_count: number of independent training runs.

  Returns a list with one entry per run: the first element of the output of
  bert.predict(eval_x) (the logits, which callers pass through a sigmoid).
  """
  # Without gold labels there is nothing to validate against, so validation
  # is skipped instead of handing Keras an (inputs, None) pair.
  validation = (eval_x, eval_y) if eval_y is not None else None
  runs = []
  for _ in range(run_count):
    # Every run starts again from the pretrained checkpoint named by model_name.
    bert = TFBertForSequenceClassification.from_pretrained(model_name,
                                                           num_labels=num_labels)
    bert = train(bert, train_x, train_y, validation)
    runs.append(bert.predict(eval_x)[0])
  return runs

def format_floats(l):
  """Render numbers with four decimals, joined by a comma and a space."""
  return ', '.join(map('{:.4f}'.format, l))

def print_results(runs, eval_label_vectors):
  """Print per-run and averaged metrics, then a report for the most accurate run.

  runs: list of logit arrays, one per training run (as returned by train_evaluate).
  eval_label_vectors: multi-hot gold labels for the evaluation set.

  Logits are passed through a sigmoid and thresholded at 0.5 to get multi-hot
  predictions. Label names for the report come from the module-level
  index_to_name, encoded_to_index and num_labels.
  """
  preds = [(tf.math.sigmoid(r) >= 0.5).numpy().tolist() for r in runs]
  accuracy = [accuracy_score(eval_label_vectors, p) for p in preds]
  # zero_division=0 is the value scikit-learn already falls back to; stating
  # it explicitly silences the undefined-metric warnings (neutral rows have
  # no labels at all) without changing any reported number.
  weighted_f1 = [f1_score(eval_label_vectors, p, average='weighted', zero_division=0) for p in preds]
  print(f"Accuracy: {format_floats(accuracy)}")
  print(f"Weighted F-score: {format_floats(weighted_f1)}")
  print(f'Average accuracy: {np.mean(accuracy):.4f}, stdev: {np.std(accuracy):.4f}')
  print(f'Average weighted F-score: {np.mean(weighted_f1):.4f}, stdev: {np.std(weighted_f1):.4f}')
  # The detailed report is shown only for the run with the highest accuracy.
  max_i = accuracy.index(max(accuracy))
  target_names = [index_to_name[encoded_to_index[i]] for i in range(num_labels)]
  print(classification_report(eval_label_vectors, preds[max_i], target_names=target_names, digits=4, zero_division=0))

def _model_inputs(encodings):
  """Return [input_ids, attention_mask, token_type_ids] of a tokenizer output as tensors."""
  return [tf.constant(encodings.data[key])
          for key in ('input_ids', 'attention_mask', 'token_type_ids')]

# Inputs for the variant trained with TDT neutrals.
t = _model_inputs(train_encodings)
e = _model_inputs(eval_encodings)

# Variant trained with XED neutrals; the non-neutral examples are the same as above.
xed_train_texts = xed_train_neutral_texts + train_nonneutral_texts
xed_eval_texts = xed_eval_neutral_texts + eval_nonneutral_texts
xed_train_label_vectors = tf.constant(
    xed_train_neutral_label_vectors + train_nonneutral_label_vectors, dtype='float32')
xed_eval_label_vectors = tf.constant(
    xed_eval_neutral_label_vectors + eval_nonneutral_label_vectors, dtype='float32')

xed_train_encodings = tokenizer(
    xed_train_texts, truncation=True, padding='longest', max_length=input_size)
xed_eval_encodings = tokenizer(
    xed_eval_texts, truncation=True, padding='longest', max_length=input_size)

t_xed = _model_inputs(xed_train_encodings)
e_xed = _model_inputs(xed_eval_encodings)

In [6]:
# Evaluate with in-domain data

# Train each variant three times from the pretrained checkpoint; each entry of
# runs_* holds the eval-set logits of one run.
runs_tdt = train_evaluate(t, train_label_vectors, e, eval_label_vectors, num_labels, 3)
runs_xed = train_evaluate(t_xed, xed_train_label_vectors, e_xed, xed_eval_label_vectors, num_labels, 3)

# Each model is scored on its own evaluation set: TDT neutrals for the first,
# XED neutrals for the second; the non-neutral eval examples are shared.
print(f"Model: {model_name}, initial learning rate = {init_lr}, input size = {input_size}, batch size = {batch_size_train}, epochs = {epochs}")
print_results(runs_tdt, eval_label_vectors)
print_results(runs_xed, xed_eval_label_vectors)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=656434900.0, style=ProgressStyle(descri…




All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at TurkuNLP/bert-base-finnish-uncased-v1 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: <cyfunction Socket.send at 0x7f92eebc4e58> is not a module, class, method, function, traceback, frame, or code object
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: <cyfunction Socket.send at 0x7f92eebc4e58> is not a module, class, method, function, traceback, frame, or code object


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).


Cause: while/else statement not yet supported


The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Cause: while/else statement not yet supported


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Epoch 2/2


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at TurkuNLP/bert-base-finnish-uncased-v1 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in g

Epoch 1/2


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Epoch 2/2


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Model: TurkuNLP/bert-base-finnish-uncased-v1, initial learning rate = 2e-05, input size = 128, batch size = 16, epochs = 2
Accuracy: 0.9406
Weighted F-score: 0.8691
Average accuracy: 0.9406, stdev: 0.0000
Average weighted F-score: 0.8691, stdev: 0.0000
              precision    recall  f1-score   support

       anger     0.8852    0.8617    0.8733       188
         joy     0.8442    0.8844    0.8638       147

   micro avg     0.8665    0.8716    0.8690       335
   macro avg     0.8647    0.8730    0.8686       335
weighted avg     0.8672    0.8716    0.8691       335
 samples avg     0.2437    0.2437    0.2436       335

Accuracy: 0.8562
Weighted F-score: 0.7145
Average accuracy: 0.8562, stdev: 0.0000
Average weighted F-score: 0.7145, stdev: 0.0000
              precision    recall  f1-score   support

       anger     0.7160    0.6170    0.6629       188
         joy     0.8000    0.7619    0.7805       147

   micro avg     0.7550    0.6806    0.7159       335
   macro avg     0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# Evaluate with out-of-domain data

import csv
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Column 1 of the out-of-domain file, with the first row dropped
# (presumably a header row — TODO confirm against the file).
ood_texts = load_fields(twitter_and_s24_fn)[1][1:]

# Tokenized with the same settings as the training and evaluation sets.
ood_encodings = tokenizer(ood_texts, truncation=True, padding='longest', max_length=input_size)

# Model inputs in the same order as the other input lists: ids, attention mask, token types.
e_ood = [tf.constant(ood_encodings.data['input_ids']),
         tf.constant(ood_encodings.data['attention_mask']),
         tf.constant(ood_encodings.data['token_type_ids'])]

def print_agreement_results(pred1, pred2):
  """Print how closely two sets of predictions agree.

  pred1 is used as the reference and pred2 as the prediction for the accuracy
  and the weighted F-score; the "reverse" F-score swaps the two roles.
  """
  agreement = accuracy_score(pred1, pred2)
  forward_f1 = f1_score(pred1, pred2, average='weighted')
  print(f"Accuracy: {agreement}")
  print(f"Weighted F-score: {forward_f1}")
  # Evaluated only after the first two lines are printed, as before.
  backward_f1 = f1_score(pred2, pred1, average='weighted')
  print(f"Reverse F-score: {backward_f1}")

def list_labels(l):
  """Return the positions of the truthy entries of `l`, in order."""
  positions = []
  for position, flag in enumerate(l):
    if flag:
      positions.append(position)
  return positions

def write_disagreement(texts, pred1, pred2, path='/content/tdt_xed_disagreement_ood.tsv'):
  """Write the texts on which the two models disagree to a TSV file.

  texts: input sentences.
  pred1, pred2: per-text predictions of the TDT-neutral and XED-neutral
    models, aligned with `texts`; rows where they differ are kept.
  path: output file; defaults to the location the notebook has always used.

  Prints the first five disagreeing rows, then writes a header followed by
  one (text, pred1, pred2) row per disagreement.
  """
  incorrect = [t for t in zip(texts, pred1, pred2) if t[1] != t[2]]
  print(incorrect[:5])
  # newline='' lets the csv module control line endings, as its documentation
  # requires; the explicit encoding keeps non-ASCII text intact on any platform.
  with open(path, 'wt', newline='', encoding='utf-8') as file:
    tsv_writer = csv.writer(file, delimiter='\t')
    tsv_writer.writerow(('text', 'tdt_neutrals', 'xed_neutrals'))
    tsv_writer.writerows(incorrect)

def write_all(texts, pred1, pred2, path='/content/tdt_xed_probabilities_ood.tsv'):
  """Write both models' per-label probabilities for every text to a TSV file.

  texts: input sentences.
  pred1, pred2: per-text probability vectors of the TDT-neutral and
    XED-neutral models, aligned with `texts`.
  path: output file; defaults to the location the notebook has always used.

  Prints the first five rows, then writes a header and one row per text.
  There is one column per label and model, named after the module-level
  encoded_to_index / index_to_name mappings, so the layout follows the number
  of labels instead of being fixed at two.
  """
  # Named `rows` rather than `all` so the builtin is not shadowed.
  rows = [(text, *p1, *p2) for text, p1, p2 in zip(texts, pred1, pred2)]
  print(rows[:5])
  names = [index_to_name[encoded_to_index[i]] for i in range(len(encoded_to_index))]
  header = tuple(['text']
                 + ['tdt_neutral_' + name for name in names]
                 + ['xed_neutral_' + name for name in names])
  # newline='' lets the csv module control line endings, as its documentation
  # requires; the explicit encoding keeps non-ASCII text intact on any platform.
  with open(path, 'wt', newline='', encoding='utf-8') as file:
    tsv_writer = csv.writer(file, delimiter='\t')
    tsv_writer.writerow(header)
    tsv_writer.writerows(rows)

# Train one fresh model per variant and predict on the out-of-domain texts.
# No gold labels exist for this data, so None is passed as eval_y.
# NOTE(review): runs_tdt / runs_xed are rebound here. In the in-domain cell they
# hold a list of runs; here they hold the logits of a single run ([0]).
runs_tdt = train_evaluate(t, train_label_vectors, e_ood, None, num_labels, 1)[0]
runs_xed = train_evaluate(t_xed, xed_train_label_vectors, e_ood, None, num_labels, 1)[0]
# Multi-hot predictions: sigmoid of the logits thresholded at 0.5.
pred1 = (tf.math.sigmoid(runs_tdt) >= 0.5).numpy().tolist()
pred2 = (tf.math.sigmoid(runs_xed) >= 0.5).numpy().tolist()
# Readable form of each prediction: comma-joined emotion names, or 'neutral' when no label fires.
list1 = [','.join(n) if n else 'neutral' for n in [[index_to_name[encoded_to_index[e]] for e in list_labels(l)] for l in pred1]]
list2 = [','.join(n) if n else 'neutral' for n in [[index_to_name[encoded_to_index[e]] for e in list_labels(l)] for l in pred2]]

# With no gold labels, the two models are scored against each other.
print(f"Model: {model_name}, initial learning rate = {init_lr}, input size = {input_size}, batch size = {batch_size_train}, epochs = {epochs}")
print_agreement_results(pred1, pred2)
write_disagreement(ood_texts, list1, list2)
# The probabilities (sigmoid outputs) of both models are saved for every text.
write_all(ood_texts, tf.math.sigmoid(runs_tdt).numpy().tolist(), tf.math.sigmoid(runs_xed).numpy().tolist())

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at TurkuNLP/bert-base-finnish-uncased-v1 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Epoch 1/2


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Epoch 2/2


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at TurkuNLP/bert-base-finnish-uncased-v1 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in g

Epoch 1/2


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Epoch 2/2


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Model: TurkuNLP/bert-base-finnish-uncased-v1, initial learning rate = 2e-05, input size = 128, batch size = 16, epochs = 2
Accuracy: 0.6456086286594761
Weighted F-score: 0.4392528362869333
Reverse F-score: 0.4379080857786892
[('vai onko taas, ettei heitä saa sakottaa, kun he tuovat imatralle niin paljon rahaa.......', 'neutral', 'anger'), ('Valitettavasti.', 'anger', 'neutral'), ('Ympäristötukea pitää myöntää sen mukaan, mitkä ovat tuen myöntämisen perusteet.', 'neutral', 'anger'), ('Perusteisiin tuskin kuuluu se, että joku tekee kauppoja suomalaisen (=Suomessa sijaitsevan) telakkateollisuuden kanssa.', 'neutral', 'anger'), ('Kaikki tuet vääristävät markkinoita, minkä vuoksi ne pitäisi pyrkiä kieltämään kansainvälisesti yhteisin sopimuksin.', 'neutral', 'anger')]
[('No kuka hel-vetti sinnekin muka on menossa?', 0.8972923755645752, 0.00721206096932292, 0.8505796790122986, 0.007059789728373289), ('Eilen noin klo 18 poliisien siiviiliskoda ajeli tutkan kanssa pietarintietä edestakaisin ra