A straightforward multi-label emotion classification experiment: the data are the non-neutral (emotion-annotated) Finnish sentences from XED, and the model is FinBERT loaded through TFBertForSequenceClassification.

In [1]:
# Set the file paths here.
# The file is read as tab-separated text with two fields per line: the
# sentence and a comma-separated list of integer label ids.
# NOTE(review): this is an absolute path (looks like a Colab working
# directory) — adjust it when running elsewhere.
xed_nonneutrals_fn = '/content/fi-annotated.tsv'

In [None]:
# Install the transformers package, which provides the tokenizer, the TF BERT
# classifier and the optimizer helper imported in the cells below.
# NOTE(review): the version is not pinned — consider pinning it to the release
# this notebook was last run with, so that a fresh run stays reproducible.
!pip install transformers

In [6]:
# Choose model and set up input

from sklearn.model_selection import train_test_split
from transformers import BertTokenizerFast
import tensorflow as tf

def transpose(l):
  """Turn a sequence of rows into a list of columns.

  Every column comes back as a fresh list. Rows of unequal length are cut to
  the shortest row, because the pairing is done with zip().
  """
  return list(map(list, zip(*l)))

def load_fields(fn):
  """Read a tab-separated text file and return its contents column-wise.

  Args:
    fn: path of the file to read; it is decoded as UTF-8.

  Returns:
    A list holding one list per column, i.e. the rows of the file passed
    through transpose(). Blank lines are ignored.
  """
  rows = []
  # Explicit encoding so the text is decoded the same way on every platform;
  # the `with` block closes the file handle as soon as reading is done.
  with open(fn, encoding='utf-8') as f:
    for line in f:
      line = line.rstrip('\n')
      # A blank line would become a one-field row and, because transpose()
      # truncates to the shortest row, silently drop every later column.
      if line:
        rows.append(line.split('\t'))
  return transpose(rows)

# First column: sentences. Second column: raw comma-separated label strings.
texts, labels_raw = load_fields(xed_nonneutrals_fn)

num_labels = 8

# Parse the label ids. The character '8' is rewritten to '0' beforehand so
# that every id can be used as an index into a vector of length num_labels.
labels = [list(map(int, raw.replace('8', '0').split(','))) for raw in labels_raw]

# Multi-hot encoding: start from all-zero rows and switch on the position of
# every label id attached to the sentence.
binary_labels = [[0] * num_labels for _ in labels]
for row, label_ids in zip(binary_labels, labels):
  for label_id in label_ids:
    row[label_id] = 1

print(texts[:5])
print(binary_labels[:5])

# Hold out 10% of the sentences for evaluation. The fixed random_state keeps
# the split identical across kernel restarts, so scores from different
# sessions are measured on the same evaluation set. (Model training itself is
# still not seeded.)
train_texts, eval_texts, train_labels, eval_labels = train_test_split(
    texts, binary_labels, test_size=0.1, random_state=42)

# Pretrained checkpoint to fine-tune (cased variant; the uncased one is kept
# on the next line as an alternative).
model_name = "TurkuNLP/bert-base-finnish-cased-v1"
#model_name = "TurkuNLP/bert-base-finnish-uncased-v1"

tokenizer = BertTokenizerFast.from_pretrained(model_name)

# Both sets are tokenized with the same options: truncate at input_size
# tokens and pad up to the longest sequence of the set being encoded.
input_size = 64
encode_options = dict(truncation=True, padding='longest', max_length=input_size)
train_encodings = tokenizer(train_texts, **encode_options)
eval_encodings = tokenizer(eval_texts, **encode_options)

# Model inputs as lists of tensors, always in the order
# input ids, attention mask, token type ids.
input_keys = ('input_ids', 'attention_mask', 'token_type_ids')

t = [tf.constant(train_encodings.data[key]) for key in input_keys]

e = [tf.constant(eval_encodings.data[key]) for key in input_keys]

['Kuinka se vapautui niin nopeasti, mutta sinä et ole liikahtanutkaan?', 'Ruumiita ripustettuina - ruumiita, joiden nahka on nyljetty pois, sydämet revitty rinnoista.', 'Ei mitään muttia.', 'Älä anna hänen määräillä sinua!', 'Laske aseet maahan.']
[[0, 1, 0, 1, 0, 0, 0, 1], [0, 1, 0, 1, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 1, 0, 0, 0]]


In [10]:
# Set up training
from transformers import TFBertForSequenceClassification, optimization_tf

# Learning rate handed to create_optimizer as init_lr
init_lr = 3e-5
# Number of epochs passed to model.fit (also used to size the LR schedule)
epochs = 2
# Batch size passed to model.fit (also used to size the LR schedule)
batch_size_train = 16

def train(t, train_labels, eval, num_labels, init_lr, epochs, batch_size_train):
  """Fine-tune a freshly loaded pretrained classifier and return it.

  The checkpoint is read from the module-level `model_name`, so that variable
  must be defined before this function is called.

  Args:
    t: training inputs, handed to model.fit unchanged.
    train_labels: per-example label vectors; converted to a float32 tensor.
    eval: pair (inputs, labels) used as validation data during fitting.
    num_labels: number of labels the classification head is created with.
    init_lr: learning rate passed to create_optimizer.
    epochs: number of training epochs.
    batch_size_train: batch size used for fitting.

  Returns:
    The compiled and fitted model.
  """
  size_train = len(train_labels)
  # fit() also runs the last, partially filled batch of every epoch, so the
  # number of optimizer steps per epoch is the ceiling of size / batch size.
  # Rounding down would end the learning-rate schedule before training does.
  steps_per_epoch = (size_train + batch_size_train - 1) // batch_size_train
  steps_train = steps_per_epoch * epochs
  # Warm up over roughly the first 10% of all updates.
  steps_warmup = int(epochs * size_train * 0.1 / batch_size_train)

  model = TFBertForSequenceClassification.from_pretrained(model_name,
                                                          num_labels=num_labels)
  # create_optimizer returns a pair; only the optimizer itself is needed here.
  optimizer, _ = optimization_tf.create_optimizer(init_lr=init_lr,
                                                  num_train_steps=steps_train,
                                                  num_warmup_steps=steps_warmup,
                                                  weight_decay_rate=0.01)
  # Sigmoid cross-entropy on the raw logits scores every label independently,
  # which is what lets one sentence carry several labels at once.
  model.compile(optimizer=optimizer,
                loss=tf.nn.sigmoid_cross_entropy_with_logits,
                metrics=[])
  model.fit(t,
            tf.constant(train_labels, dtype='float32'),
            validation_data=(eval[0], tf.constant(eval[1], dtype='float32')),
            batch_size=batch_size_train,
            epochs=epochs)
  return model

In [13]:
# Set up evaluation

from sklearn.metrics import classification_report, accuracy_score, f1_score
import numpy as np

def train_evaluate(runs, train_x, train_y, eval_x, eval_y, num_labels, init_lr, epochs, batch_size_train, run_count):
  """Train run_count models and collect what each one predicts for eval_x.

  Every model is produced by train() with (eval_x, eval_y) as validation
  data. The first element of its predict() result on eval_x is appended to
  `runs` in place; the function itself returns nothing.
  """
  validation = (eval_x, eval_y)
  for _ in range(run_count):
    model = train(train_x, train_y, validation, num_labels, init_lr, epochs, batch_size_train)
    outputs = model.predict(eval_x)
    runs.append(outputs[0])

def format_floats(l):
  """Render each number with four decimals and join them with ', '."""
  return ', '.join(map('{:.4f}'.format, l))

def print_results(runs, eval_labels):
  """Print per-run and aggregate scores, plus a detailed report for the best run.

  Args:
    runs: one array of logits per trained model, each aligned with
      eval_labels.
    eval_labels: gold multi-hot label vectors of the evaluation set.

  Raises:
    ValueError: if `runs` is empty, since there is nothing to summarise.
  """
  # Fail before printing anything: with no runs the averages would be NaN and
  # the best-run lookup below could not succeed anyway.
  if len(runs) == 0:
    raise ValueError("print_results needs at least one run")

  # A label counts as predicted when its sigmoid probability reaches 0.5.
  preds = [(tf.math.sigmoid(r) >= 0.5).numpy().tolist() for r in runs]
  accuracy = [accuracy_score(eval_labels, p) for p in preds]
  # zero_division=0 gives the same numbers as the default but states the
  # choice explicitly instead of emitting an undefined-metric warning.
  weighted_f1 = [f1_score(eval_labels, p, average='weighted', zero_division=0) for p in preds]
  print(f"Accuracy: {format_floats(accuracy)}")
  print(f"Weighted F-score: {format_floats(weighted_f1)}")
  print(f'Average accuracy: {np.mean(accuracy):.4f}, stdev: {np.std(accuracy):.4f}')
  print(f'Average weighted F-score: {np.mean(weighted_f1):.4f}, stdev: {np.std(weighted_f1):.4f}')

  # The detailed report covers the run with the highest accuracy (first one on ties).
  max_i = accuracy.index(max(accuracy))
  max_p = preds[max_i]
  # Sentences for which no label at all was switched on.
  neutral_count = sum(1 for p in max_p if not any(p))
  print(f"Number of sentences predicted neutral: {neutral_count} out of {len(max_p)}")
  emotion_names = ['trust', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
  print(classification_report(eval_labels, max_p, target_names=emotion_names, digits=4, zero_division=0))

In [14]:
# Train several independent models on the same split, then summarise how
# they score on the held-out sentences.
run_count = 3
runs = []
train_evaluate(runs, t, train_labels, e, eval_labels,
               num_labels, init_lr, epochs, batch_size_train, run_count)
print(f"Model: {model_name}, initial learning rate = {init_lr}, input size = {input_size}, batch size = {batch_size_train}, epochs = {epochs}")
print_results(runs, eval_labels)

Model: TurkuNLP/bert-base-finnish-cased-v1, initial learning rate = 3e-05, input size = 64, batch size = 16, epochs = 2
Accuracy: 0.3578, 0.3689, 0.3557
Weighted F-score: 0.4995, 0.5064, 0.4975
Average accuracy: 0.3608, stdev: 0.0058
Average weighted F-score: 0.5011, stdev: 0.0038
Number of sentences predicted neutral: 271 out of 1445
              precision    recall  f1-score   support

       trust     0.6417    0.4918    0.5568       244
       anger     0.6298    0.4755    0.5419       347
anticipation     0.6667    0.3775    0.4821       249
     disgust     0.5714    0.3636    0.4444       220
        fear     0.6535    0.3825    0.4826       217
         joy     0.7088    0.5811    0.6386       222
     sadness     0.6218    0.4512    0.5229       215
    surprise     0.4944    0.2431    0.3259       181

   micro avg     0.6324    0.4285    0.5109      1895
   macro avg     0.6235    0.4208    0.4994      1895
weighted avg     0.6275    0.4285    0.5064      1895
 samples avg 

  _warn_prf(average, modifier, msg_start, len(result))
