In [1]:
import os

from transformers import (RobertaForSequenceClassification,
                          RobertaTokenizer,
                          Trainer,
                          TrainingArguments,
                          EarlyStoppingCallback)
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import torch
import pandas as pd
import datasets

In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced again in the visible notebook cells.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Directory (relative to the notebook, with trailing slash) that holds the
# argquality train/val/test CSV files read below.
dataset_path = './datasets/'

In [4]:
def _read_split(split_name):
    """Load one argquality CSV split from `dataset_path` as a DataFrame.

    Args:
        split_name: Split identifier used in the file name
            ('train', 'val' or 'test' -> 'argquality_<split_name>.csv').

    Returns:
        A new DataFrame without the 'Unnamed: 0' column.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the file has no 'Unnamed: 0' column.
    """
    csv_path = os.path.join(dataset_path, f'argquality_{split_name}.csv')
    # 'Unnamed: 0' is not a model input (presumably an index column written
    # when the CSV was exported -- TODO confirm). drop() returns a new frame,
    # so nothing is mutated in place.
    return pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])


train_dataset = _read_split('train')
val_dataset = _read_split('val')
test_dataset = _read_split('test')

In [5]:
# Convert each pandas DataFrame split into a `datasets.Dataset`.
# NOTE(review): the same names are rebound from DataFrames to Dataset objects,
# so re-running this cell without re-running the CSV-loading cell first would
# hand Dataset objects (not DataFrames) to `from_pandas`.
train_dataset, val_dataset, test_dataset = (
    datasets.Dataset.from_pandas(split_frame)
    for split_frame in (train_dataset, val_dataset, test_dataset)
)

In [6]:
# Tokenizer and classifier both come from the same pretrained 'roberta-base'
# checkpoint.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# num_labels=2 configures the sequence-classification model for two labels,
# matching the binary averaging used in compute_metrics.
model = RobertaForSequenceClassification.from_pretrained(
    'roberta-base',
    num_labels=2,
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initiali

In [16]:
def preprocess_function(examples):
    """Tokenize the 'text' field of a batch of examples.

    Relies on the notebook-level `tokenizer`. The texts are truncated and
    padded using the 'max_length' padding strategy.

    Args:
        examples: Mapping with a 'text' entry (as passed by `Dataset.map`).

    Returns:
        Whatever the tokenizer call returns for the given texts.
    """
    texts = examples['text']
    encoded = tokenizer(texts, truncation=True, padding='max_length')
    return encoded

In [7]:
training_args = TrainingArguments(
    # --- Output locations ---
    output_dir='./results',
    logging_dir='./logs',

    # --- Optimisation ---
    optim='adamw_hf',
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=1000,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,

    # --- Logging / evaluation / checkpointing, all on the same 500-step cadence ---
    logging_steps=500,
    do_eval=True,
    evaluation_strategy='steps',
    eval_steps=500,
    save_strategy='steps',
    save_steps=500,

    # --- Model selection: reload the checkpoint with the best 'f1' when training ends ---
    load_best_model_at_end=True,
    metric_for_best_model='f1',
)

In [8]:
def compute_metrics(pred):
    """Compute accuracy, F1, precision and recall for a prediction object.

    Args:
        pred: Object exposing `label_ids` (reference labels) and
            `predictions` (per-class scores; the predicted class is the
            argmax over the last axis).

    Returns:
        dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
        Precision/recall/F1 use sklearn's binary averaging.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    # The fourth element of the sklearn result (support) is not reported.
    prf_scores = precision_recall_fscore_support(y_true, y_pred, average='binary')
    precision, recall, f1 = prf_scores[0], prf_scores[1], prf_scores[2]

    metrics = {'accuracy': accuracy_score(y_true, y_pred)}
    metrics['f1'] = f1
    metrics['precision'] = precision
    metrics['recall'] = recall
    return metrics

In [9]:
def _tokenize_split(split):
    """Apply `preprocess_function` to one dataset split.

    The split is mapped in batches with a single process, the raw 'text'
    column is removed from the result, and cached results are not reused.
    """
    return split.map(
        preprocess_function,
        batched=True,
        num_proc=1,
        remove_columns=['text'],
        load_from_cache_file=False,
    )


tok_train_dataset = _tokenize_split(train_dataset)

tok_val_dataset = _tokenize_split(val_dataset)

tok_test_dataset = _tokenize_split(test_dataset)

  0%|          | 0/19 [00:00<?, ?ba/s]

  0%|          | 0/7 [00:00<?, ?ba/s]

  0%|          | 0/7 [00:00<?, ?ba/s]

In [10]:
# Early stopping with a patience of 2 evaluations on the monitored metric.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

# Train on the tokenized training split, evaluate on the tokenized validation
# split, and score each evaluation with compute_metrics.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tok_train_dataset,
    eval_dataset=tok_val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

In [11]:
# Run fine-tuning. The returned TrainOutput is left as the cell's last
# expression so it is displayed as the cell output.
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: index. If index are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 18297
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 6864
  Number of trainable parameters = 124647170


  0%|          | 0/6864 [00:00<?, ?it/s]

The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: index. If index are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6100
  Batch size = 8


Attempted to log scalar metric loss:
0.6571
Attempted to log scalar metric learning_rate:
1e-05
Attempted to log scalar metric epoch:
0.22
{'loss': 0.6571, 'learning_rate': 1e-05, 'epoch': 0.22}


  0%|          | 0/763 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-500
Configuration saved in ./results\checkpoint-500\config.json


Attempted to log scalar metric eval_loss:
0.620814859867096
Attempted to log scalar metric eval_accuracy:
0.6752459016393443
Attempted to log scalar metric eval_f1:
0.7798154940535734
Attempted to log scalar metric eval_precision:
0.6618867924528302
Attempted to log scalar metric eval_recall:
0.9488774682174737
Attempted to log scalar metric eval_runtime:
120.8233
Attempted to log scalar metric eval_samples_per_second:
50.487
Attempted to log scalar metric eval_steps_per_second:
6.315
Attempted to log scalar metric epoch:
0.22
{'eval_loss': 0.620814859867096, 'eval_accuracy': 0.6752459016393443, 'eval_f1': 0.7798154940535734, 'eval_precision': 0.6618867924528302, 'eval_recall': 0.9488774682174737, 'eval_runtime': 120.8233, 'eval_samples_per_second': 50.487, 'eval_steps_per_second': 6.315, 'epoch': 0.22}


Model weights saved in ./results\checkpoint-500\pytorch_model.bin
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: index. If index are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6100
  Batch size = 8


Attempted to log scalar metric loss:
0.605
Attempted to log scalar metric learning_rate:
2e-05
Attempted to log scalar metric epoch:
0.44
{'loss': 0.605, 'learning_rate': 2e-05, 'epoch': 0.44}


  0%|          | 0/763 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-1000
Configuration saved in ./results\checkpoint-1000\config.json


Attempted to log scalar metric eval_loss:
0.6545706391334534
Attempted to log scalar metric eval_accuracy:
0.6821311475409836
Attempted to log scalar metric eval_f1:
0.7813240103755498
Attempted to log scalar metric eval_precision:
0.6700193423597679
Attempted to log scalar metric eval_recall:
0.9369759264268326
Attempted to log scalar metric eval_runtime:
119.8909
Attempted to log scalar metric eval_samples_per_second:
50.88
Attempted to log scalar metric eval_steps_per_second:
6.364
Attempted to log scalar metric epoch:
0.44
{'eval_loss': 0.6545706391334534, 'eval_accuracy': 0.6821311475409836, 'eval_f1': 0.7813240103755498, 'eval_precision': 0.6700193423597679, 'eval_recall': 0.9369759264268326, 'eval_runtime': 119.8909, 'eval_samples_per_second': 50.88, 'eval_steps_per_second': 6.364, 'epoch': 0.44}


Model weights saved in ./results\checkpoint-1000\pytorch_model.bin
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: index. If index are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6100
  Batch size = 8


Attempted to log scalar metric loss:
0.6311
Attempted to log scalar metric learning_rate:
1.8294679399727152e-05
Attempted to log scalar metric epoch:
0.66
{'loss': 0.6311, 'learning_rate': 1.8294679399727152e-05, 'epoch': 0.66}


  0%|          | 0/763 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-1500
Configuration saved in ./results\checkpoint-1500\config.json


Attempted to log scalar metric eval_loss:
0.5888082385063171
Attempted to log scalar metric eval_accuracy:
0.6990163934426229
Attempted to log scalar metric eval_f1:
0.7720953326713009
Attempted to log scalar metric eval_precision:
0.7134663913741683
Attempted to log scalar metric eval_recall:
0.8412226129294023
Attempted to log scalar metric eval_runtime:
125.1305
Attempted to log scalar metric eval_samples_per_second:
48.749
Attempted to log scalar metric eval_steps_per_second:
6.098
Attempted to log scalar metric epoch:
0.66
{'eval_loss': 0.5888082385063171, 'eval_accuracy': 0.6990163934426229, 'eval_f1': 0.7720953326713009, 'eval_precision': 0.7134663913741683, 'eval_recall': 0.8412226129294023, 'eval_runtime': 125.1305, 'eval_samples_per_second': 48.749, 'eval_steps_per_second': 6.098, 'epoch': 0.66}


Model weights saved in ./results\checkpoint-1500\pytorch_model.bin
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: index. If index are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6100
  Batch size = 8


Attempted to log scalar metric loss:
0.6061
Attempted to log scalar metric learning_rate:
1.65893587994543e-05
Attempted to log scalar metric epoch:
0.87
{'loss': 0.6061, 'learning_rate': 1.65893587994543e-05, 'epoch': 0.87}


  0%|          | 0/763 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-2000
Configuration saved in ./results\checkpoint-2000\config.json


Attempted to log scalar metric eval_loss:
0.5971350073814392
Attempted to log scalar metric eval_accuracy:
0.7055737704918033
Attempted to log scalar metric eval_f1:
0.7758921886698278
Attempted to log scalar metric eval_precision:
0.7201760481816076
Attempted to log scalar metric eval_recall:
0.8409521233432513
Attempted to log scalar metric eval_runtime:
119.7908
Attempted to log scalar metric eval_samples_per_second:
50.922
Attempted to log scalar metric eval_steps_per_second:
6.369
Attempted to log scalar metric epoch:
0.87
{'eval_loss': 0.5971350073814392, 'eval_accuracy': 0.7055737704918033, 'eval_f1': 0.7758921886698278, 'eval_precision': 0.7201760481816076, 'eval_recall': 0.8409521233432513, 'eval_runtime': 119.7908, 'eval_samples_per_second': 50.922, 'eval_steps_per_second': 6.369, 'epoch': 0.87}


Model weights saved in ./results\checkpoint-2000\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results\checkpoint-1000 (score: 0.7813240103755498).


Attempted to log scalar metric train_runtime:
1678.7333
Attempted to log scalar metric train_samples_per_second:
32.698
Attempted to log scalar metric train_steps_per_second:
4.089
Attempted to log scalar metric total_flos:
4209776885760000.0
Attempted to log scalar metric train_loss:
0.6248420867919922
Attempted to log scalar metric epoch:
0.87
{'train_runtime': 1678.7333, 'train_samples_per_second': 32.698, 'train_steps_per_second': 4.089, 'train_loss': 0.6248420867919922, 'epoch': 0.87}


TrainOutput(global_step=2000, training_loss=0.6248420867919922, metrics={'train_runtime': 1678.7333, 'train_samples_per_second': 32.698, 'train_steps_per_second': 4.089, 'train_loss': 0.6248420867919922, 'epoch': 0.87})

In [12]:
# Run the trainer's model over the tokenized test split and keep the
# prediction output for scoring.
prediction = trainer.predict(tok_test_dataset)

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: index. If index are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 6100
  Batch size = 8


  0%|          | 0/763 [00:00<?, ?it/s]

In [13]:
# Score the test-set predictions with the same metric function that was
# passed to the Trainer; the resulting dict is displayed as the cell output.
compute_metrics(prediction)

{'accuracy': 0.6855737704918032,
 'f1': 0.7857940585213312,
 'precision': 0.6693302891933028,
 'recall': 0.9513250405624663}

In [14]:
# Persist the model currently held by the trainer. With
# load_best_model_at_end=True the training log reports that the best
# checkpoint was reloaded when training finished.
trainer.save_model('./results/roberta_base_arg_20/')

Saving model checkpoint to ./results/roberta_base_arg_20/
Configuration saved in ./results/roberta_base_arg_20/config.json
Model weights saved in ./results/roberta_base_arg_20/pytorch_model.bin


In [15]:
# The Trainer above was constructed without a tokenizer, so the tokenizer
# files are written into the saved-model directory explicitly here.
BASE_MODEL = 'roberta-base'
# Reload the tokenizer from the base checkpoint (this rebinds the notebook-level
# `tokenizer` name to an equivalent 'roberta-base' tokenizer).
tokenizer = RobertaTokenizer.from_pretrained(BASE_MODEL)
# Last expression: the tuple of written file paths is shown as the cell output.
tokenizer.save_pretrained('./results/roberta_base_arg_20/')

loading file vocab.json from cache at C:\Users\pEhne/.cache\huggingface\hub\models--roberta-base\snapshots\bc2764f8af2e92b6eb5679868df33e224075ca68\vocab.json
loading file merges.txt from cache at C:\Users\pEhne/.cache\huggingface\hub\models--roberta-base\snapshots\bc2764f8af2e92b6eb5679868df33e224075ca68\merges.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at None
loading configuration file config.json from cache at C:\Users\pEhne/.cache\huggingface\hub\models--roberta-base\snapshots\bc2764f8af2e92b6eb5679868df33e224075ca68\config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_

('./results/roberta_base_arg_20/tokenizer_config.json',
 './results/roberta_base_arg_20/special_tokens_map.json',
 './results/roberta_base_arg_20/vocab.json',
 './results/roberta_base_arg_20/merges.txt',
 './results/roberta_base_arg_20/added_tokens.json')