In [15]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
from datasets import load_dataset, load_metric, Dataset

from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np

In [16]:
checkpoint = "bert-base-uncased"

In [27]:
raw_dataset = load_dataset("severity_dataset_loading_script/my_dataset_loading_script.py", "severity_4_cat")

Reusing dataset condemnation_dataset (/home/geev/.cache/huggingface/datasets/condemnation_dataset/severity_4_cat/1.1.0/c9775c317dd01ddfaa3b09ba4e6599bd59d800448522ca750e2fa1a2fcbdc9ac)


In [28]:
raw_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1034
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 259
    })
})

In [29]:
max_rating = max([r["label"] for r in raw_dataset["train"]])
max_rating

3

In [30]:
raw_train_dataset = raw_dataset["train"]
raw_train_dataset[1]

{'text': 'Sexual predators <OTHER TARGET 3>, <OTHER TARGET 2>, -JOHN DOE- all lose jobs. Meanwhile, sexual predator @realDonaldTrump still has his.',
 'label': 2}

In [31]:
raw_train_dataset.features

{'text': Value(dtype='string', id=None),
 'label': Value(dtype='int64', id=None)}

### Preprocessing & Evaluation

In [32]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True)

tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /home/geev/.cache/huggingface/transform

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

#### Decide the Average

In [40]:
def evaluate(trainer, tokenized_dataset):
    predictions = trainer.predict(tokenized_dataset)
    preds = np.argmax(predictions.predictions, axis=1)
    def get_metrics(y_pred, y_true):
        metrics ={}
        metrics["accuracy"] = accuracy_score(y_true, y_pred)
        metrics["macro_precision"] = precision_score(y_true, y_pred, average="macro")
        metrics["macro_recall"] = recall_score(y_true, y_pred, average="macro")
        metrics["macro_f1"] = f1_score(y_true, y_pred, average="macro")
        metrics["micro_precision"] = precision_score(y_true, y_pred, average="micro")
        metrics["micro_recall"] = recall_score(y_true, y_pred, average="micro")
        metrics["micro_f1"] = f1_score(y_true, y_pred, average="micro")
        
        return metrics
    return get_metrics(preds, predictions.label_ids)


### Training with CV

In [41]:
LABEL_COL = "label"
train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]
n_split = 10
sss = StratifiedShuffleSplit(n_splits=n_split, test_size=1/n_split, random_state=0)
sss.get_n_splits(train_dataset, train_dataset[LABEL_COL])

10

In [42]:
torch.cuda.empty_cache()

In [43]:
performance = {"test":{}, "val":{}}
fold = 1
for train_index, val_index in sss.split(train_dataset, train_dataset[LABEL_COL]):
    cv_train_dataset = Dataset.from_dict(train_dataset[train_index])
    
    training_args = TrainingArguments("test_trainer")
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=4)
    
    trainer = Trainer(
    model,
    training_args,
    train_dataset = cv_train_dataset,
    data_collator = data_collator,
    tokenizer = tokenizer,
    )
    
    trainer.train()
    cv_val_dataset = Dataset.from_dict(train_dataset[val_index])

    performance["val"]["fold "+str(fold)] = evaluate(trainer, cv_val_dataset)
    performance["test"]["fold "+str(fold)] = evaluate(trainer, test_dataset)
    fold+=1
    del model
    del trainer
    del cv_train_dataset
    del cv_val_dataset
    torch.cuda.empty_cache()
    
    

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LAB

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_drop

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))


In [44]:
import pickle
pickle_dump_file = "10_cv_"+checkpoint+"_4_cat_performance.p"
pickle.dump(performance, open(pickle_dump_file, "wb"))

In [45]:
def agg_performance(performance):
    
    agg_performance = {}
    for key in performance:
        agg_performance[key] = {}
        for metric in performance[key]["fold 1"]:
            metric_val_list = [performance[key][fold][metric] for fold in performance[key]]
            agg_performance[key]["avg_"+metric] = "{:.2f}".format(np.mean(metric_val_list))
            agg_performance[key]["std_"+metric] = "{:.2f}".format(np.std(metric_val_list))
    return agg_performance
        
import pprint
pprint.pprint(agg_performance(performance))

{'test': {'avg_accuracy': '0.68',
          'avg_macro_f1': '0.35',
          'avg_macro_precision': '0.34',
          'avg_macro_recall': '0.36',
          'avg_micro_f1': '0.68',
          'avg_micro_precision': '0.68',
          'avg_micro_recall': '0.68',
          'std_accuracy': '0.01',
          'std_macro_f1': '0.01',
          'std_macro_precision': '0.04',
          'std_macro_recall': '0.01',
          'std_micro_f1': '0.01',
          'std_micro_precision': '0.01',
          'std_micro_recall': '0.01'},
 'val': {'avg_accuracy': '0.65',
         'avg_macro_f1': '0.32',
         'avg_macro_precision': '0.31',
         'avg_macro_recall': '0.34',
         'avg_micro_f1': '0.65',
         'avg_micro_precision': '0.65',
         'avg_micro_recall': '0.65',
         'std_accuracy': '0.02',
         'std_macro_f1': '0.01',
         'std_macro_precision': '0.01',
         'std_macro_recall': '0.01',
         'std_micro_f1': '0.02',
         'std_micro_precision': '0.02',
         '