# Severity Classifier

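This notebook holds the code for:
- Loading the severity dataset with our dataset loading script (`severity` configuration).
- Preprocessing the data, which at this stage only involves encoding (tokenizing) the text.
- Training and evaluating the model with 10 rounds of cross-validation (10 stratified shuffle splits, each holding out 10% of the training data for validation).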

In [1]:
import os
import pickle
import pprint

import numpy as np
import torch
from datasets import load_dataset, load_metric, Dataset
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import StratifiedShuffleSplit
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments

In [2]:
# Hugging Face checkpoint name; used below to load both the tokenizer and the
# sequence-classification model.
checkpoint = "bert-base-uncased"

In [3]:
# Load the "severity" configuration through the local dataset loading script.
raw_dataset = load_dataset("severity_dataset_loading_script/my_dataset_loading_script.py", "severity")

Reusing dataset condemnation_dataset (/home/geev/.cache/huggingface/datasets/condemnation_dataset/severity/1.1.0/7b10a928e05ea497ca05e0d7a5ed7ddc7a92a26ef85781a1f45b0830df05ee81)


In [4]:
# Show the available splits with their columns and row counts.
raw_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1034
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 259
    })
})

In [5]:
# Largest label id found in the training split. The classifier head is later
# created with max_rating + 1 outputs, which presumes labels are 0..max_rating.
# Reading the "label" column directly avoids decoding every row into a dict.
max_rating = max(raw_dataset["train"]["label"])
max_rating

2

In [6]:
# Keep a handle on the training split and display one example (index 1).
raw_train_dataset = raw_dataset["train"]
raw_train_dataset[1]

{'text': 'Sad that -JOHN DOE- has been allowed to get away with it. Shame on those who knew',
 'label': 2}

In [7]:
# Inspect the column names and dtypes of the training split.
raw_train_dataset.features

{'text': Value(dtype='string', id=None),
 'label': Value(dtype='int64', id=None)}

### Preprocessing & Evaluation

In [8]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the ``text`` field of a (batched) example with truncation enabled."""
    texts = example["text"]
    return tokenizer(texts, truncation=True)


# No padding is applied at tokenization time; the collator pads each batch
# when batches are assembled.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# batched=True hands tokenize_function many rows per call.
tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)

Loading cached processed dataset at /home/geev/.cache/huggingface/datasets/condemnation_dataset/severity/1.1.0/7b10a928e05ea497ca05e0d7a5ed7ddc7a92a26ef85781a1f45b0830df05ee81/cache-2927e955db9fb168.arrow
Loading cached processed dataset at /home/geev/.cache/huggingface/datasets/condemnation_dataset/severity/1.1.0/7b10a928e05ea497ca05e0d7a5ed7ddc7a92a26ef85781a1f45b0830df05ee81/cache-541bee8b75a6f370.arrow


#### Decide the Average

In [9]:
def evaluate(trainer, tokenized_dataset):
    """Run ``trainer.predict`` on a dataset and score the argmax predictions.

    Parameters
    ----------
    trainer
        Object exposing ``predict(dataset)`` whose result carries
        ``predictions`` (per-class scores) and ``label_ids`` (gold labels).
    tokenized_dataset
        Dataset passed straight through to ``trainer.predict``.

    Returns
    -------
    dict
        Accuracy plus macro- and micro-averaged precision, recall and F1,
        keyed ``accuracy``, ``macro_*`` and ``micro_*``.
    """
    output = trainer.predict(tokenized_dataset)
    # The predicted class is the index of the highest score along axis 1.
    y_pred = np.argmax(output.predictions, axis=1)
    y_true = output.label_ids

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "macro_precision": precision_score(y_true, y_pred, average="macro"),
        "macro_recall": recall_score(y_true, y_pred, average="macro"),
        "macro_f1": f1_score(y_true, y_pred, average="macro"),
        "micro_precision": precision_score(y_true, y_pred, average="micro"),
        "micro_recall": recall_score(y_true, y_pred, average="micro"),
        "micro_f1": f1_score(y_true, y_pred, average="micro"),
    }


### Training with CV

In [10]:
LABEL_COL = "label"
n_split = 10

train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]

# StratifiedShuffleSplit draws n_split independent stratified random splits
# (here 90% train / 10% validation). Unlike a k-fold partition, the validation
# subsets of different rounds are not guaranteed to be disjoint.
sss = StratifiedShuffleSplit(
    n_splits=n_split,
    test_size=1 / n_split,
    random_state=0,
)

# Displayed as the cell output: the number of rounds the splitter will yield.
sss.get_n_splits(train_dataset, train_dataset[LABEL_COL])

10

In [11]:
# Release cached, currently unused GPU memory before the training loop starts.
torch.cuda.empty_cache()

In [12]:
# Per-fold metrics: performance[split]["fold <k>"] -> dict returned by evaluate().
performance = {"test": {}, "val": {}}

# torch.save does not create missing directories; make sure the target exists
# up front so a fold's training time is not lost to a FileNotFoundError.
os.makedirs("./models", exist_ok=True)

cv_splits = sss.split(train_dataset, train_dataset[LABEL_COL])
for fold, (train_index, val_index) in enumerate(cv_splits, start=1):
    # Materialise this round's train/validation subsets from the index arrays.
    cv_train_dataset = Dataset.from_dict(train_dataset[train_index])
    cv_val_dataset = Dataset.from_dict(train_dataset[val_index])

    # A fresh model is loaded from the checkpoint for every fold so that no
    # weights carry over between rounds.
    training_args = TrainingArguments("test_trainer")
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=max_rating + 1)

    trainer = Trainer(
        model,
        training_args,
        train_dataset=cv_train_dataset,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )
    trainer.train()

    # Score the fold's model on its held-out validation subset and on the
    # fixed test split.
    fold_key = "fold " + str(fold)
    performance["val"][fold_key] = evaluate(trainer, cv_val_dataset)
    performance["test"][fold_key] = evaluate(trainer, test_dataset)

    print("saving a model! for fold {}".format(fold))
    # NOTE(review): torch.save(model) pickles the whole module object, so
    # loading it later needs compatible class definitions; consider saving a
    # state_dict or using trainer.save_model if portability matters.
    torch.save(model, "./models/fold_{}_model.p".format(fold))

    # Drop references to this fold's objects and release cached GPU memory
    # before the next model is loaded.
    del model, trainer, cv_train_dataset, cv_val_dataset
    torch.cuda.empty_cache()

performance

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 1


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 2


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 3


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 4


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 6


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 7


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 8


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 9


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 104
  Batch size = 16


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 259
  Batch size = 16


saving a model! for fold 10


{'test': {'fold 1': {'accuracy': 0.6023166023166023,
   'macro_precision': 0.6098997493734336,
   'macro_recall': 0.5933179723502304,
   'macro_f1': 0.5985794577234266,
   'micro_precision': 0.6023166023166023,
   'micro_recall': 0.6023166023166023,
   'micro_f1': 0.6023166023166023},
  'fold 2': {'accuracy': 0.5598455598455598,
   'macro_precision': 0.5528011204481792,
   'macro_recall': 0.5332447122769702,
   'macro_f1': 0.5271586836102965,
   'micro_precision': 0.5598455598455598,
   'micro_recall': 0.5598455598455598,
   'micro_f1': 0.5598455598455598},
  'fold 3': {'accuracy': 0.61003861003861,
   'macro_precision': 0.6245056497175141,
   'macro_recall': 0.5666075859624247,
   'macro_f1': 0.5809378233223632,
   'micro_precision': 0.61003861003861,
   'micro_recall': 0.61003861003861,
   'micro_f1': 0.61003861003861},
  'fold 4': {'accuracy': 0.640926640926641,
   'macro_precision': 0.6555839078401529,
   'macro_recall': 0.6175174288077514,
   'macro_f1': 0.6257374515192975,
   'mi

In [60]:
# Persist the raw per-fold metrics so they can be re-aggregated without retraining.
pickle_dump_file = "10_cv_"+checkpoint+"_3_cat_performance.p"
# The context manager guarantees the file is flushed and closed even if
# pickling fails part-way through.
with open(pickle_dump_file, "wb") as dump_file:
    pickle.dump(performance, dump_file)

In [61]:
def agg_performance(performance):
    """Aggregate per-fold metrics into a mean and standard deviation per split.

    Parameters
    ----------
    performance : dict
        Nested mapping ``split -> fold name -> metric name -> value``, e.g.
        ``{"val": {"fold 1": {"accuracy": 0.6}}}``.

    Returns
    -------
    dict
        Mapping ``split -> {"avg_<metric>": str, "std_<metric>": str}`` where
        each value is formatted with two decimals. A split without any folds
        maps to an empty dict.
    """
    aggregated = {}
    for split, folds in performance.items():
        fold_metrics = list(folds.values())
        # Take the metric names from the first recorded fold instead of a
        # hard-coded "fold 1" key, so any fold naming (or no folds) works.
        metric_names = fold_metrics[0] if fold_metrics else ()

        summary = {}
        for metric in metric_names:
            values = [metrics[metric] for metrics in fold_metrics]
            summary["avg_" + metric] = "{:.2f}".format(np.mean(values))
            # np.std uses its default ddof=0 here (population standard deviation).
            summary["std_" + metric] = "{:.2f}".format(np.std(values))
        aggregated[split] = summary
    return aggregated
        
# Pretty-print the mean / standard deviation of every metric for each split.
pprint.pprint(agg_performance(performance))

{'test': {'avg_accuracy': '0.62',
          'avg_macro_f1': '0.60',
          'avg_macro_precision': '0.63',
          'avg_macro_recall': '0.59',
          'avg_micro_f1': '0.62',
          'avg_micro_precision': '0.62',
          'avg_micro_recall': '0.62',
          'std_accuracy': '0.03',
          'std_macro_f1': '0.03',
          'std_macro_precision': '0.04',
          'std_macro_recall': '0.02',
          'std_micro_f1': '0.03',
          'std_micro_precision': '0.03',
          'std_micro_recall': '0.03'},
 'val': {'avg_accuracy': '0.57',
         'avg_macro_f1': '0.55',
         'avg_macro_precision': '0.57',
         'avg_macro_recall': '0.54',
         'avg_micro_f1': '0.57',
         'avg_micro_precision': '0.57',
         'avg_micro_recall': '0.57',
         'std_accuracy': '0.05',
         'std_macro_f1': '0.06',
         'std_macro_precision': '0.06',
         'std_macro_recall': '0.06',
         'std_micro_f1': '0.05',
         'std_micro_precision': '0.05',
         '