# Severity Classifier

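This notebook holds the code for:
- Loading the severity dataset using our dataset loading script for severity
- Preprocessing the data, which at this stage only involves encoding (tokenizing) the text
- Training and evaluating the model with a 10-split cross-validation (10 stratified shuffle splits, each holding out 10% of the training data for validation)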

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
from datasets import load_dataset, load_metric, Dataset

from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np

In [2]:
# Hugging Face model id; used below to load both the tokenizer and the classifier,
# and as part of the file name of the pickled performance results.
checkpoint = "bert-base-uncased"

In [3]:
# Load the "severity" configuration via the local dataset loading script.
raw_dataset = load_dataset("severity_dataset_loading_script/my_dataset_loading_script.py", "severity")

Downloading and preparing dataset my_dataset_loading_script/severity to /home/geev/.cache/huggingface/datasets/my_dataset_loading_script/severity/1.1.0/7b10a928e05ea497ca05e0d7a5ed7ddc7a92a26ef85781a1f45b0830df05ee81...


Generating train split: 0 examples [00:00, ? examples/s]

filepath severity_dataset_loading_script/severity_3_cat_train.csv
                tweet_id round  \
0     929106677831528448    r4   
1     916340122420903936    r2   
2     930301336788324352    r4   
3     926015780512256001    r3   
4     928896586083651585    r6   
...                  ...   ...   
1029  917825861021081600    r4   
1030  929335200517181441    r1   
1031  930346596902096896    r4   
1032  930473916639916033    r1   
1033  916461984803119104    r4   

                                                   text  rounded_avg_severity  
0     There have been stories about -JOHN DOE- for y...                     1  
1     Sad that -JOHN DOE- has been allowed to get aw...                     2  
2     the fact that -JOHN DOE- solicited nudes and j...                     2  
3                          That's -JOHN DOE- ruined URL                     0  
4     Just looking at -JOHN DOE-’s boat race makes m...                     2  
...                                          

Generating test split: 0 examples [00:00, ? examples/s]

filepath severity_dataset_loading_script/severity_3_cat_test.csv
               tweet_id round  \
0    928871825643147265    r4   
1    922268362884964353    r6   
2    929925627104387072    r4   
3    928762026050097152    r1   
4    927977580745183232    r4   
..                  ...   ...   
254  930283566528942080    r4   
255  925444114799316992    r4   
256  929235501978083329    r2   
257  929384788338925568    r6   
258  926454374364336129    r2   

                                                  text  rounded_avg_severity  
0    I like a lot of -JOHN DOE-'s stuff, but given ...                     0  
1    Ah yes, -JOHN DOE-, the guy who made a bunch o...                     0  
2    im just reading about -JOHN DOE- and i cant be...                     1  
3    So you republicans defending -JOHN DOE- are co...                     1  
4    it's completely shocking that -JOHN DOE- is ex...                     1  
..                                                 ...          

  0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Show the available splits with their columns and row counts.
raw_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1034
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 259
    })
})

In [5]:
# Highest label id in the training split; the classifier head is later sized as max_rating + 1.
max_rating = max(raw_dataset["train"]["label"])
max_rating

2

In [6]:
# Keep a handle on the raw training split and peek at a single example.
raw_train_dataset = raw_dataset["train"]
raw_train_dataset[1]

{'text': 'Sad that -JOHN DOE- has been allowed to get away with it. Shame on those who knew',
 'label': 2}

In [7]:
# Column names and dtypes of the training split.
raw_train_dataset.features

{'text': Value(dtype='string', id=None),
 'label': Value(dtype='int64', id=None)}

### Preprocessing & Evaluation

In [8]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Padding is deferred to collation time, so tokenization below does not pad.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(example):
    """Tokenize the ``text`` field of ``example`` with truncation enabled.

    Called through ``Dataset.map(..., batched=True)``, so ``example["text"]``
    is passed to the tokenizer as received from ``map``.

    Returns the tokenizer output, whose fields ``map`` adds as new columns.
    """
    texts = example["text"]
    return tokenizer(texts, truncation=True)


tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

#### Decide the Average (macro vs. micro)

In [9]:
def evaluate(trainer, tokenized_dataset):
    """Score a trained ``trainer`` on ``tokenized_dataset``.

    Runs ``trainer.predict`` on the dataset, takes the arg-max over axis 1 of
    the returned predictions as the predicted class, and compares it with the
    returned ``label_ids``.

    Parameters
    ----------
    trainer
        Object exposing ``predict(dataset)`` whose result has ``predictions``
        and ``label_ids`` attributes.
    tokenized_dataset
        Dataset handed to ``trainer.predict`` unchanged.

    Returns
    -------
    dict
        ``accuracy`` plus ``{macro,micro}_{precision,recall,f1}``, in that order.
    """
    output = trainer.predict(tokenized_dataset)
    y_true = output.label_ids
    # assumes predictions are (n_examples, n_classes) scores — TODO confirm
    y_pred = np.argmax(output.predictions, axis=1)

    scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    scores = {"accuracy": accuracy_score(y_true, y_pred)}
    for average in ("macro", "micro"):
        for metric_name, scorer in scorers:
            scores[average + "_" + metric_name] = scorer(y_true, y_pred, average=average)
    return scores


### Training with CV

In [10]:
# Column holding the class label; used to stratify the splits.
LABEL_COL = "label"
train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]
# Number of train/validation splits; each one holds out 1/n_split of the training data.
n_split = 10
# NOTE(review): StratifiedShuffleSplit draws every split independently, so validation
# sets may overlap between iterations (repeated hold-out rather than disjoint k-fold).
# random_state is fixed so the splits are reproducible.
sss = StratifiedShuffleSplit(n_splits=n_split, test_size=1/n_split, random_state=0)
# Sanity check: displays the number of splits the splitter will yield.
sss.get_n_splits(train_dataset, train_dataset[LABEL_COL])

10

In [11]:
# Release cached GPU memory held by PyTorch before starting the training loop.
torch.cuda.empty_cache()

In [14]:
# For every split: fine-tune a fresh model on the training part, score it on the
# held-out validation part and on the fixed test set, then persist the model.
# Results are collected as performance[<"val"|"test">]["fold <k>"] = metrics dict.
performance = {"test": {}, "val": {}}
splits = sss.split(train_dataset, train_dataset[LABEL_COL])
for fold, (train_index, val_index) in enumerate(splits, start=1):
    fold_key = "fold " + str(fold)

    # select() keeps the rows in the existing dataset (with its feature schema)
    # instead of copying every column into Python objects and rebuilding a Dataset.
    cv_train_dataset = train_dataset.select(train_index)
    cv_val_dataset = train_dataset.select(val_index)

    training_args = TrainingArguments("test_trainer")
    # A new model is loaded from the checkpoint for every fold so that no
    # fine-tuned weights carry over between folds.
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=max_rating + 1)

    trainer = Trainer(
        model,
        training_args,
        train_dataset=cv_train_dataset,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )
    trainer.train()

    performance["val"][fold_key] = evaluate(trainer, cv_val_dataset)
    performance["test"][fold_key] = evaluate(trainer, test_dataset)

    print("saving a model! for fold {}".format(fold))
    trainer.save_model("./models/hf/hf_fold_{}_model".format(fold))
    # Also pickles the whole model object next to the Hugging Face checkpoint.
    # NOTE(review): relies on ./models already existing at this point — confirm.
    torch.save(model, "./models/fold_{}_model.p".format(fold))

    # Drop the per-fold objects before the next fold, then release cached GPU memory.
    del model
    del trainer
    del cv_train_dataset
    del cv_val_dataset
    torch.cuda.empty_cache()

performance

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size"

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_1_model
Configuration saved in ./models/hf/hf_fold_1_model/config.json


saving a model! for fold 1


Model weights saved in ./models/hf/hf_fold_1_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_1_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_1_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_2_model
Configuration saved in ./models/hf/hf_fold_2_model/config.json


saving a model! for fold 2


Model weights saved in ./models/hf/hf_fold_2_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_2_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_2_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_3_model
Configuration saved in ./models/hf/hf_fold_3_model/config.json


saving a model! for fold 3


Model weights saved in ./models/hf/hf_fold_3_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_3_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_3_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_4_model
Configuration saved in ./models/hf/hf_fold_4_model/config.json


saving a model! for fold 4


Model weights saved in ./models/hf/hf_fold_4_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_4_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_4_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_5_model
Configuration saved in ./models/hf/hf_fold_5_model/config.json


saving a model! for fold 5


Model weights saved in ./models/hf/hf_fold_5_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_5_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_5_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_6_model
Configuration saved in ./models/hf/hf_fold_6_model/config.json


saving a model! for fold 6


Model weights saved in ./models/hf/hf_fold_6_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_6_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_6_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_7_model
Configuration saved in ./models/hf/hf_fold_7_model/config.json


saving a model! for fold 7


Model weights saved in ./models/hf/hf_fold_7_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_7_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_7_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_8_model
Configuration saved in ./models/hf/hf_fold_8_model/config.json


saving a model! for fold 8


Model weights saved in ./models/hf/hf_fold_8_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_8_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_8_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_9_model
Configuration saved in ./models/hf/hf_fold_9_model/config.json


saving a model! for fold 9


Model weights saved in ./models/hf/hf_fold_9_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_9_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_9_model/special_tokens_map.json
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/geev/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradien

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 104
  Batch size = 8


The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 259
  Batch size = 8
Saving model checkpoint to ./models/hf/hf_fold_10_model
Configuration saved in ./models/hf/hf_fold_10_model/config.json


saving a model! for fold 10


Model weights saved in ./models/hf/hf_fold_10_model/pytorch_model.bin
tokenizer config file saved in ./models/hf/hf_fold_10_model/tokenizer_config.json
Special tokens file saved in ./models/hf/hf_fold_10_model/special_tokens_map.json


{'test': {'fold 1': {'accuracy': 0.6177606177606177,
   'macro_precision': 0.6515596841718896,
   'macro_recall': 0.5752806333451495,
   'macro_f1': 0.5922751039383286,
   'micro_precision': 0.6177606177606177,
   'micro_recall': 0.6177606177606177,
   'micro_f1': 0.6177606177606177},
  'fold 2': {'accuracy': 0.61003861003861,
   'macro_precision': 0.6062909958258795,
   'macro_recall': 0.597837646224743,
   'macro_f1': 0.5991535774144469,
   'micro_precision': 0.61003861003861,
   'micro_recall': 0.61003861003861,
   'micro_f1': 0.61003861003861},
  'fold 3': {'accuracy': 0.6138996138996139,
   'macro_precision': 0.6195455960161843,
   'macro_recall': 0.5880834219543898,
   'macro_f1': 0.599288086901011,
   'micro_precision': 0.6138996138996139,
   'micro_recall': 0.6138996138996139,
   'micro_f1': 0.6138996138996139},
  'fold 4': {'accuracy': 0.637065637065637,
   'macro_precision': 0.6477772545927735,
   'macro_recall': 0.6148292567647405,
   'macro_f1': 0.622556226645446,
   'micro

In [60]:
import pickle

# Persist the raw per-fold metrics so they can be re-aggregated without retraining.
pickle_dump_file = "10_cv_"+checkpoint+"_3_cat_performance.p"
# The context manager closes the file handle even if pickling fails.
with open(pickle_dump_file, "wb") as pickle_out:
    pickle.dump(performance, pickle_out)

In [61]:
def agg_performance(performance):
    """Summarise per-fold metrics as formatted mean and standard deviation.

    Parameters
    ----------
    performance : dict
        Nested mapping ``{split: {fold_name: {metric_name: value}}}``. Every
        fold of a split must provide the metrics found in that split's first
        fold; the fold names themselves are arbitrary.

    Returns
    -------
    dict
        ``{split: {"avg_<metric>": str, "std_<metric>": str}}`` with every
        value formatted to two decimals. A split without folds maps to ``{}``.

    Raises
    ------
    KeyError
        If a fold lacks a metric that the split's first fold has.
    """
    summary = {}
    for split_name, folds in performance.items():
        fold_scores = list(folds.values())
        # Metric names come from the first fold listed, so the fold keys do not
        # have to follow the "fold 1" naming and an empty split does not fail.
        metric_names = list(fold_scores[0]) if fold_scores else []

        split_summary = {}
        for metric in metric_names:
            values = [scores[metric] for scores in fold_scores]
            split_summary["avg_" + metric] = "{:.2f}".format(np.mean(values))
            # np.std defaults to the population standard deviation (ddof=0).
            split_summary["std_" + metric] = "{:.2f}".format(np.std(values))
        summary[split_name] = split_summary
    return summary
        
# Pretty-print the mean/std summary of the per-fold metrics for each split.
import pprint
pprint.pprint(agg_performance(performance))

{'test': {'avg_accuracy': '0.62',
          'avg_macro_f1': '0.60',
          'avg_macro_precision': '0.63',
          'avg_macro_recall': '0.59',
          'avg_micro_f1': '0.62',
          'avg_micro_precision': '0.62',
          'avg_micro_recall': '0.62',
          'std_accuracy': '0.03',
          'std_macro_f1': '0.03',
          'std_macro_precision': '0.04',
          'std_macro_recall': '0.02',
          'std_micro_f1': '0.03',
          'std_micro_precision': '0.03',
          'std_micro_recall': '0.03'},
 'val': {'avg_accuracy': '0.57',
         'avg_macro_f1': '0.55',
         'avg_macro_precision': '0.57',
         'avg_macro_recall': '0.54',
         'avg_micro_f1': '0.57',
         'avg_micro_precision': '0.57',
         'avg_micro_recall': '0.57',
         'std_accuracy': '0.05',
         'std_macro_f1': '0.06',
         'std_macro_precision': '0.06',
         'std_macro_recall': '0.06',
         'std_micro_f1': '0.05',
         'std_micro_precision': '0.05',
         '