<h3>Clone repository</h3>

In [1]:
# Clone the project sources into the current directory. The clone is skipped
# when the checkout already exists, so re-running this cell does not make git
# abort on an existing, non-empty destination.
!test -d personalized-nlp || git clone https://github.com/CLARIN-PL/personalized-nlp.git

Cloning into 'personalized-nlp'...
remote: Enumerating objects: 4368, done.[K
remote: Counting objects: 100% (1995/1995), done.[K
remote: Compressing objects: 100% (754/754), done.[K
remote: Total 4368 (delta 1292), reused 1794 (delta 1215), pack-reused 2373[K
Receiving objects: 100% (4368/4368), 46.76 MiB | 22.16 MiB/s, done.
Resolving deltas: 100% (2719/2719), done.


<h3>Download data</h3>

In [2]:
# Download the two dataset CSVs (annotations_users_folds.csv and
# texts_processed.csv) from Google Drive into the cloned repository.
# The destination is relative to the directory the repository was cloned into,
# so the cell does not depend on Colab's /content layout.
!gdown 1CyTSYita72HlfAytIPEl-2XGNOD05i5q -O personalized-nlp/storage/data/unhealthy_conversations/
!gdown 1VHwrDvYx_LtFeKMaGOsD0xC5MSaGgv2H -O personalized-nlp/storage/data/unhealthy_conversations/

Downloading...
From: https://drive.google.com/uc?id=1CyTSYita72HlfAytIPEl-2XGNOD05i5q
To: /content/personalized-nlp/storage/data/unhealthy_conversations/annotations_users_folds.csv
100% 12.7M/12.7M [00:00<00:00, 129MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VHwrDvYx_LtFeKMaGOsD0xC5MSaGgv2H
To: /content/personalized-nlp/storage/data/unhealthy_conversations/texts_processed.csv
100% 6.24M/6.24M [00:00<00:00, 194MB/s]


<h3>Change working directory</h3>

In [3]:
# Enter the cloned repository: the following cells refer to requirements.txt
# by a relative path and import `settings` / `personalized_nlp`, which
# presumably resolve from this directory (verify if running outside Colab).
%cd personalized-nlp

/content/personalized-nlp


<h3>Install required libraries</h3>

In [4]:
# Install the project's pinned dependencies. `%pip` (rather than `!pip`)
# installs into the environment of the running kernel.
%pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fasttext==0.9.2 (from -r requirements.txt (line 1))
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.8/68.8 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pandas==1.4.2 (from -r requirements.txt (line 2))
  Downloading pandas-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.7/11.7 MB[0m [31m82.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytorch-lightning==1.6.0 (from -r requirements.txt (line 3))
  Downloading pytorch_lightning-1.6.0-py3-none-any.whl (582 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m582.1/582.1 kB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scikit-learn==1.0.2 (from -r requirements.txt (line 4))
 

In [5]:
# Remove the torchtext build that ships with the runtime (0.15.2 at the time of
# the recorded run). NOTE(review): presumably it conflicts with the versions
# pinned in requirements.txt — confirm before dropping this cell.
%pip uninstall -y torchtext

Found existing installation: torchtext 0.15.2
Uninstalling torchtext-0.15.2:
  Successfully uninstalled torchtext-0.15.2


<h3>Declare data module for Unhealthy Conversations dataset</h3>

In [6]:
from typing import List

import pandas as pd
import os
from pathlib import Path

from settings import DATA_DIR
from personalized_nlp.datasets.datamodule_base import BaseDataModule


class UnhealthyDataModule(BaseDataModule):
    """Data module for the Unhealthy Conversations dataset.

    Supplies the dataset directory, file names, annotated label columns and the
    per-label class counts on top of ``BaseDataModule``, and loads the texts
    and annotations CSV files in :meth:`prepare_data`.
    """

    @property
    def data_dir(self) -> Path:
        """Directory holding the dataset files, located under ``DATA_DIR``."""
        return DATA_DIR / "unhealthy_conversations"

    @property
    def annotations_file(self) -> str:
        """Name of the annotations CSV, selected by ``self.stratify_folds_by``.

        ``stratify_folds_by`` is not set in this class; presumably the base
        class stores it from the constructor keyword arguments.
        """
        return f"annotations_{self.stratify_folds_by}_folds.csv"

    @property
    def data_file(self) -> str:
        """Name of the CSV file with the texts."""
        return "texts_processed.csv"

    @property
    def annotation_columns(self) -> List[str]:
        """Names of the label columns used as prediction targets."""
        return [
            "antagonize",
            "condescending",
            "dismissive",
            "generalisation",
            "generalisation_unfair",
            "healthy",
            "hostile",
            "sarcastic",
        ]

    @property
    def class_dims(self) -> List[int]:
        """Number of classes per label: two for every annotation column.

        Derived from ``annotation_columns`` so the two lists cannot drift apart
        when a label is added or removed.
        """
        return [2] * len(self.annotation_columns)

    def __init__(
        self,
        gpt_personalized: bool = False,
        **kwargs,
    ) -> None:
        """Create the data module and make sure the embeddings directory exists.

        Args:
            gpt_personalized: Stored on the instance as ``self.gpt_personalized``;
                it is not read anywhere else in this class.
            **kwargs: Forwarded unchanged to ``BaseDataModule.__init__``.
        """
        # Set before the base constructor runs, in case the base class reads it
        # during initialisation.
        self.gpt_personalized = gpt_personalized
        super().__init__(**kwargs)

        os.makedirs(self.data_dir / "embeddings", exist_ok=True)

    def prepare_data(self) -> None:
        """Load texts and annotations into ``self.data`` and ``self.annotations``.

        Text rows containing any missing value are dropped and the ``comment``
        column is renamed to ``text``. Annotations are reduced to a single row
        per ``(text_id, annotator_id)`` pair, keeping the first occurrence.
        """
        texts = pd.read_csv(self.data_dir / self.data_file).dropna()
        self.data = texts.rename(columns={"comment": "text"})

        annotations = pd.read_csv(self.data_dir / self.annotations_file)
        self.annotations = annotations.drop_duplicates(
            subset=["text_id", "annotator_id"]
        )

<h3>Import necessary libraries and modules</h3>

In [7]:
import os
from itertools import product
from operator import itemgetter

from settings import TRANSFORMER_MODEL_STRINGS

from personalized_nlp.learning.train import train_test
from settings import LOGS_DIR
from personalized_nlp.utils import seed_everything
from personalized_nlp.utils.experiments import product_kwargs
from personalized_nlp.utils.callbacks.outputs import SaveOutputsLocal
from pytorch_lightning import loggers as pl_loggers

# Expose only CUDA device 0 to this process and select the "thread" start
# method for wandb.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "WANDB_START_METHOD": "thread",
    }
)

<h3>Run experiments</h3>

In [35]:
  # Weights & Biases project that receives one run per
  # (datamodule kwargs, model kwargs, trainer kwargs) combination.
  wandb_project_name = "unhealthy-kk"
  datamodule_cls = UnhealthyDataModule

  # Each dict maps an option name to the list of values to try; the lists with
  # a trailing slice are menus from which the slice picks the active entries.
  # NOTE(review): product_kwargs presumably expands a dict into one kwargs dict
  # per combination of values — confirm against personalized_nlp.utils.experiments.
  datamodule_kwargs_list = product_kwargs(
      {
          "regression": [False],
          # Only the last entry ("bert") is active.
          "embeddings_type": ["labse", "mpnet", "xlmr", "random", "skipgram", "bert"][
              -1:
          ],
          "limit_past_annotations_list": [None],
          # Only the first entry ("users") is active.
          "stratify_folds_by": ["users", "texts"][:1],
          "folds_num": [5],
          "batch_size": [3000],
          # One datamodule per held-out fold.
          "test_fold": list(range(5)),
          "use_finetuned_embeddings": [False],
          "use_cuda": [True]
      }
  )
  model_kwargs_list = product_kwargs(
      {
          "huggingface_model_name": [TRANSFORMER_MODEL_STRINGS["labse"]],
          # Only the first entry (False) is active.
          "append_annotator_ids": [False, True][:1],
          "use_cuda": [True],
          "embedding_dim": [50],
          "dp_emb": [0.25],
          "dp": [0.0],
          "hidden_dim": [100]
      }
  )
  trainer_kwargs_list = product_kwargs(
      {
          "epochs": [20],
          "lr": [0.008],
          "regression": [False],
          "use_cuda": [True],
          # itemgetter(0, 3) selects "baseline" and "bias" from the menu.
          "model_type": list(itemgetter(0, 3)(["baseline", "onehot", "peb", "bias", "embedding"])),
      }
  )

  for datamodule_kwargs in datamodule_kwargs_list:
      # Re-seed once per datamodule configuration, before the data module is built.
      seed_everything()
      data_module = datamodule_cls(**datamodule_kwargs)

      for model_kwargs, trainer_kwargs in product(
          model_kwargs_list,
          trainer_kwargs_list,
      ):
          # Full hyper-parameter set recorded as the wandb run config.
          hparams = {
              "dataset": type(data_module).__name__,
              **datamodule_kwargs,
              **model_kwargs,
              **trainer_kwargs,
          }

          logger = pl_loggers.WandbLogger(
              save_dir=str(LOGS_DIR),
              config=hparams,
              project=wandb_project_name,
              log_model=False,
          )

          try:
              train_test(
                  datamodule=data_module,
                  model_kwargs=model_kwargs,
                  logger=logger,
                  custom_callbacks=[SaveOutputsLocal("uc_outputs")],
                  **trainer_kwargs,
              )
          finally:
              # Close the wandb run even when training fails; otherwise the run
              # stays active and the next WandbLogger would attach to it.
              logger.experiment.finish()


INFO:pytorch_lightning.utilities.seed:Global seed set to 22


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | Baseline   | 12.3 K
1 | metrics | ModuleDict | 0     
---------------------------------------
12.3 K    Trainable params
0         Non-trainable params
12.3 K    Total params
0.049     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/fine-dew-7/epoch=16-step=391.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/fine-dew-7/epoch=16-step=391.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████▇
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,16.0
test_accuracy_antagonize,0.87438
test_accuracy_condescending,0.84902
test_accuracy_dismissive,0.90119
test_accuracy_generalisation,0.93617
test_accuracy_generalisation_unfair,0.94579
test_accuracy_healthy,0.82629
test_accuracy_hostile,0.90119
test_accuracy_sarcastic,0.8531
test_f1_antagonize_0,0.93184


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | HuBiSimple | 87.9 K
1 | metrics | ModuleDict | 0     
---------------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/giddy-grass-8/epoch=16-step=391.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/giddy-grass-8/epoch=16-step=391.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████▇
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,16.0
test_accuracy_antagonize,0.89129
test_accuracy_condescending,0.86884
test_accuracy_dismissive,0.91023
test_accuracy_generalisation,0.93792
test_accuracy_generalisation_unfair,0.94899
test_accuracy_healthy,0.86389
test_accuracy_hostile,0.90761
test_accuracy_sarcastic,0.85748
test_f1_antagonize_0,0.93965


INFO:pytorch_lightning.utilities.seed:Global seed set to 22


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | Baseline   | 12.3 K
1 | metrics | ModuleDict | 0     
---------------------------------------
12.3 K    Trainable params
0         Non-trainable params
12.3 K    Total params
0.049     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/brisk-elevator-9/epoch=16-step=408.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/brisk-elevator-9/epoch=16-step=408.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▇
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,16.0
test_accuracy_antagonize,0.87621
test_accuracy_condescending,0.86249
test_accuracy_dismissive,0.88622
test_accuracy_generalisation,0.93625
test_accuracy_generalisation_unfair,0.94025
test_accuracy_healthy,0.77816
test_accuracy_hostile,0.89994
test_accuracy_sarcastic,0.84391
test_f1_antagonize_0,0.93244


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | HuBiSimple | 87.9 K
1 | metrics | ModuleDict | 0     
---------------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/atomic-sun-10/epoch=16-step=408.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/atomic-sun-10/epoch=16-step=408.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▇
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,16.0
test_accuracy_antagonize,0.88679
test_accuracy_condescending,0.88422
test_accuracy_dismissive,0.90194
test_accuracy_generalisation,0.93654
test_accuracy_generalisation_unfair,0.94154
test_accuracy_healthy,0.86421
test_accuracy_hostile,0.90452
test_accuracy_sarcastic,0.86192
test_f1_antagonize_0,0.93629


INFO:pytorch_lightning.utilities.seed:Global seed set to 22


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | Baseline   | 12.3 K
1 | metrics | ModuleDict | 0     
---------------------------------------
12.3 K    Trainable params
0         Non-trainable params
12.3 K    Total params
0.049     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/worldly-elevator-11/epoch=13-step=336.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/worldly-elevator-11/epoch=13-step=336.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▆
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,13.0
test_accuracy_antagonize,0.85744
test_accuracy_condescending,0.86018
test_accuracy_dismissive,0.89582
test_accuracy_generalisation,0.89411
test_accuracy_generalisation_unfair,0.89805
test_accuracy_healthy,0.8379
test_accuracy_hostile,0.90918
test_accuracy_sarcastic,0.86943
test_f1_antagonize_0,0.92103


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | HuBiSimple | 87.9 K
1 | metrics | ModuleDict | 0     
---------------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/fresh-firefly-12/epoch=13-step=336.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/fresh-firefly-12/epoch=13-step=336.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▆
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,13.0
test_accuracy_antagonize,0.8852
test_accuracy_condescending,0.88314
test_accuracy_dismissive,0.90576
test_accuracy_generalisation,0.91021
test_accuracy_generalisation_unfair,0.90987
test_accuracy_healthy,0.87354
test_accuracy_hostile,0.91912
test_accuracy_sarcastic,0.87766
test_f1_antagonize_0,0.93509


INFO:pytorch_lightning.utilities.seed:Global seed set to 22


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | Baseline   | 12.3 K
1 | metrics | ModuleDict | 0     
---------------------------------------
12.3 K    Trainable params
0         Non-trainable params
12.3 K    Total params
0.049     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/lyric-wildflower-13/epoch=8-step=216.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/lyric-wildflower-13/epoch=8-step=216.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▄
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,8.0
test_accuracy_antagonize,0.83748
test_accuracy_condescending,0.82421
test_accuracy_dismissive,0.86926
test_accuracy_generalisation,0.91736
test_accuracy_generalisation_unfair,0.92192
test_accuracy_healthy,0.76423
test_accuracy_hostile,0.89829
test_accuracy_sarcastic,0.84107
test_f1_antagonize_0,0.9092


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | HuBiSimple | 87.9 K
1 | metrics | ModuleDict | 0     
---------------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/fast-meadow-14/epoch=10-step=264.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/fast-meadow-14/epoch=10-step=264.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▅
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,10.0
test_accuracy_antagonize,0.86899
test_accuracy_condescending,0.85462
test_accuracy_dismissive,0.89525
test_accuracy_generalisation,0.92261
test_accuracy_generalisation_unfair,0.92855
test_accuracy_healthy,0.83858
test_accuracy_hostile,0.90077
test_accuracy_sarcastic,0.85683
test_f1_antagonize_0,0.92345


INFO:pytorch_lightning.utilities.seed:Global seed set to 22


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | Baseline   | 12.3 K
1 | metrics | ModuleDict | 0     
---------------------------------------
12.3 K    Trainable params
0         Non-trainable params
12.3 K    Total params
0.049     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/different-sea-15/epoch=16-step=408.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/different-sea-15/epoch=16-step=408.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▇
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,16.0
test_accuracy_antagonize,0.83656
test_accuracy_condescending,0.80919
test_accuracy_dismissive,0.88179
test_accuracy_generalisation,0.89931
test_accuracy_generalisation_unfair,0.91098
test_accuracy_healthy,0.70631
test_accuracy_hostile,0.85589
test_accuracy_sarcastic,0.81175
test_f1_antagonize_0,0.90876


  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | model   | HuBiSimple | 87.9 K
1 | metrics | ModuleDict | 0     
---------------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/personalized-nlp/storage/checkpoints/prime-forest-16/epoch=14-step=360.ckpt
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from checkpoint at /content/personalized-nlp/storage/checkpoints/prime-forest-16/epoch=14-step=360.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▆
test_accuracy_antagonize,▁
test_accuracy_condescending,▁
test_accuracy_dismissive,▁
test_accuracy_generalisation,▁
test_accuracy_generalisation_unfair,▁
test_accuracy_healthy,▁
test_accuracy_hostile,▁
test_accuracy_sarcastic,▁
test_f1_antagonize_0,▁

0,1
epoch,14.0
test_accuracy_antagonize,0.86063
test_accuracy_condescending,0.85881
test_accuracy_dismissive,0.89347
test_accuracy_generalisation,0.90733
test_accuracy_generalisation_unfair,0.91773
test_accuracy_healthy,0.80956
test_accuracy_hostile,0.86939
test_accuracy_sarcastic,0.84057
test_f1_antagonize_0,0.92025


<h3>Download results from Weights &amp; Biases</h3>

In [36]:
import pandas as pd
from personalized_nlp.utils.wandb_download import get_wandb_project_rows

# Account name used as the prefix of the wandb project path.
USERNAME = "persemo"

# Fetch the rows of the project "<USERNAME>/<wandb_project_name>".
result = get_wandb_project_rows("{}/{}".format(USERNAME, wandb_project_name))

# Split the metric columns by kind, based on the metric name embedded in each
# column label.
macro_f1_cols = [name for name in result.columns if "macro_f1" in name]
accuracy_cols = [name for name in result.columns if "accuracy" in name]


<h3>Analyze results</h3>

In [44]:
# Mean of every macro-F1 column over the runs of each model type, rounded to
# four decimals: one DataFrame column per model type, one row per metric.
model_types = result["model_type"].unique().tolist()
macro_f1_means = {
    model_type: result.loc[result["model_type"] == model_type, macro_f1_cols]
    .mean(axis=0)
    .round(4)
    .tolist()
    for model_type in model_types
}
macro_f1_df = pd.DataFrame(macro_f1_means)
macro_f1_df.index = macro_f1_cols

# Gain of the "bias" model over the "baseline" model for each metric.
macro_f1_df["diff"] = macro_f1_df["bias"].sub(macro_f1_df["baseline"])

macro_f1_df

Unnamed: 0,bias,baseline,diff
test_macro_f1_generalisation_unfair,0.6236,0.5188,0.1048
test_macro_f1_dismissive,0.7009,0.5684,0.1325
test_macro_f1_hostile,0.6869,0.5932,0.0937
test_macro_f1_sarcastic,0.6346,0.4833,0.1513
test_macro_f1_condescending,0.7233,0.5789,0.1444
test_macro_f1_generalisation,0.6298,0.537,0.0928
test_macro_f1_antagonize,0.7097,0.5777,0.132
test_macro_f1_healthy,0.7535,0.5367,0.2168


In [45]:
# Mean of every accuracy column over the runs of each model type, rounded to
# four decimals: one DataFrame column per model type, one row per metric.
model_types = result["model_type"].unique().tolist()
accuracy_means = {
    model_type: result.loc[result["model_type"] == model_type, accuracy_cols]
    .mean(axis=0)
    .round(4)
    .tolist()
    for model_type in model_types
}
acc_df = pd.DataFrame(accuracy_means)
acc_df.index = accuracy_cols

# Gain of the "bias" model over the "baseline" model for each metric.
acc_df["diff"] = acc_df["bias"].sub(acc_df["baseline"])

acc_df

Unnamed: 0,bias,baseline,diff
test_accuracy_hostile,0.9003,0.8929,0.0074
test_accuracy_sarcastic,0.8589,0.8439,0.015
test_accuracy_dismissive,0.9013,0.8869,0.0144
test_accuracy_healthy,0.85,0.7826,0.0674
test_accuracy_generalisation_unfair,0.9293,0.9234,0.0059
test_accuracy_generalisation,0.9229,0.9166,0.0063
test_accuracy_condescending,0.8699,0.841,0.0289
test_accuracy_antagonize,0.8786,0.8564,0.0222
