In [1]:
hub_token = "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"


In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification


tokenizer = AutoTokenizer.from_pretrained(
    "cardiffnlp/twitter-roberta-base-2021-124m", normalization=True
)
model = AutoModelForSequenceClassification.from_pretrained(
    "cardiffnlp/twitter-roberta-base-2021-124m", num_labels=3
)


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-2021-124m were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-2021-124m and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.o

In [3]:
import pandas as pd
from datasets import Dataset, DatasetDict

dataframe = pd.concat(
    [
        pd.read_csv("./data/hateval/hateval2019_en_train.csv"),
        pd.read_csv("./data/hateval/hateval2019_en_dev.csv"),
        pd.read_csv("./data/hateval/hateval2019_en_test.csv"),
    ],
    keys=["train", "validation", "test"],
    names=["split", "index"],
).reset_index()

hypotheses = pd.Series(
    [
        "It is hate speech.",
        "This sentence contains hate speech.",
        "This sentence contains offensive language toward women or immigrants.",
    ],
    name="hypothesis",
)

dataframe = dataframe.merge(hypotheses, how="cross").rename(columns={"text": "premise"})
dataframe["label_categorical"] = dataframe["HS"] * (-2) + 2
dataframe = dataframe[["split", "id", "premise", "hypothesis", "label_categorical"]]

datasets = DatasetDict(
    {
        split: Dataset.from_pandas(dataframe[dataframe["split"] == split])
        for split in ["train", "validation", "test"]
    }
)
datasets


DatasetDict({
    train: Dataset({
        features: ['split', 'id', 'premise', 'hypothesis', 'label_categorical', '__index_level_0__'],
        num_rows: 27000
    })
    validation: Dataset({
        features: ['split', 'id', 'premise', 'hypothesis', 'label_categorical', '__index_level_0__'],
        num_rows: 3000
    })
    test: Dataset({
        features: ['split', 'id', 'premise', 'hypothesis', 'label_categorical', '__index_level_0__'],
        num_rows: 9000
    })
})

In [4]:
import numpy as np


def indice2logits(indice, num_classes):
    indice = np.array(indice)
    logits = np.zeros([len(indice), num_classes], dtype=float)
    logits[np.arange(len(indice)), indice] = 1.0
    return {"label_logits": logits}


datasets = datasets.map(
    lambda rec: tokenizer(
        rec["premise"],
        rec["hypothesis"],
        padding="longest",
        max_length=512,
        pad_to_multiple_of=8,
        return_token_type_ids=True,
        return_attention_mask=True,
    ),
)

datasets = datasets.map(
    lambda rec: indice2logits(rec["label_categorical"], 3),
    batched=True,
    batch_size=1024,
)

datasets = datasets.rename_column("label_logits", "labels")
datasets


  0%|          | 0/27000 [00:00<?, ?ex/s]

  0%|          | 0/3000 [00:00<?, ?ex/s]

  0%|          | 0/9000 [00:00<?, ?ex/s]

  0%|          | 0/27 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/9 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['split', 'id', 'premise', 'hypothesis', 'label_categorical', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 27000
    })
    validation: Dataset({
        features: ['split', 'id', 'premise', 'hypothesis', 'label_categorical', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 3000
    })
    test: Dataset({
        features: ['split', 'id', 'premise', 'hypothesis', 'label_categorical', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 9000
    })
})

In [5]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score


def compute_metrics(eval_preds):
    pred_logits, label_logits = eval_preds
    preds = pred_logits.argmax(axis=1)
    labels = label_logits.argmax(axis=1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="macro"),
    }


In [14]:
from transformers import TrainingArguments, Trainer
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"


training_args = TrainingArguments(
    output_dir="outputs/time_lm_nli",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    num_train_epochs=30,
    logging_strategy="epoch",
    remove_unused_columns=True,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=4,
    eval_accumulation_steps=128,
    optim="adamw_apex_fused",
    bf16=True,
    tf32=True,
    learning_rate=1e-6,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    push_to_hub=True,
    hub_strategy="all_checkpoints",
    hub_model_id="ChrisZeng/twitter-roberta-base-efl-hateval",
    hub_token=hub_token,
)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=datasets["train"],
    eval_dataset=datasets["validation"],
    compute_metrics=compute_metrics,
)

trainer_output = trainer.train(
     resume_from_checkpoint=True,
)
trainer.save_model()


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
/home/chris-zeng/csci544-project/outputs/time_lm_nli is already a clone of https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval. Make sure you pull the latest changes with `repo.git_pull()`.
Using amp half precision backend
Loading model from outputs/time_lm_nli/checkpoint-6330).
The following columns in the training set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: hypothesis, __index_level_0__, premise, label_categorical, id, split. If hypothesis, __index_level_0__, premise, label_categorical, id, split are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running train

0it [00:00, ?it/s]



Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from outputs/time_lm_nli/checkpoint-4009 (score: 0.7930196752075032).


Epoch,Training Loss,Validation Loss


Saving model checkpoint to outputs/time_lm_nli
Configuration saved in outputs/time_lm_nli/config.json
Model weights saved in outputs/time_lm_nli/pytorch_model.bin
tokenizer config file saved in outputs/time_lm_nli/tokenizer_config.json
Special tokens file saved in outputs/time_lm_nli/special_tokens_map.json
Saving model checkpoint to outputs/time_lm_nli
Configuration saved in outputs/time_lm_nli/config.json
Model weights saved in outputs/time_lm_nli/pytorch_model.bin
tokenizer config file saved in outputs/time_lm_nli/tokenizer_config.json
Special tokens file saved in outputs/time_lm_nli/special_tokens_map.json


Upload file training_args.bin: 100%|##########| 3.05k/3.05k [00:00<?, ?B/s]

To https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval
   f7b5b38..3351361  main -> main

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.7913333333333333}, {'name': 'F1', 'type': 'f1', 'value': 0.7899207605271177}]}


In [12]:
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)
import pandas as pd

tokenizer = AutoTokenizer.from_pretrained("ChrisZeng/twitter-roberta-base-efl-hateval")
model = AutoModelForSequenceClassification.from_pretrained(
    "ChrisZeng/twitter-roberta-base-efl-hateval"
)

training_args = TrainingArguments(
    output_dir="outputs/inference",
    overwrite_output_dir=True,
    remove_unused_columns=True,
    eval_accumulation_steps=128,
    disable_tqdm=True,
)

trainer = Trainer(model=model, tokenizer=tokenizer, args=training_args)


def predict(trainer, dataset):
    preds = trainer.predict(dataset).predictions.argmax(axis=1)
    df = (
        pd.DataFrame(
            {"id": dataset["id"], "pred": preds, "label": dataset["label_categorical"]}
        )
        .groupby("id")
        .mean()
    )
    df["pred"] = (df["pred"] > 1).astype(int) * 2
    df["label"] = df["label"].astype(int)
    return df


preds_test = predict(trainer, datasets["test"])
preds_dev = predict(trainer, datasets["validation"])

pd.DataFrame(
    {
        "dev": {
            "accuracy": accuracy_score(preds_dev["label"], preds_dev["pred"]),
            "f1": f1_score(preds_dev["label"], preds_dev["pred"], average="macro"),
        },
        "test": {
            "accuracy": accuracy_score(preds_test["label"], preds_test["pred"]),
            "f1": f1_score(preds_test["label"], preds_test["pred"], average="macro"),
        },
    }
).transpose()


https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /home/chris-zeng/.cache/huggingface/transformers/tmpxnkrb6lw


Downloading:   0%|          | 0.00/395 [00:00<?, ?B/s]

storing https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval/resolve/main/tokenizer_config.json in cache at /home/chris-zeng/.cache/huggingface/transformers/52fa2fc54deb3d2a20160be468621b71678a5472392303749a84c3f0cd6a20e6.a1b36501a87d1973ca00e86b765ffe71f426cd44cd9e8f063123840c161cfb5b
creating metadata file for /home/chris-zeng/.cache/huggingface/transformers/52fa2fc54deb3d2a20160be468621b71678a5472392303749a84c3f0cd6a20e6.a1b36501a87d1973ca00e86b765ffe71f426cd44cd9e8f063123840c161cfb5b
loading file https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval/resolve/main/vocab.json from cache at /home/chris-zeng/.cache/huggingface/transformers/2cdccf28634cc183ab7a37d256612502177ae99e58f0031b1f0af8593fadfc89.bfdcc444ff249bca1a95ca170ec350b442f81804d7df3a95a2252217574121d7
loading file https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval/resolve/main/merges.txt from cache at /home/chris-zeng/.cache/huggingface/transformers/af2b7cbb3983428236c5212830278a3e

Downloading:   0%|          | 0.00/946 [00:00<?, ?B/s]

storing https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval/resolve/main/config.json in cache at /home/chris-zeng/.cache/huggingface/transformers/8da03c3508aa6d4e4b598d7283c5aa609f8c3cb258cbb0bb4b366e06f6651e30.238fee94be8de4ee8a50af15fad79738c5b7b1e418024f07d61a75cdf765e3fb
creating metadata file for /home/chris-zeng/.cache/huggingface/transformers/8da03c3508aa6d4e4b598d7283c5aa609f8c3cb258cbb0bb4b366e06f6651e30.238fee94be8de4ee8a50af15fad79738c5b7b1e418024f07d61a75cdf765e3fb
loading configuration file https://huggingface.co/ChrisZeng/twitter-roberta-base-efl-hateval/resolve/main/config.json from cache at /home/chris-zeng/.cache/huggingface/transformers/8da03c3508aa6d4e4b598d7283c5aa609f8c3cb258cbb0bb4b366e06f6651e30.238fee94be8de4ee8a50af15fad79738c5b7b1e418024f07d61a75cdf765e3fb
Model config RobertaConfig {
  "_name_or_path": "ChrisZeng/twitter-roberta-base-efl-hateval",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 

Unnamed: 0,accuracy,f1
dev,0.793,0.791765
test,0.539,0.500086
