In [1]:
import functools
import os
import re
import string

import datasets
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import transformers
import wandb

from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
from tqdm.notebook import tqdm, trange



In [2]:
# Set the W&B project name through the environment so that runs started later
# in this notebook without an explicit project are grouped under it.
%env WANDB_PROJECT=sarc-finetuning
# Authenticate with Weights & Biases; the call's return value is shown as the
# cell output.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: simon-andrews (umass-iesl-is). Use `wandb login --relogin` to force relogin


env: WANDB_PROJECT=sarc-finetuning


True

In [3]:
@functools.lru_cache(maxsize=None)
def _load_bertweet_tokenizer():
    """Load the BERTweet tokenizer once and hand back the same object afterwards."""
    # NOTE(review): `do_lower_case=True` is kept exactly as in the original call;
    # confirm that BertweetTokenizer actually honors this option.
    return transformers.BertweetTokenizer.from_pretrained(
        "vinai/bertweet-base",
        do_lower_case=True
    )


def tokenize(example, tokenizer=None):
    """Encode the ``"text"`` field of an example (or batch) into fixed-length model inputs.

    Args:
        example: Mapping with a ``"text"`` entry. It is passed straight to the
            tokenizer, so it may be whatever the tokenizer accepts (the
            notebook calls this through ``Dataset.map(..., batched=True)``).
        tokenizer: Optional callable used instead of the default BERTweet
            tokenizer. When omitted, the default tokenizer is loaded on first
            use and reused on every later call instead of being re-created
            for each batch.

    Returns:
        dict with the ``"input_ids"`` and ``"attention_mask"`` produced by the
        tokenizer; any other tokenizer outputs are dropped.
    """
    if tokenizer is None:
        tokenizer = _load_bertweet_tokenizer()

    encoded_dict = tokenizer(
        example["text"],
        add_special_tokens=True, # add the model's special start/end tokens
        max_length=64, # every sequence is padded/truncated to this length
        padding="max_length",
        truncation=True, # truncate large inputs
        return_attention_mask=True, # construct attention masks
    )

    return {
        "input_ids": encoded_dict["input_ids"],
        "attention_mask": encoded_dict["attention_mask"]
    }

In [4]:
# Label encoding:
#   0 --> not sarcastic
#   1 --> sarcastic

# Read the processed SARC CSV, drop the "Unnamed: 0" column (presumably a saved
# index -- confirm against the file) and keep a reproducible random half.
sarc_frame = pd.read_csv("../SARC2/sarc_processed.csv")
sarc_frame = sarc_frame.drop(columns=["Unnamed: 0"])
sarc_frame = sarc_frame.sample(frac=0.5, random_state=685)

# Convert to a Hugging Face dataset, remove the index column that came along
# from the DataFrame, split 80/20, then tokenize both splits in batches.
sarc_dataset = datasets.Dataset.from_pandas(sarc_frame)
sarc_dataset = sarc_dataset.remove_columns("__index_level_0__")
sarc_splits = sarc_dataset.train_test_split(test_size=0.2, seed=685)
sarc_data = sarc_splits.map(tokenize, batched=True)

sarc_data

  0%|          | 0/26 [00:00<?, ?ba/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make s

  0%|          | 0/7 [00:00<?, ?ba/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 25866
    })
    test: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 6467
    })
})

In [5]:
# Use the GPU only when one is actually usable at runtime.
# `torch.cuda.is_available()` checks for a working CUDA device, whereas the
# previous `torch.has_cuda` only says whether PyTorch was built with CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [6]:
def compute_metrics(eval_pred):
    """Compute binary-classification metrics from logits and gold labels.

    Args:
        eval_pred: Pair ``(logits, labels)``. ``logits`` is reduced with
            ``argmax`` over its last axis to get the predicted class;
            ``labels`` holds the true classes (0 = not sarcastic,
            1 = sarcastic).

    Returns:
        dict with accuracy, F1, precision, recall and the four confusion
        matrix counts (as plain ``int``), keyed by the display names used in
        the training logs.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Pin the class order so the matrix is always 2x2. Without `labels=[0, 1]`
    # the matrix shrinks to 1x1 when only one class shows up in both `labels`
    # and `predictions`, and indexing row/column 1 raises IndexError.
    cfm = confusion_matrix(labels, predictions, labels=[0, 1])
    # Row = true class, column = predicted class, flattened row by row.
    true_negatives, false_positives, false_negatives, true_positives = (
        int(count) for count in cfm.ravel()
    )

    return {
        "Accuracy": accuracy_score(labels, predictions),
        # zero_division=0 keeps the value scikit-learn already falls back to
        # for an undefined score, but without emitting a warning.
        "F1 score": f1_score(labels, predictions, zero_division=0),
        "True positives": true_positives,
        "False positives": false_positives,
        "True negatives": true_negatives,
        "False negatives": false_negatives,
        "Precision": precision_score(labels, predictions, zero_division=0),
        "Recall": recall_score(labels, predictions, zero_division=0),
    }

# Smoke-test compute_metrics on six fake examples. The logits come from a
# seeded generator so the displayed result is the same on every run.
compute_metrics((
    np.random.default_rng(685).normal(size=(6, 2)),
    np.array([0, 1, 0, 0, 1, 0])
))

{'Accuracy': 0.3333333333333333,
 'F1 score': 0.3333333333333333,
 'True positives': 1,
 'False positives': 3,
 'True negatives': 1,
 'False negatives': 1,
 'Precision': 0.25,
 'Recall': 0.5}

In [7]:
# Fine-tune BERTweet on the SARC split once per learning rate, logging each
# attempt as its own W&B run and writing checkpoints to a per-run directory.
for learning_rate in [1e-5, 2e-5, 3e-5, 4e-5]:
    name = f"bertweet-then-sarc-with-lr-{learning_rate}"

    # Seed before the model is built so the freshly initialised classification
    # head is the same on every run of this cell.
    transformers.set_seed(685)

    # The run is created here, before the Trainer exists, so the name has to be
    # given to wandb.init directly: the Trainer's W&B integration reuses an
    # already-active run and does not apply `run_name` to it.
    with wandb.init(name=name) as run:
        model = transformers.AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base", num_labels=2).to(device)

        training_args = transformers.TrainingArguments(
            output_dir=name,
            report_to="wandb",
            run_name=name,
            num_train_epochs=3,
            learning_rate=learning_rate,
            evaluation_strategy="epoch",  # evaluate on the test split after every epoch
            save_strategy="epoch",  # and save a checkpoint at the same points
            per_device_train_batch_size=64,
            logging_steps=20,
        )

        trainer = transformers.Trainer(
            model=model,
            args=training_args,
            train_dataset=sarc_data["train"],
            eval_dataset=sarc_data["test"],
            compute_metrics=compute_metrics,
        )

        trainer.train()

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'roberta.pooler.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

Epoch,Training Loss,Validation Loss,Accuracy,F1 score,True positives,False positives,True negatives,False negatives,Precision,Recall
1,0.6139,0.597793,0.67945,0.661882,2029,884,2365,1189,0.696533,0.630516
2,0.5803,0.58138,0.692748,0.676121,2074,843,2406,1144,0.711004,0.6445
3,0.5446,0.583983,0.696304,0.692645,2213,959,2290,1005,0.697667,0.687694


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-1e-05/checkpoint-405
Configuration saved in bertweet-then-sarc-with-lr-1e-05/checkpoint-405/config.json
Model weights saved in bertweet-then-sarc-with-lr-1e-05/checkpoint-405/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-1e-05/checkpoint-810
Configur

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁▇█
eval/F1 score,▁▄█
eval/False negatives,█▆▁
eval/False positives,▃▁█
eval/Precision,▁█▂
eval/Recall,▁▃█
eval/True negatives,▆█▁
eval/True positives,▁▃█
eval/loss,█▁▂
eval/runtime,▁▇█

0,1
eval/Accuracy,0.6963
eval/F1 score,0.69264
eval/False negatives,1005.0
eval/False positives,959.0
eval/Precision,0.69767
eval/Recall,0.68769
eval/True negatives,2290.0
eval/True positives,2213.0
eval/loss,0.58398
eval/runtime,27.2601


loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.18.0",
  "type_vocab_size": 1,
  "use

Epoch,Training Loss,Validation Loss,Accuracy,F1 score,True positives,False positives,True negatives,False negatives,Precision,Recall
1,0.5966,0.589852,0.686408,0.650465,1887,697,2552,1331,0.730263,0.586389
2,0.5418,0.57443,0.702644,0.682306,2065,770,2479,1153,0.728395,0.641703
3,0.4827,0.590376,0.703881,0.700172,2236,933,2316,982,0.705585,0.694842


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-2e-05/checkpoint-405
Configuration saved in bertweet-then-sarc-with-lr-2e-05/checkpoint-405/config.json
Model weights saved in bertweet-then-sarc-with-lr-2e-05/checkpoint-405/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-2e-05/checkpoint-810
Configur

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁██
eval/F1 score,▁▅█
eval/False negatives,█▄▁
eval/False positives,▁▃█
eval/Precision,█▇▁
eval/Recall,▁▅█
eval/True negatives,█▆▁
eval/True positives,▁▅█
eval/loss,█▁█
eval/runtime,█▁▁

0,1
eval/Accuracy,0.70388
eval/F1 score,0.70017
eval/False negatives,982.0
eval/False positives,933.0
eval/Precision,0.70559
eval/Recall,0.69484
eval/True negatives,2316.0
eval/True positives,2236.0
eval/loss,0.59038
eval/runtime,26.4629


loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.18.0",
  "type_vocab_size": 1,
  "use

Epoch,Training Loss,Validation Loss,Accuracy,F1 score,True positives,False positives,True negatives,False negatives,Precision,Recall
1,0.5881,0.583363,0.694449,0.669123,1998,756,2493,1220,0.72549,0.620883
2,0.5167,0.576229,0.709139,0.696662,2160,823,2426,1058,0.724103,0.671224
3,0.4274,0.608566,0.711304,0.712902,2318,967,2282,900,0.705632,0.720323


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-3e-05/checkpoint-405
Configuration saved in bertweet-then-sarc-with-lr-3e-05/checkpoint-405/config.json
Model weights saved in bertweet-then-sarc-with-lr-3e-05/checkpoint-405/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-3e-05/checkpoint-810
Configur

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁▇█
eval/F1 score,▁▅█
eval/False negatives,█▄▁
eval/False positives,▁▃█
eval/Precision,██▁
eval/Recall,▁▅█
eval/True negatives,█▆▁
eval/True positives,▁▅█
eval/loss,▃▁█
eval/runtime,▁█▆

0,1
eval/Accuracy,0.7113
eval/F1 score,0.7129
eval/False negatives,900.0
eval/False positives,967.0
eval/Precision,0.70563
eval/Recall,0.72032
eval/True negatives,2282.0
eval/True positives,2318.0
eval/loss,0.60857
eval/runtime,26.8797


loading configuration file https://huggingface.co/vinai/bertweet-base/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/356366feedcea0917e30f7f235e1e062ffc2d28138445d5672a184be756c8686.a2b6026e688d1b19cebc0981d8f3a5b1668eabfda55b2c42049d5eac0bc8cb2d
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertweetTokenizer",
  "transformers_version": "4.18.0",
  "type_vocab_size": 1,
  "use

Epoch,Training Loss,Validation Loss,Accuracy,F1 score,True positives,False positives,True negatives,False negatives,Precision,Recall
1,0.5754,0.585461,0.693212,0.660158,1927,693,2556,1291,0.735496,0.598819
2,0.4999,0.592453,0.709293,0.684987,2044,706,2543,1174,0.743273,0.635177
3,0.382,0.633121,0.705891,0.709796,2326,1010,2239,892,0.697242,0.722809


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-4e-05/checkpoint-405
Configuration saved in bertweet-then-sarc-with-lr-4e-05/checkpoint-405/config.json
Model weights saved in bertweet-then-sarc-with-lr-4e-05/checkpoint-405/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6467
  Batch size = 8
Saving model checkpoint to bertweet-then-sarc-with-lr-4e-05/checkpoint-810
Configur

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁█▇
eval/F1 score,▁▅█
eval/False negatives,█▆▁
eval/False positives,▁▁█
eval/Precision,▇█▁
eval/Recall,▁▃█
eval/True negatives,██▁
eval/True positives,▁▃█
eval/loss,▁▂█
eval/runtime,██▁

0,1
eval/Accuracy,0.70589
eval/F1 score,0.7098
eval/False negatives,892.0
eval/False positives,1010.0
eval/Precision,0.69724
eval/Recall,0.72281
eval/True negatives,2239.0
eval/True positives,2326.0
eval/loss,0.63312
eval/runtime,26.3607


In [9]:
# Re-save the last checkpoint of the lr=3e-05 run under a stable directory name.
# NOTE(review): the checkpoint's config lists RobertaForSequenceClassification,
# but it is loaded through AutoModelForPreTraining here -- presumably only the
# shared encoder weights are wanted for a later stage; confirm that dropping
# the classification head is intentional.
best_checkpoint_dir = "./bertweet-then-sarc-with-lr-3e-05/checkpoint-1215"
model = transformers.AutoModelForPreTraining.from_pretrained(best_checkpoint_dir)

model.save_pretrained("sarc-final")

loading configuration file ./bertweet-then-sarc-with-lr-3e-05/checkpoint-1215/config.json
Model config RobertaConfig {
  "_name_or_path": "./bertweet-then-sarc-with-lr-3e-05/checkpoint-1215",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "tokenizer_class": "BertweetTokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.18.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 64001
}

loading weights file ./bertweet-