In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import wandb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import transformers
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric,load_dataset
import evaluate

- We must tokenise the input before feeding to the model. We use the tokenizer from the pretrained model.
- The dataset must have the label column named as 'label', to be trained using Trainer API. It is preferable to keep the text column as 'text'. However since we are tokenising the text column, it can be named anything, as we just pass the tokenised columns input_ids and attention_mask to the model.

In [2]:
finetune_model_name = "roberta-base-emotion-prediction-phr-2"

In [3]:
label2id = {
    "anger":0,
    "anticipation":1,
    "disgust":1,
    "fear":1,
    "joy":1,
    "love":1,
    "optimism":1,
    "pessimism":1,
    "sadness":1,
    "surprise":1,
    "trust":1
}

id2label = {
    0:"anger",
    1:"anticipation",
    2:"disgust",
    3:"fear",
    4:"joy",
    5:"love",
    6:"optimism",
    7:"pessimism",
    8:"sadness",
    9:"surprise",
    10:"trust"
}

In [4]:
tokeniser = AutoTokenizer.from_pretrained("roberta-base",problem_type ="multi_label_classification")
model = AutoModelForSequenceClassification.from_pretrained("roberta-base", problem_type ="multi_label_classification", num_labels=11,label2id=label2id,id2label=id2label)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
dataset = load_dataset("vibhorag101/sem_eval_2018_task_1_english_cleaned_labels")

In [6]:
def tokeniseDataset(dataset):
    return(tokeniser(dataset["text"],padding="max_length",truncation=True))

#
dataset.set_format("torch")
dataset = dataset.map(lambda x : {"float_labels": x["labels"].to(torch.float)}, remove_columns=["labels"]).rename_column("float_labels", "labels")

column_names = dataset["train"].column_names
column_names.remove("labels")
tokenisedDataset = dataset.map(tokeniseDataset,batched=True,remove_columns=column_names)

In [7]:
trainTokeniseDataset = tokenisedDataset["train"]
testTokenisedDataset = tokenisedDataset["test"]
valTokenisedDataset = tokenisedDataset["test"]
print(trainTokeniseDataset)
print(valTokenisedDataset)
print(testTokenisedDataset)

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 6838
})
Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 3259
})
Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 3259
})


In [8]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score,precision_score,recall_score

def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    sigmoid_pred = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into predicted labels
    y_pred  = np.where(sigmoid_pred > threshold, 1, 0)

    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    precision = precision_score(y_true, y_pred, average='micro')
    recall = recall_score(y_true, y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)

    metrics = { 'accuracy': accuracy,
                'micro_precision': precision,
                'micro_recall': recall,
                'micro_f1': f1_micro_average,
                'micro_roc_auc': roc_auc,
                }
    return metrics

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    return multi_label_metrics(logits, labels)

In [9]:
wandb.login()
wandb.init(project="huggingface", entity="vibhor20349", name=finetune_model_name)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mvibhor20349[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113506599536372, max=1.0…

In [19]:
training_args = TrainingArguments(
    output_dir=finetune_model_name,
    report_to = 'wandb',
    learning_rate=2e-5, # recommended in roberta paper
    num_train_epochs=3, #recommended in bert paper
    per_device_train_batch_size=16, # recommended in roberta paper
    per_device_eval_batch_size=16, # recommended in roberta paper
    evaluation_strategy="steps",
    eval_steps = 1000,
    save_strategy="steps",
    save_steps=1000,
    load_best_model_at_end=True,
    logging_steps=500,
    push_to_hub=True #to clone the training repo just before starting to train, so push to hub works
)

In [21]:
trainer = Trainer(
    model= model,
    args=training_args,
    train_dataset=trainTokeniseDataset,
    eval_dataset=valTokenisedDataset,
    compute_metrics=compute_metrics,
    tokenizer=tokeniser
)

Cloning https://huggingface.co/vibhorag101/roberta-base-emotion-prediction-phr into local empty directory.


In [22]:
# print(trainer.predict(testTokenisedDataset))
trainer.evaluate(eval_dataset=testTokenisedDataset)

{'eval_loss': 0.685547947883606,
 'eval_accuracy': 0.0,
 'eval_micro_precision': 0.1971156796563363,
 'eval_micro_recall': 0.40818401321641884,
 'eval_micro_f1': 0.2658500248303261,
 'eval_micro_roc_auc': 0.4703000123265797,
 'eval_runtime': 30.2704,
 'eval_samples_per_second': 107.663,
 'eval_steps_per_second': 6.739}

In [None]:
trainer.train()
# #trainer.train(resume_from_checkpoint=True)

In [None]:
trainer.save_model(finetune_model_name)

In [None]:
trainer.push_to_hub(finetune_model_name)

In [23]:
### IF we want to make predictions
input_text = "You are an extremely good person. Thank you so much"
inputs = tokeniser(input_text, return_tensors="pt")

# Make predictions
with torch.no_grad():
    ## BERT expects the keyword agruments "token_type_ids" and "attention_mask" in input.
    # so we convert inputs dictionary to keyword arguments using ** before passing to the model
    inputs.to('cuda')
    outputs = model(**inputs)
    ## output of Bert contains logits for each class, pooled output and hidden states and attentions

# Extract the predicted logits(raw values for each class)
logits = outputs.logits
print(outputs)

## logits are just raw values for each class. To get probabilities we use softmax
sigmoid_logits = torch.nn.functional.sigmoid(logits)
predictions = np.where(sigmoid_logits.cpu() > 0.5, 1, 0)

# Get the predicted labels from id2label dictionary
label_predictions = []
for pred in predictions:
    label_predictions.append([id2label[i] for i, val in enumerate(pred) if val])
print(label_predictions)


SequenceClassifierOutput(loss=None, logits=tensor([[-0.1404,  0.1603,  0.0559, -0.0134, -0.2044, -0.2912,  0.0414,  0.0249,
         -0.2535,  0.0668, -0.0723]], device='cuda:0'), hidden_states=None, attentions=None)
[['anticipation', 'disgust', 'optimism', 'pessimism', 'surprise']]
