In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; the test CSV read
# later in this notebook is loaded from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import pandas as pd
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel
from transformers import logging as transformers_logging # logging is used to avoid the unnecessary warnings while downloading the custom model from hf
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score
import warnings

In [None]:
# Keep the Transformers logger at error level only, so fetching the custom model
# from the Hugging Face Hub does not flood the cell output with warnings.
transformers_logging.set_verbosity_error()
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Load the tokenizer and the model for the Hub checkpoint named by `model_id`.
model_id = "Hidden-States/roberta-base-go-emotions"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# NOTE(review): trust_remote_code=True allows the checkpoint's repository to supply
# its own Python model code, which is then executed locally — confirm the repo is
# trusted and consider pinning a specific `revision`.
model = AutoModel.from_pretrained(model_id, trust_remote_code=True)
# Move the model to `device` and switch it to eval mode. As the last expression of
# the cell, this also echoes the returned model object in the notebook output.
model.to(device).eval()

In [None]:
class GoEmotions_Dataset(Dataset):
    """Map-style dataset yielding tokenized text and multi-hot label vectors.

    Every row of ``data`` must provide a ``text`` column plus one numeric column
    per label, named by ``target_cols``.

    Args:
        data: Source frame; rows are addressed by position (``iloc``).
        tokenizer: Callable Hugging Face-style tokenizer. It is invoked as
            ``tokenizer(text, ...)`` with ``return_tensors='pt'`` and must return
            a mapping with ``input_ids`` and ``attention_mask`` tensors that carry
            a leading batch dimension of size 1.
        max_len: Length every sequence is truncated/padded to. Defaults to 128.
        target_cols: Names of the label columns. Defaults to the strings
            ``"0"`` .. ``"27"``.

    Raises:
        KeyError: If ``data`` lacks the ``text`` column or any label column.
    """

    def __init__(self, data: pd.DataFrame, tokenizer, max_len: int = 128, target_cols=None):
        self.tokenizer = tokenizer
        self.data = data
        self.max_len = max_len
        if target_cols is None:
            self.target_cols = [str(i) for i in range(28)]
        else:
            self.target_cols = list(target_cols)

        # Fail fast with a readable message instead of erroring on the first
        # __getitem__ call (possibly inside a DataLoader worker process).
        missing = [col for col in ["text", *self.target_cols] if col not in data.columns]
        if missing:
            raise KeyError(f"DataFrame is missing required column(s): {missing}")

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` (batch dimension
            squeezed away) and ``hard_target`` (float32 vector with one entry
            per label column).
        """
        item = self.data.iloc[idx]
        text = str(item["text"])
        # Calling the tokenizer directly is the supported replacement for the
        # deprecated ``encode_plus``; for a single text the output is the same.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            truncation=True,
            max_length=self.max_len,
            padding='max_length',
            return_tensors='pt'
        )
        # The row Series is object-typed (text mixed with labels), so cast the
        # label slice explicitly before building the tensor.
        target = torch.tensor(item[self.target_cols].to_numpy(dtype='float32'))
        return {
            # return_tensors='pt' adds a batch dimension of 1; drop it so the
            # DataLoader can stack examples itself.
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "hard_target": target
        }

In [None]:
# Read the test CSV from the mounted Drive and wrap it in a batched loader.
test_path = "/content/drive/MyDrive/Go-Emotions-Test.csv"
df_test = pd.read_csv(test_path)
test_dataset = GoEmotions_Dataset(data=df_test, tokenizer=tokenizer)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=64, num_workers=2)

In [None]:
def evaluate(model, dataloader, device, threshold=0.5, label_names=None):
    """Score a multi-label classifier on ``dataloader`` and print a metrics report.

    Logits are passed through a sigmoid and binarized at ``threshold``; the
    resulting predictions are compared with each batch's ``hard_target``.

    Args:
        model: Callable invoked as ``model(input_ids=..., attention_mask=...)``.
            NOTE(review): it is assumed to return a 2-tuple whose second element
            holds the logits — confirm against the custom model's forward().
        dataloader: Iterable of batches, each a dict with ``input_ids``,
            ``attention_mask`` and ``hard_target`` tensors.
        device: Device the input tensors are moved to before the forward pass.
        threshold: Probability at or above which a label counts as predicted.
        label_names: Row names for the per-class report, one per label column.
            Defaults to the 28 emotion labels listed below.

    Returns:
        dict mapping ``micro_precision``, ``macro_precision``, ``micro_recall``,
        ``macro_recall``, ``micro_f1`` and ``macro_f1`` to floats. The same
        values, plus the per-class report, are printed.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    if label_names is None:
        label_names = [
            "admiration", "amusement", "anger", "annoyance", "approval", "caring",
            "confusion", "curiosity", "desire", "disappointment", "disapproval",
            "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
            "joy", "love", "nervousness", "optimism", "pride", "realization",
            "relief", "remorse", "sadness", "surprise", "neutral"
        ]

    # Make sure dropout and similar layers are in inference mode even if the
    # caller forgot model.eval(); the previous mode is restored afterwards.
    # Objects without a `training` flag are left untouched.
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()

    preds_all = []
    truths_all = []

    try:
        with torch.no_grad():
            for batch in tqdm(dataloader):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)

                _, logits = model(input_ids=input_ids, attention_mask=attention_mask)

                probs = torch.sigmoid(logits)
                preds = (probs >= threshold).int()

                preds_all.append(preds.cpu())
                # Targets are only needed on the CPU for scikit-learn, so skip
                # the device round trip the inputs require.
                truths_all.append(batch['hard_target'].cpu().int())
    finally:
        if was_training:
            model.train()

    if not preds_all:
        raise ValueError("dataloader yielded no batches; nothing to evaluate")

    preds_all = torch.cat(preds_all, dim=0).numpy()
    truths_all = torch.cat(truths_all, dim=0).numpy()

    # Compute metrics
    micro_precision = precision_score(truths_all, preds_all, average='micro', zero_division=0)
    macro_precision = precision_score(truths_all, preds_all, average='macro', zero_division=0)

    micro_recall = recall_score(truths_all, preds_all, average='micro', zero_division=0)
    macro_recall = recall_score(truths_all, preds_all, average='macro', zero_division=0)

    micro_f1 = f1_score(truths_all, preds_all, average='micro', zero_division=0)
    macro_f1 = f1_score(truths_all, preds_all, average='macro', zero_division=0)

    print(f"\n\nMicro Precision: {micro_precision} \nMacro Precision: {macro_precision}\n")
    print(f"Micro Recall: {micro_recall} \nMacro Recall: {macro_recall}\n")
    print(f"Micro F1: {micro_f1} \nMacro F1: {macro_f1}")

    print("\n===== CLASSIFICATION REPORT =====\n")
    print(classification_report(
        truths_all,
        preds_all,
        target_names=label_names,
        zero_division=0
    ))

    return {
        "micro_precision": float(micro_precision),
        "macro_precision": float(macro_precision),
        "micro_recall": float(micro_recall),
        "macro_recall": float(macro_recall),
        "micro_f1": float(micro_f1),
        "macro_f1": float(macro_f1),
    }

In [None]:
# Score the loaded model on the test split using evaluate's default threshold.
evaluate(model=model, dataloader=test_dataloader, device=device)