In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModel
)
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report
from huggingface_hub import hf_hub_download

In [3]:
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
roberta_base = AutoModel.from_pretrained("roberta-base")

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("using device: ", device)

using device:  cuda


In [5]:
roberta_base.pooler = None
roberta_base.gradient_checkpointing_enable()

In [6]:
roberta_base.config.hidden_size

768

In [7]:
test_path = "/content/drive/MyDrive/Go-Emotions-Test.csv"

df_test = pd.read_csv(test_path)

In [8]:
class GoEmotions_Dataset(Dataset):
    def __init__(self, data: pd.DataFrame, tokenizer):
        self.tokenizer = tokenizer
        self.data = data
        self.max_len = 128
        self.target_cols = [str(i) for i in range(28)]

    def __len__(self):
        return(len(self.data))

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        text = str(item.text)
        encoding = self.tokenizer.encode_plus(text,
                                            add_special_tokens=True,
                                            truncation=True,
                                            return_tensors='pt',
                                            max_length=self.max_len,
                                            padding='max_length',
                                            return_attention_mask=True)

        target = torch.tensor(item[self.target_cols].values.astype('float32'))

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "atten_mask": encoding["attention_mask"].squeeze(0),
            "hard_target": target
        }

In [12]:
test_dataloader = DataLoader(GoEmotions_Dataset(df_test, roberta_tokenizer), batch_size=64, num_workers=2)

In [15]:
class Encoder(nn.Module):

    def __init__(self, base_encoder):
        super().__init__()
        self.encoder = base_encoder

    def forward(self, inputs):

        outputs = self.encoder(**inputs, output_hidden_states=True)
        last_hidden_state = outputs.hidden_states[-1]

        atten_mask = inputs['attention_mask']

        atten_mask = atten_mask.unsqueeze(-1).float()
        text_emb = (last_hidden_state * atten_mask).sum(dim=1) / atten_mask.sum(dim=1).clamp(min=1e-9)
        text_emb = F.normalize(text_emb, p=2, dim=1)

        return text_emb

In [16]:
class Classifier(nn.Module):
    def __init__(self, input_dim=768, num_classes=28):
        super().__init__()
        self.input_dim = input_dim

        self.mlp = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.LayerNorm(512),
            nn.GELU(),
            nn.Dropout(0.25),
            nn.Linear(512, num_classes)
        )

    def forward(self, h):
        return self.mlp(h)

In [17]:
# Main Model class
class EmoAxis(nn.Module):
    def __init__(self, encoder, classifier):
        super().__init__()
        self.encoder = encoder
        self.classifier = classifier

    def forward(self, inputs: dict):
        # Encoder
        outputs = self.encoder(inputs)

        # Classifier
        logits = self.classifier(outputs)

        return outputs, logits

In [None]:
def evaluate(model, dataloader, device, threshold=0.5):

    model.eval()

    preds_all = []
    truths_all = []

    with torch.no_grad():
        for i, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['atten_mask'].to(device)
            hard_target = batch['hard_target'].to(device)

            _,logits = model(inputs={"input_ids": input_ids, "attention_mask": attention_mask})

            probs = torch.sigmoid(logits)
            preds = (probs >= threshold).int()

            preds_all.append(preds.cpu())
            truths_all.append(hard_target.cpu().int())

    preds_all = torch.cat(preds_all, dim=0).numpy()
    truths_all = torch.cat(truths_all, dim=0).numpy()

    # Compute metrics
    micro_precision = precision_score(truths_all, preds_all, average='micro', zero_division=0)
    macro_precision = precision_score(truths_all, preds_all, average='macro', zero_division=0)

    micro_recall = recall_score(truths_all, preds_all, average='micro', zero_division=0)
    macro_recall = recall_score(truths_all, preds_all, average='macro', zero_division=0)

    micro_f1 = f1_score(truths_all, preds_all, average='micro', zero_division=0)
    macro_f1 = f1_score(truths_all, preds_all, average='macro', zero_division=0)

    print(f"\n\nMicro Precision: {micro_precision} \nMacro Precision: {macro_precision}\n")
    print(f"Micro Recall: {micro_recall} \nMacro Recall: {macro_recall}\n")
    print(f"Micro F1: {micro_f1} \nMacro F1: {macro_f1}")

    emotion_labels = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire",
    "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
    "joy", "love", "nervousness", "neutral", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise"
    ]

    print("\n===== CLASSIFICATION REPORT =====\n")

    print(classification_report(
        truths_all,
        preds_all,
        target_names=emotion_labels,
        zero_division=0
    ))

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

ckpt_path = hf_hub_download(
    repo_id="Hidden-States/roberta-base-go-emotions-pt-only",
    filename="EmoAxis-Go-Emotions.pt"
)

model.pt:   0%|          | 0.00/1.49G [00:00<?, ?B/s]

In [24]:
checkpoint = torch.load(ckpt_path, map_location="cpu")
state_dict = checkpoint["model_state_dict"]

In [25]:
encoder = Encoder(base_encoder=roberta_base)
classifier = Classifier()
trained_model = EmoAxis(encoder=encoder, classifier=classifier)

In [26]:
trained_model.load_state_dict(state_dict, strict=False)
trained_model.to(device)
print("Checkpoint loaded successfully!")

Checkpoint loaded successfully!


In [27]:
trained_model.eval()

EmoAxis(
  (encoder): Encoder(
    (encoder): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, o

In [28]:
evaluate(trained_model, test_dataloader, device)

100%|██████████| 85/85 [00:30<00:00,  2.75it/s]



Micro Precision: 0.5930382466695315 
Macro Precision: 0.5704941965207051

Micro Recall: 0.6541317743719387 
Macro Recall: 0.5724077340480935

Micro F1: 0.6220886551465064 
Macro F1: 0.5578135211573036

===== CLASSIFICATION REPORT =====

                precision    recall  f1-score   support

    admiration       0.60      0.86      0.70       504
     amusement       0.71      0.95      0.81       264
         anger       0.43      0.62      0.51       198
     annoyance       0.45      0.34      0.38       320
      approval       0.49      0.38      0.43       351
        caring       0.47      0.44      0.46       135
     confusion       0.51      0.46      0.48       153
     curiosity       0.47      0.76      0.58       284
        desire       0.61      0.53      0.57        83
disappointment       0.51      0.21      0.30       151
   disapproval       0.49      0.31      0.38       267
       disgust       0.57      0.44      0.50       123
 embarrassment       0.73      0


