In [1]:
import accelerate, transformers
# Record the library versions this notebook was executed with, for reproducibility.
print(f"accelerate: {accelerate.__version__}")
print(f"transformers: {transformers.__version__}")

accelerate: 1.12.0
transformers: 4.57.3


In [2]:
# Fetch the project repository that holds the data folder and the earlier notebooks.
# NOTE(review): `git clone` fails when the target directory already exists and the
# `%cd` below is relative to the current directory, so this cell presumably only
# works once per fresh runtime — confirm before relying on re-runs.
!git clone https://github.com/DariusCornescu/Emotion-Detection.git
# Enter the implementation folder so the relative `data/...` and `reports` paths
# used by later cells resolve from here.
%cd Emotion-Detection/EA-RoBERTa implementation/
# List the folder contents as a quick check of what was cloned.
!ls

Cloning into 'Emotion-Detection'...
remote: Enumerating objects: 57, done.[K
remote: Counting objects: 100% (57/57), done.[K
remote: Compressing objects: 100% (43/43), done.[K
remote: Total 57 (delta 9), reused 57 (delta 9), pack-reused 0 (from 0)[K
Receiving objects: 100% (57/57), 17.60 MiB | 19.15 MiB/s, done.
Resolving deltas: 100% (9/9), done.
/content/Emotion-Detection/EA-RoBERTa implementation
baseline_RoBERTa.ipynb	data  data_preprocess.ipynb  data.zip  reports


In [10]:
import pandas as pd
import numpy as np
import json
import torch , os
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef
from transformers import ( RobertaTokenizerFast, RobertaModel, DataCollatorWithPadding, TrainingArguments, Trainer )
from transformers.modeling_outputs import SequenceClassifierOutput
import torch.nn as nn
import torch.nn.functional as F
from typing import Optional

# Pick the compute device once: CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


### ÎNCĂRCARE DATE PROCESATE

 1. Încărcare DataFrames
 2. Încărcare Label Mappings
 3. Încărcare Config
 4. Tokenizer
 5. Dataset Class
 6. Creare Datasets
 7. Data Collator


In [4]:
# --- 1. DataFrames ----------------------------------------------------------
# Training uses the oversampled split; validation and test are read as stored.
train_df_oversampled = pd.read_csv('data/processed/train_oversampled.csv')
val_df = pd.read_csv('data/processed/val.csv')
test_df = pd.read_csv('data/processed/test.csv')

# Fail fast when a split lacks the columns the dataset wrapper reads
# ('text' and 'label_id'), instead of hitting a KeyError further down.
missing_columns = {
    split: sorted({'text', 'label_id'} - set(frame.columns))
    for split, frame in (('train', train_df_oversampled), ('val', val_df), ('test', test_df))
}
assert not any(missing_columns.values()), f"Missing required columns: {missing_columns}"

print("DONE -- data uploaded ")
print(f"   Train: {len(train_df_oversampled):,}")
print(f"   Val:   {len(val_df):,}")
print(f"   Test:  {len(test_df):,}")

# --- 2. Label mappings ------------------------------------------------------
# JSON is read explicitly as UTF-8 so decoding does not depend on the locale.
with open('data/processed/label_mappings.json', 'r', encoding='utf-8') as f:
    mappings = json.load(f)

label_to_id = mappings['label_to_id']
# JSON object keys are always strings, so restore the integer class ids.
id_to_label = {int(k): v for k, v in mappings['id_to_label'].items()}
label_list = mappings['label_list']
NUMBER_OF_LABELS = mappings['num_labels']
assert NUMBER_OF_LABELS == len(label_list), (
    f"num_labels={NUMBER_OF_LABELS} does not match {len(label_list)} entries in label_list"
)

print(f"DONE -- Labels: {label_list} -- ")

# --- 3. Preprocessing config ------------------------------------------------
with open('data/processed/config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

MODEL_NAME = config['model_name']
MAX_LENGTH = config['max_length']
BATCH_SIZE = config['batch_size']

print(f"DONE -- Config: model={MODEL_NAME}, max_len={MAX_LENGTH}, batch={BATCH_SIZE} -- ")

# --- 4. Tokenizer -----------------------------------------------------------
# Loaded from the same checkpoint name that the config file specifies.
tokenizer = RobertaTokenizerFast.from_pretrained(MODEL_NAME)
print("DONE -- Tokenizer încărcat -- ")

class EmotionDataset(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Reads the 'text' and 'label_id' columns of `dataframe` into Python lists.
    Each item is tokenized on access with truncation to `max_length`; no
    padding is applied here, so items may differ in length.
    """

    def __init__(self, dataframe, tokenizer, max_length=128):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.texts = dataframe['text'].tolist()
        self.labels = dataframe['label_id'].tolist()

    def __len__(self):
        """Number of examples (one per row of the source dataframe)."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenized example at `idx` as a plain dict.

        Keys: 'input_ids' and 'attention_mask' as produced by the tokenizer,
        plus 'labels' holding the stored label id.
        """
        sample_text = self.texts[idx]
        sample_label = self.labels[idx]
        encoded = self.tokenizer(
            sample_text,
            truncation=True,
            max_length=self.max_length,
            return_tensors=None,
        )

        sample = {key: encoded[key] for key in ('input_ids', 'attention_mask')}
        sample['labels'] = sample_label
        return sample


# Wrap each split in an EmotionDataset; all three share one tokenizer and length cap.
train_dataset, val_dataset, test_dataset = (
    EmotionDataset(split_frame, tokenizer, MAX_LENGTH)
    for split_frame in (train_df_oversampled, val_df, test_df)
)

print("DONE -- Datasets create:")
print(f"   Train: {len(train_dataset):,}")
print(f"   Val:   {len(val_dataset):,}")
print(f"   Test:  {len(test_dataset):,}")


# The datasets return unpadded token lists, so padding is left to the collator,
# which also converts each batch to PyTorch tensors.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    padding=True,
    return_tensors='pt',
)
print("DONE -- DataCollator configurat")


DONE -- data uploaded 
   Train: 32,454
   Val:   2,000
   Test:  2,000
DONE -- Labels: ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise'] -- 
DONE -- Config: model=roberta-base, max_len=128, batch=16 -- 


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


DONE -- Tokenizer încărcat -- 
DONE -- Datasets create:
   Train: 32,454
   Val:   2,000
   Test:  2,000
DONE -- DataCollator configurat


In [8]:
class ESALayer(nn.Module):
    """
    ESA = standard self-attention + learnable feature scaling vector S ∈ R^H.

    Pipeline (B = batch, L = sequence length, H = hidden size):
      (1) E_input  = E + P           learned positional embedding P
      (2) Z        = MHA(E_input)    multi-head self-attention, padded keys masked
      (3) Z_scaled = Z ⊙ S           per-feature scaling
      (4) Z_final  = Z_scaled + P    positional embedding re-added

    Input:
      E: [B, L, H]  (e.g. RoBERTa last_hidden_state)
      attention_mask: [B, L] (1=real, 0=pad), optional

    Output:
      Z_final: [B, L, H]
      attn_weights: [B, num_heads, L, L] per-head attention probabilities
        (useful for debugging/visualization), or None when the layer is
        called with need_weights=False
    """
    def __init__(self, hidden_dim: int, num_heads: int = 2, max_len: int = 512):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.max_len = max_len

        # Learned positional encoding P ∈ R[max_len, H]
        self.pos_emb = nn.Embedding(max_len, hidden_dim)

        # Multi-head self-attention (Transformer-style)
        self.mha = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=num_heads, batch_first=True)

        # Learnable scaling vector S ∈ R[H], initialised to ones (identity scaling)
        self.S = nn.Parameter(torch.ones(hidden_dim))

    def forward(self, E: torch.Tensor, attention_mask: Optional[torch.Tensor] = None,
                need_weights: bool = True):
        """
        Args:
          E: [B, L, H]  (e.g., roberta_outputs.last_hidden_state)
          attention_mask: [B, L] with 1=real token, 0=pad; None disables key masking
          need_weights: when True (default) the per-head attention weights are
            returned; pass False to skip materialising them when they are unused.

        Returns:
          (Z_final, attn_weights) with Z_final: [B, L, H] and attn_weights:
          [B, num_heads, L, L], or None if need_weights is False.
        """
        _, L, H = E.shape
        assert H == self.hidden_dim, f"Hidden size {H} does not match hidden_dim {self.hidden_dim}"
        assert L <= self.max_len, f"Sequence length {L} exceeds max_len {self.max_len}"

        # Positions are the same for every sample, so look P up once as [L, H]
        # and let broadcasting add it to each batch element.
        positions = torch.arange(L, device=E.device)  # [L]
        P = self.pos_emb(positions)  # [L, H]

        # (1) E_input = E + P
        E_input = E + P

        # Prepare key padding mask for MHA: True means "ignore"
        key_padding_mask = None
        if attention_mask is not None:
            key_padding_mask = (attention_mask == 0)  # [B, L] boolean

        # (2) Standard attention
        Z, attn_weights = self.mha(
            E_input, E_input, E_input,
            key_padding_mask=key_padding_mask,
            need_weights=need_weights,
            average_attn_weights=False  # keep per-head weights instead of the head average
        )  # Z: [B, L, H]

        # (3) Emotion-specific scaling: Z_scaled = Z ⊙ S
        Z_scaled = Z * self.S  # broadcasts [H] -> [B, L, H]

        # (4) Re-add positional encoding: Z_final = Z_scaled + P
        Z_final = Z_scaled + P

        return Z_final, attn_weights

In [21]:
class RobertaESAClassifier(nn.Module):
    """
    RoBERTa encoder -> ESA layer -> [CLS ; masked mean] pooling -> MLP -> logits.

    Args:
      model_name: checkpoint name/path passed to RobertaModel.from_pretrained.
      num_labels: number of output classes.
      dropout: dropout probability used before and inside the dense head.
      id2label / label2id: optional label maps stored on the encoder config.
      dense_dim: width of the hidden dense layer.

    forward() returns a SequenceClassifierOutput holding raw logits [B, num_labels]
    and, when `labels` is given, the cross-entropy loss.
    """
    def __init__(self, model_name, num_labels, dropout, id2label=None, label2id=None, dense_dim=256):
        super().__init__()

        self.num_labels = num_labels
        # NOTE(review): the encoder's pooler head is loaded but its output is never
        # read in forward(); consider add_pooling_layer=False if checkpoint
        # compatibility with earlier runs is not required.
        self.roberta = RobertaModel.from_pretrained(model_name)

        cfg = self.roberta.config
        hidden_size = cfg.hidden_size

        # ESA geometry mirrors the encoder: same width, head count and position limit.
        self.esa_layer = ESALayer(
            hidden_dim=hidden_size,
            num_heads=cfg.num_attention_heads,
            max_len=cfg.max_position_embeddings
        )

        self.pre_dropout = nn.Dropout(dropout)
        self.dense = nn.Sequential(
            nn.Linear(2 * hidden_size, dense_dim),  # input is [CLS ; mean] -> 2H
            nn.ReLU(),
            nn.Dropout(dropout)
        )

        self.classifier = nn.Linear(dense_dim, num_labels)
        self.loss_fn = nn.CrossEntropyLoss()

        if id2label is not None:
            cfg.id2label = id2label
        if label2id is not None:
            cfg.label2id = label2id


    def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
        """
        Args:
          input_ids: [B, L] token ids.
          attention_mask: [B, L] with 1=real token, 0=pad; None means "no padding".
          labels: optional [B] class ids; when given, the loss is computed.
          **kwargs: accepted and ignored.
        """
        outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )

        E = outputs.last_hidden_state  # [B, L, H]

        # The signature allows attention_mask=None, but the pooling below needs a
        # tensor; treat a missing mask as "every position is a real token".
        if attention_mask is None:
            attention_mask = torch.ones(E.shape[:2], dtype=torch.long, device=E.device)

        # Attention weights returned by the ESA layer are not used here.
        Z_final, _ = self.esa_layer(E, attention_mask=attention_mask)

        # CLS pooling
        cls_pool = Z_final[:, 0, :]

        # Mean pooling (masked). The mask is cast to the activation dtype so the
        # token counts are floating point before the division.
        mask = attention_mask.unsqueeze(-1).to(Z_final.dtype)  # [B, L, 1]
        sum_pool = (Z_final * mask).sum(dim=1)
        # Counts of a 0/1 mask are whole numbers, so clamping at 1 only affects
        # all-padding rows (whose sum is 0 anyway) and avoids division by zero.
        len_pool = mask.sum(dim=1).clamp(min=1.0)
        mean_pool = sum_pool / len_pool      # [B, H]

        # Concatenate pooling outputs
        pooled = torch.cat([cls_pool, mean_pool], dim=-1)  # [B, 2H]

        # pre_dropout was constructed in __init__ but never applied; apply it to
        # the pooled features before the dense head (identity in eval mode).
        pooled = self.pre_dropout(pooled)

        X = self.dense(pooled)

        logits = self.classifier(X)

        loss = None
        if labels is not None:
            loss = self.loss_fn(logits, labels)

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
        )

In [22]:
# Hyper-parameters for this run. Values that must agree with the tokenizer and
# datasets built earlier (checkpoint name, max length, batch size, label count)
# are taken from the loaded config/mappings instead of being typed in again,
# so the encoder cannot silently diverge from the preprocessing.
BASELINE_CONFIGURATION = {
    'model_name': MODEL_NAME,
    'learning_rate': 1e-5,
    'batch_size': BATCH_SIZE,
    'num_epochs': 10,
    'dropout': 0.3,
    'weight_decay': 0.01,
    'warmup_ratio': 0.1,
    'max_length': MAX_LENGTH,
    'num_labels': NUMBER_OF_LABELS,
    'seed': 42,
}

# Same type as the device chosen in the imports cell (torch.device, not str).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Label maps come from the mappings file loaded earlier; label2id is the inverse.
id2label = dict(id_to_label)
label2id = {v: k for k, v in id2label.items()}

# NOTE(review): the directory is still named 'baseline_roberta' although this
# notebook trains the ESA variant — confirm whether later cells expect this path.
os.makedirs('models/baseline_roberta', exist_ok=True)
os.makedirs('reports', exist_ok=True)

# Seed torch before construction so the randomly initialised layers
# (ESA block, dense head, classifier) are reproducible across runs.
torch.manual_seed(BASELINE_CONFIGURATION['seed'])

model = RobertaESAClassifier(
    model_name=BASELINE_CONFIGURATION["model_name"],
    num_labels=BASELINE_CONFIGURATION["num_labels"],
    dropout=BASELINE_CONFIGURATION["dropout"],
    id2label=id2label,
    label2id=label2id
).to(device)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"\nDONE -- Model încărcat pe {device}")
print(f"   Total parametri: {total_params:,}")
print(f"   Parametri antrenabili: {trainable_params:,}")



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



DONE -- Model încărcat pe cpu
   Total parametri: 127,798,534
   Parametri antrenabili: 127,798,534


In [23]:
# Smoke test: one tiny batch through forward and backward to confirm that the
# shapes line up and that gradients flow before any real training starts.
model.train()

# take ONE small batch
batch = next(iter(DataLoader(train_dataset, batch_size=2, collate_fn=data_collator)))
batch = {k: v.to(device) for k, v in batch.items()}

# forward
out = model(**batch)

# Check the output instead of only documenting the expectation in a comment.
expected_shape = (batch['labels'].shape[0], model.num_labels)
assert tuple(out.logits.shape) == expected_shape, (
    f"Expected logits of shape {expected_shape}, got {tuple(out.logits.shape)}"
)
assert torch.isfinite(out.loss), f"Loss is not finite: {out.loss.item()}"

print("Logits shape:", out.logits.shape)  # should be [2, num_labels]
print("Loss:", out.loss.item())

# backward (gradient check)
out.loss.backward()

# "OK" should mean something: at least one gradient exists and all are finite.
grads = [p.grad for p in model.parameters() if p.grad is not None]
assert grads, "Backward pass produced no gradients"
assert all(torch.isfinite(g).all() for g in grads), "Non-finite gradient detected"

print("Backward pass OK")

# Drop the gradients of this throw-away batch so they cannot leak into the
# first real optimisation step.
model.zero_grad()

Logits shape: torch.Size([2, 6])
Loss: 1.8087496757507324
Backward pass OK


Although the architecture diagram includes a Softmax layer, the model's forward pass returns raw logits. During training, `nn.CrossEntropyLoss` applies log-softmax to those logits internally, so no explicit Softmax is needed; Softmax is applied explicitly only at inference time, to convert the logits into emotion probabilities.