In [None]:
!git clone https://github.com/AminMohamed-3/Emotion-Classification.git
!pip install transformers dataset accelerate -q
import sys
sys.path.append("/kaggle/working/Emotion-Classification")

In [1]:
import os
import numpy as np
import torch
import wandb
from datasets import load_dataset
from dotenv import load_dotenv
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
)
from config import NUM_LABELS
from Training.utils import compute_metrics

# Load key/value pairs from a `.env` file (when one is found) into os.environ.
# NOTE(review): presumably this is what provides the "wandb" entry that is read
# from os.environ right before training — confirm the .env file defines it.
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

# Load & Tokenize dataset

In [40]:
# Download (or reuse the cached copy of) the "simplified" GoEmotions configuration.
dataset = load_dataset(path="go_emotions", name="simplified")

# `labels` is a sequence feature; its inner feature exposes `int2str`, which
# turns an integer class id into the corresponding label name.
train_label_feature = dataset["train"].features["labels"].feature
i2s = train_label_feature.int2str

In [41]:
# Hub identifier of the pretrained encoder to fine-tune (using a larger model).
model_checkpoint = "FacebookAI/roberta-base"

# Tokenizer matching that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_checkpoint
)

In [42]:
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Every sequence is truncated and padded with ``padding="max_length"`` so all
    rows come out with the same length; the Trainer below relies on that
    because it is built without a padding data collator.

    Args:
        examples: Batch dict handed over by ``Dataset.map(batched=True)``;
            ``examples["text"]`` holds the raw strings.

    Returns:
        The tokenizer output for the batch; ``map`` adds its fields as columns.
    """
    # No `return_tensors="pt"` here: `Dataset.map` stores plain lists in Arrow,
    # so building torch tensors first would only be wasted work.
    return tokenizer(examples["text"], padding="max_length", truncation=True)


def labels_to_one_hot(examples):
    """Replace one example's list of label ids with a multi-hot vector.

    Args:
        examples: A single example dict (this is mapped with ``batched=False``);
            ``examples["labels"]`` is the list of integer class ids.

    Returns:
        ``{"labels": vector}`` where ``vector`` has ``NUM_LABELS`` float32
        entries, 1.0 at each listed class id and 0.0 elsewhere.
    """
    multi_hot = np.zeros(NUM_LABELS, dtype=np.float32)
    # Index assignment (instead of summing one-hot rows) keeps every entry in
    # {0, 1} even if a class id is listed more than once, and an empty label
    # list simply yields the all-zero vector.
    multi_hot[examples["labels"]] = 1.0
    return {"labels": multi_hot}


# First pass adds the tokenizer columns batch-wise; second pass rewrites the
# `labels` column one example at a time.
dataset = dataset.map(tokenize_function, batched=True)
dataset = dataset.map(
    function=labels_to_one_hot,
    batched=False,
)

Map: 100%|██████████| 5427/5427 [00:01<00:00, 5231.50 examples/s]
Map: 100%|██████████| 43410/43410 [00:04<00:00, 8952.34 examples/s] 
Map: 100%|██████████| 5426/5426 [00:00<00:00, 7701.73 examples/s]
Map: 100%|██████████| 5427/5427 [00:00<00:00, 11202.50 examples/s]


# Define Model

In [43]:
# Lookup tables between integer class ids and their label names, in both
# directions; both are handed to the model when it is created.
id2label = dict(enumerate(map(i2s, range(NUM_LABELS))))
label2id = {name: idx for idx, name in id2label.items()}

In [39]:
# Load the pretrained encoder with a sequence-classification head sized for
# NUM_LABELS outputs and configured for multi-label classification. The id/name
# tables are stored on the model so predictions can be mapped back to names.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    problem_type="multi_label_classification",
    num_labels=NUM_LABELS,
    label2id=label2id,
    id2label=id2label,
)

# Prefer the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Trainer

In [44]:
# Hyper-parameters and bookkeeping options for the fine-tuning run.
training_args = TrainingArguments(
    # Where checkpoints are written, and how many are kept.
    output_dir="./results",
    save_total_limit=10,
    # Optimisation schedule.
    num_train_epochs=5,  # Training for longer
    learning_rate=2e-5,  # Using a smaller LR
    weight_decay=0.01,
    # Batch sizes per device.
    per_device_train_batch_size=16,  # smaller batch size
    per_device_eval_batch_size=32,
    # Evaluate and checkpoint once per epoch, log every 20 steps.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=20,
    # Reload the checkpoint with the highest "f1" once training finishes.
    # NOTE(review): assumes `compute_metrics` reports an "f1" entry — confirm.
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
)

In [45]:
# Wire the model, the run configuration, the train/validation splits and the
# metric function into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=dataset["validation"],
    train_dataset=dataset["train"],
)

In [46]:
# Smoke test: run the first training example through the model once, before
# the full training run is started.
sample = dataset["train"][0]

# Keep only the two tensors passed to the model here, add a leading batch
# dimension of size 1, and move them to the same device as the model.
example = {
    key: torch.tensor(sample[key]).unsqueeze(0).to(device)
    for key in ("input_ids", "attention_mask")
}

# This is only a sanity check, so skip autograd: otherwise `output` would keep
# the whole computation graph alive on the device while training runs.
with torch.no_grad():
    output = model(**example)

In [48]:
# Authenticate with Weights & Biases using the key stored in the "wandb"
# environment variable (raises KeyError if it is not set), then start training.
wandb_api_key = os.environ["wandb"]
wandb.login(key=wandb_api_key)
trainer.train()



Epoch,Training Loss,Validation Loss
