In [1]:
!git clone https://github.com/AminMohamed-3/Emotion-Classification.git
!pip install transformers dataset accelerate -q
import sys
sys.path.append("/kaggle/working/Emotion-Classification")

Cloning into 'Emotion-Classification'...
remote: Enumerating objects: 113, done.[K
remote: Counting objects: 100% (113/113), done.[K
remote: Compressing objects: 100% (84/84), done.[K
remote: Total 113 (delta 53), reused 84 (delta 24), pack-reused 0[K
Receiving objects: 100% (113/113), 270.72 KiB | 288.00 KiB/s, done.
Resolving deltas: 100% (53/53), done.
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
embedchain 0.1.102 requires sqlalchemy<3.0.0,>=2.0.27, but you have sqlalchemy 1.4.52 which is incompatible.[0m[31m
[0m

In [24]:
import torch
from Training.dataset import prepare_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    DataCollatorForTokenClassification,
    Trainer,
)
import numpy as np
from config import NUM_LABELS
import wandb
from Training.utils import compute_metrics
from Training.utils import MultiLabelTrainer
from tqdm import tqdm

# Define the Model & Prepare the Dataset

In [2]:
# Checkpoint shared by the tokenizer and the classifier.
model_checkpoint = "distilbert/distilroberta-base"

# prepare_dataset is a project helper; it returns the tokenized dataset
# together with the id<->label mappings used to configure the model.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
dataset, id2label, label2id = prepare_dataset(tokenizer)

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained encoder configured for multi-label classification
# and move it to the selected device.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    problem_type="multi_label_classification",
    num_labels=NUM_LABELS,
    label2id=label2id,
    id2label=id2label,
).to(device)

100%|██████████| 211225/211225 [00:01<00:00, 108722.37it/s]
Map: 100%|██████████| 168980/168980 [00:25<00:00, 6537.72 examples/s]
Map: 100%|██████████| 21122/21122 [00:03<00:00, 6037.44 examples/s]
Map: 100%|██████████| 21123/21123 [00:03<00:00, 6781.23 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# PyTorch DataLoaders

In [3]:
# Mini-batch size shared by all three splits.
BATCH_SIZE = 16

# Only the training split is shuffled: SGD benefits from a fresh order each
# epoch, whereas evaluation results should not depend on batch order.
train_dataloader = torch.utils.data.DataLoader(
    dataset["train"].with_format("torch"), batch_size=BATCH_SIZE, shuffle=True
)
val_dataloader = torch.utils.data.DataLoader(
    dataset["val"].with_format("torch"), batch_size=BATCH_SIZE, shuffle=False
)
test_dataloader = torch.utils.data.DataLoader(
    dataset["test"].with_format("torch"), batch_size=BATCH_SIZE, shuffle=False
)

# Model Class

In [22]:
class BERTClassifier(torch.nn.Module):
    """Thin ``nn.Module`` wrapper around an already-constructed model.

    The wrapped model is stored as ``self.model`` and every forward call is
    delegated to it with ``return_dict=False`` added to the keyword arguments.
    """

    def __init__(self, model):
        """Store ``model`` as a submodule of this wrapper."""
        super().__init__()
        self.model = model

    def forward(self, **kwargs):
        """Call the wrapped model with ``kwargs`` plus ``return_dict=False``.

        Returns whatever the wrapped model returns for that call.
        """
        wrapped = self.model
        result = wrapped(return_dict=False, **kwargs)
        return result


# Wrap the loaded model so that forward calls are made with return_dict=False;
# the training loop below indexes the result with [0] to obtain the logits.
model_torch = BERTClassifier(model)

In [None]:
# Training hyperparameters
from torch.optim import AdamW

LR = 2e-5
EPOCHS = 5
optimizer = AdamW(params=model.parameters(), lr=LR)
loss_fn = torch.nn.BCEWithLogitsLoss()

In [None]:
import os

import wandb

# Never commit an API key to the notebook. The key is read from the
# WANDB_API_KEY environment variable (e.g. populated from a Kaggle secret).
# When the variable is unset, `key` is None and wandb.login is expected to
# fall back to its own credential lookup / interactive prompt.
# NOTE(review): the key that used to be hardcoded here has been exposed and
# should be revoked in the W&B account settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Start a new run for this experiment.
wandb.init(project="Emotions", name="naive_torch")

In [None]:
# training loop
for epoch in range(EPOCHS):
    model_torch.train()
    for batch in tqdm(train_dataloader):
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        outputs = model_torch(input_ids=input_ids, attention_mask=attention_mask)[0]
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # Log training loss
        wandb.log({"Train Loss": loss.item()})

    model_torch.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            outputs = model_torch(input_ids=input_ids, attention_mask=attention_mask)[0]
            loss = loss_fn(outputs, labels)
            val_loss += loss.item()
            val_acc += (outputs.argmax(1) == labels).sum().item()

            # Log validation loss
            wandb.log({"Val Loss": loss.item()})

    val_loss /= len(val_dataloader)
    val_acc /= len(dataset["val"])
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    print(f"Val loss: {val_loss:.4f}, Val acc: {val_acc:.4f}")