In [2]:
!git clone https://github.com/AminMohamed-3/Emotion-Classification.git
!pip install transformers dataset accelerate -q
import sys
sys.path.append("/kaggle/working/Emotion-Classification")

fatal: destination path 'Emotion-Classification' already exists and is not an empty directory.


In [1]:
import torch
from Training.dataset import prepare_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    EvalPrediction,
)
import numpy as np

from config import NUM_LABELS

  from .autonotebook import tqdm as notebook_tqdm


# Define the model & Prepare Dataset

In [2]:
# Checkpoint identifier of the pretrained model; the same value is used to
# load the tokenizer here and the model weights in a later cell.
model_checkpoint = "distilbert/distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# prepare_dataset comes from the project's Training.dataset module. It returns
# the dataset plus the two label mappings that are handed to the model config.
# NOTE(review): later cells index the dataset with "train", "val" and "test".
dataset, id2label, label2id = prepare_dataset(tokenizer)

100%|██████████| 211225/211225 [00:01<00:00, 143331.68it/s]
Map: 100%|██████████| 168980/168980 [00:09<00:00, 18701.40 examples/s]
Map: 100%|██████████| 21122/21122 [00:01<00:00, 19521.06 examples/s]
Map: 100%|██████████| 21123/21123 [00:01<00:00, 17545.52 examples/s]


In [3]:
# Load the pretrained encoder and attach a classification head with
# NUM_LABELS outputs, using the label mappings returned by prepare_dataset.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=NUM_LABELS,
    id2label=id2label,
    label2id=label2id,
    # Record the task type in the model config so that saved checkpoints and
    # downstream inference treat the outputs as independent per-label scores
    # (sigmoid) instead of a single-label softmax.
    problem_type="multi_label_classification",
)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Trainer

In [7]:
from transformers import (
    Trainer,
    TrainingArguments,
    DataCollatorForTokenClassification,
    DataCollatorWithPadding,
)

# Pad each batch dynamically to its longest sequence. This is a sequence-level
# task, so the generic padding collator is the right one: the token
# classification collator additionally pads every label row with -100 up to
# the sequence length, which corrupts the per-example label vectors.
data_collator = DataCollatorWithPadding(tokenizer, padding=True)

# Define the training arguments
# NOTE(review): `evaluation_strategy` is deprecated in favour of
# `eval_strategy` in recent transformers releases - confirm against the
# installed version before renaming.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    evaluation_strategy="epoch",
    save_strategy="epoch",  # must match the evaluation schedule for load_best_model_at_end
    learning_rate=5e-5,
    save_total_limit=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1",  # key of the dict returned by compute_metrics
    greater_is_better=True,
)

In [24]:
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
    roc_auc_score,
)


def multi_label_metrics(preds, labels, threshold=0.5):
    """Compute macro-averaged multi-label metrics from raw model outputs.

    Args:
        preds: Raw scores (logits) as produced by the model; a sigmoid is
            applied to turn them into per-label probabilities.
        labels: Ground-truth indicator values, passed unchanged to
            scikit-learn as ``y_true``.
        threshold: Probability at or above which a label counts as predicted.

    Returns:
        dict with the keys ``"f1"``, ``"accuracy"``, ``"precision"`` and
        ``"recall"``. F1, precision and recall are macro-averaged; accuracy
        is whatever ``accuracy_score`` returns for the given inputs.
    """
    probs = torch.sigmoid(torch.as_tensor(preds, dtype=torch.float32))
    probs = probs.detach().cpu().numpy()

    # Binarise: 1.0 where the probability reaches the threshold, else 0.0.
    y_pred = (probs >= threshold).astype(float)
    y_true = labels

    # zero_division=0 keeps the score sklearn would report anyway for labels
    # that have no positive predictions/targets, but without emitting an
    # UndefinedMetricWarning on every evaluation pass.
    f1_macro = f1_score(
        y_true=y_true, y_pred=y_pred, average="macro", zero_division=0
    )
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average="macro", zero_division=0)
    recall = recall_score(y_true, y_pred, average="macro", zero_division=0)

    return {
        "f1": f1_macro,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
    }


def compute_metrics(p: EvalPrediction):
    """Adapt an ``EvalPrediction`` to ``multi_label_metrics``.

    Only the first ``NUM_LABELS`` columns of ``p.label_ids`` are kept before
    scoring (presumably to drop padding columns appended by the data
    collator - confirm). ``p.predictions`` is forwarded untouched.

    Returns:
        The metrics dict produced by ``multi_label_metrics``.
    """
    return multi_label_metrics(
        preds=p.predictions,
        labels=p.label_ids[:, :NUM_LABELS],
    )

In [25]:
class MultiLabelTrainer(Trainer):
    """Trainer whose loss is binary cross-entropy computed on the logits."""

    def compute_loss(
        self, model, inputs, return_outputs=False, num_items_in_batch=None
    ):
        """Compute the BCE-with-logits loss for one batch.

        Args:
            model: The model being trained; called with the batch inputs.
            inputs: Batch dict. Its ``"labels"`` entry is removed here so the
                model does not compute its own loss.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Accepted for compatibility with newer
                transformers releases, which pass this argument to
                ``compute_loss``; it is not used by this loss.

        Returns:
            The loss tensor, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.pop("labels")
        # Keep the first NUM_LABELS columns and cast to float, as required by
        # the BCE loss.
        labels = labels[:, :NUM_LABELS].float()
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = torch.nn.BCEWithLogitsLoss()
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss


# Wire the model, data splits, collator and metrics into the custom trainer.
trainer = MultiLabelTrainer(
    model=model,
    args=training_args,
    # Fit on the training split; the test split must stay unseen during
    # training so it can serve as a held-out evaluation set.
    train_dataset=dataset["train"],
    eval_dataset=dataset["val"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [9]:
# Sanity check: push one training example through the model and score the
# thresholded prediction against its labels.
example = dataset["train"][0]

# Wrap each field in a list to get a batch of one, then place it on the
# same device as the model.
input_ids = torch.tensor([example["input_ids"]]).to(device)
attention_mask = torch.tensor([example["attention_mask"]]).to(device)
labels = torch.tensor([example["labels"]]).float().to(device)

# Forward pass only; no gradients are needed for this check.
with torch.no_grad():
    outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
    logits = outputs.logits

# Logits -> per-label probabilities, moved to NumPy for scikit-learn.
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(logits).cpu().numpy()

# A label counts as predicted when its probability is at least 0.5.
y_pred = (probs >= 0.5).astype(float)
y_true = labels.cpu().numpy()

# Micro-averaged scores for this single example.
f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average="micro")
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average="micro")
recall = recall_score(y_true, y_pred, average="micro")

print(f"f1: {f1_micro_average}")
print(f"accuracy: {accuracy}")
print(f"precision: {precision}")
print(f"recall: {recall}")

f1: 0.0
accuracy: 0.0
precision: 0.0
recall: 0.0
