In [1]:
!nvidia-smi

Mon Mar 25 23:30:20 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.52                 Driver Version: 551.52         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1660 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| 44%   29C    P8             13W /  125W |     949MiB /   6144MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118
Note: you may need to restart the kernel to use updated packages.


In [None]:
from datasets import load_dataset, load_metric
import numpy as np
import pandas as pd
import torch
from torch import nn
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

DEVICE =  "cuda: 0" if torch.cuda.is_available() else "cpu"
MODEL_NAME = "FacebookAI/roberta-base"
DEVICE

'cuda: 0'

# **Dataset**

In [3]:
from datasets import load_dataset, Dataset, Value, ClassLabel, Features

my_dataset = load_dataset("./Emotion_Dataset", sep=",")
# Creating a ClassLabel Object
df = my_dataset["train"].to_pandas()
labels = ['sadness','joy','love','anger', 'fear', 'surprise']
ClassLabels = ClassLabel(num_classes=len(labels), names=labels)

# Mapping Labels to IDs
def map_label2id(example):
    example['label'] = ClassLabels.str2int(example['label'])
    return example

my_dataset= my_dataset.map(map_label2id, batched=True)

# Casting label column to ClassLabel Object
my_dataset = my_dataset.cast_column('label', ClassLabels)

my_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [5]:
X_train = my_dataset["train"].to_pandas()
X_val = my_dataset["validation"].to_pandas()
X_test = my_dataset["test"].to_pandas()
y_true = X_test.label
y_true

0       0
1       0
2       0
3       1
4       0
       ..
1995    3
1996    3
1997    1
1998    1
1999    4
Name: label, Length: 2000, dtype: int64

In [6]:
features = my_dataset["train"].features
features

{'text': Value(dtype='string', id=None),
 'label': ClassLabel(names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], id=None)}

In [7]:
id2label = {idx:features["label"].int2str(idx) for idx in range(6)}
id2label

{0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}

In [8]:
label2id = {v:k for k,v in id2label.items()}
label2id

{'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}

In [9]:
X_train["label"].value_counts(normalize=True).sort_index()

label
0    0.291625
1    0.335125
2    0.081500
3    0.134937
4    0.121063
5    0.035750
Name: proportion, dtype: float64

In [10]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [11]:
def tokenize_text(examples):
  return tokenizer(examples["text"], truncation=True, max_length=512)

In [12]:
my_dataset = my_dataset.map(tokenize_text, batched=True)
my_dataset

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 2000
    })
})

In [13]:
class_weights = (1 - (X_train["label"].value_counts().sort_index() / len(X_train))).values #assigning high weight to the rare classes and low weight to the common classes to balance it out as there is an imbalance in the dataset
class_weights

import torch

class_weights = torch.from_numpy(class_weights).float()
class_weights = class_weights.to("cuda")
class_weights

tensor([0.7084, 0.6649, 0.9185, 0.8651, 0.8789, 0.9643], device='cuda:0')

In [14]:
my_dataset = my_dataset.rename_column("label", "labels")

In [34]:
class WeightedLossTrainer(Trainer):
  def compute_loss(self, model, inputs, return_outputs=False):
    # Feed inputs to model and extract logits
    outputs = model(**inputs)
    logits = outputs.get("logits")
    # Extract labels
    labels = inputs.get("labels")
    #Define loss function with class weights
    loss_func=nn.CrossEntropyLoss(weight=class_weights)
    # Compute loss
    loss = loss_func(logits, labels)
    return (loss, outputs) if return_outputs else loss

In [18]:
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME,
                                                          num_labels=6,
                                                          id2label=id2label,
                                                          label2id=label2id)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [35]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  accuracy= accuracy_score(labels, preds)
  f1 = f1_score(labels, preds, average="weighted")
  return {"f1": f1, "accuracy" : accuracy}

In [20]:
%pip install transformers[torch]
%pip install accelerate -U
from transformers import TrainingArguments

batch_size = 64

logging_steps = len(my_dataset["train"])
output_dir = "/content/drive/MyDrive/Emotion_Output"
training_args = TrainingArguments(output_dir=output_dir,
                                  num_train_epochs=6,
                                  learning_rate=2e-5,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  logging_steps=logging_steps,
                                  fp16=True)

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [41]:
trainer = WeightedLossTrainer(model=model,
                              args=training_args,
                              compute_metrics=compute_metrics,
                              train_dataset=my_dataset["train"],
                              eval_dataset= my_dataset["validation"],
                              tokenizer=tokenizer)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [31]:
def predict(model, tokenizer):
    y_pred = []
    none_pred = []
    for i in tqdm(range(len(X_test))):
        
        prompt = X_test.iloc[i]["text"]
        pipe = pipeline(task="text-classification",
                        model=model,
                        tokenizer=tokenizer,
                       )
        result = pipe(prompt)
        answer = result[0]['label'].split("=")[-1]
        #print(result)
        #print(answer)
        if "sadness" in answer:
            y_pred.append(0)
        elif "joy" in answer:
            y_pred.append(1)
        elif "love" in answer:
            y_pred.append(2)
        elif "anger" in answer:
            y_pred.append(3)
        elif "fear" in answer:
            y_pred.append(4)
        elif "surprise" in answer:
            y_pred.append(5)
        else:
            y_pred.append(6)
            none_pred.append(answer)
        # print(none_pred)
    return y_pred, none_pred

In [32]:
def evaluate(y_true, y_pred):

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels output: {0,1,2,3,4,5}

    # nested for loops
    for label in unique_labels:
        # will output a list of the index of one emotion at a time
        label_indices = [i for i in range(len(y_true))
                         if y_true[i] == label]
        # will output the list of one emotion
        label_y_true = [y_true[i] for i in label_indices]
        # label_y_true = [label for i in range(len(y_true))]
        # will output list of the predicted emotion in the same order as label_y_true
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2, 3, 4, 5, 6])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [42]:
y_pred, none_pred = predict(model, tokenizer)

100%|██████████| 2000/2000 [02:05<00:00, 15.94it/s]


In [43]:
evaluate(y_true, y_pred)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.176
Accuracy for label 0: 0.291
Accuracy for label 1: 0.053
Accuracy for label 2: 0.000
Accuracy for label 3: 0.000
Accuracy for label 4: 0.652
Accuracy for label 5: 0.000

Classification Report:
              precision    recall  f1-score   support

           0       0.28      0.29      0.28       581
           1       0.43      0.05      0.09       695
           2       0.00      0.00      0.00       159
           3       0.00      0.00      0.00       275
           4       0.11      0.65      0.19       224
           5       0.00      0.00      0.00        66

    accuracy                           0.18      2000
   macro avg       0.14      0.17      0.09      2000
weighted avg       0.24      0.18      0.14      2000


Confusion Matrix:
[[169  20   0   2 390   0   0]
 [218  37   0   1 439   0   0]
 [ 56   8   0   0  95   0   0]
 [ 84   9   0   0 182   0   0]
 [ 67  10   0   1 146   0   0]
 [ 19   3   0   0  44   0   0]
 [  0   0   0   0   0   0   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [44]:
model.to("cuda")

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [45]:
model.train() #changing the setting for training
trainer.train()

  0%|          | 0/1500 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.26720038056373596, 'eval_f1': 0.9080498475369141, 'eval_accuracy': 0.906, 'eval_runtime': 37.1621, 'eval_samples_per_second': 53.818, 'eval_steps_per_second': 0.861, 'epoch': 1.0}


Checkpoint destination directory /content/drive/MyDrive/Emotion_Output\checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.18085992336273193, 'eval_f1': 0.9299562686351285, 'eval_accuracy': 0.93, 'eval_runtime': 37.0703, 'eval_samples_per_second': 53.952, 'eval_steps_per_second': 0.863, 'epoch': 2.0}


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.17214852571487427, 'eval_f1': 0.9330425520880335, 'eval_accuracy': 0.9315, 'eval_runtime': 37.9522, 'eval_samples_per_second': 52.698, 'eval_steps_per_second': 0.843, 'epoch': 3.0}


Checkpoint destination directory /content/drive/MyDrive/Emotion_Output\checkpoint-1000 already exists and is non-empty. Saving will proceed but saved results may be invalid.


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.14081747829914093, 'eval_f1': 0.94149151738001, 'eval_accuracy': 0.941, 'eval_runtime': 38.0036, 'eval_samples_per_second': 52.627, 'eval_steps_per_second': 0.842, 'epoch': 4.0}


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.13884671032428741, 'eval_f1': 0.9398946686175306, 'eval_accuracy': 0.939, 'eval_runtime': 38.6136, 'eval_samples_per_second': 51.795, 'eval_steps_per_second': 0.829, 'epoch': 5.0}


Checkpoint destination directory /content/drive/MyDrive/Emotion_Output\checkpoint-1500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.14256718754768372, 'eval_f1': 0.9432912919473561, 'eval_accuracy': 0.9425, 'eval_runtime': 38.0619, 'eval_samples_per_second': 52.546, 'eval_steps_per_second': 0.841, 'epoch': 6.0}
{'train_runtime': 5756.3393, 'train_samples_per_second': 16.677, 'train_steps_per_second': 0.261, 'train_loss': 0.23213614908854166, 'epoch': 6.0}


TrainOutput(global_step=1500, training_loss=0.23213614908854166, metrics={'train_runtime': 5756.3393, 'train_samples_per_second': 16.677, 'train_steps_per_second': 0.261, 'train_loss': 0.23213614908854166, 'epoch': 6.0})

In [46]:
model.eval() #changing the setting for testing
pred = trainer.predict(my_dataset["test"])
pred

  0%|          | 0/32 [00:00<?, ?it/s]

PredictionOutput(predictions=array([[ 6.8242188 , -1.2421875 , -1.6777344 , -1.1679688 , -2.3613281 ,
        -1.3349609 ],
       [ 6.9257812 , -1.5898438 , -1.8212891 , -1.1103516 , -1.8017578 ,
        -1.4833984 ],
       [ 6.7734375 , -1.9853516 , -1.6376953 , -1.0498047 , -1.7128906 ,
        -1.3642578 ],
       ...,
       [-1.1777344 ,  7.0742188 , -0.90283203, -1.5400391 , -1.6699219 ,
        -0.8457031 ],
       [-1.5126953 ,  7.1835938 , -0.84472656, -1.4384766 , -1.2529297 ,
        -1.0322266 ],
       [-1.8066406 , -1.6054688 , -1.6992188 , -2.0175781 ,  3.7226562 ,
         4.234375  ]], dtype=float32), label_ids=array([0, 0, 0, ..., 1, 1, 4], dtype=int64), metrics={'test_loss': 0.1556072235107422, 'test_f1': 0.927315549315068, 'test_accuracy': 0.926, 'test_runtime': 37.4728, 'test_samples_per_second': 53.372, 'test_steps_per_second': 0.854})

In [47]:
trainer.evaluate(my_dataset["test"])

  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.1556072235107422,
 'eval_f1': 0.927315549315068,
 'eval_accuracy': 0.926,
 'eval_runtime': 37.5828,
 'eval_samples_per_second': 53.216,
 'eval_steps_per_second': 0.851,
 'epoch': 6.0}

In [48]:
y_pred, none_pred = predict(model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████| 2000/2000 [02:05<00:00, 15.94it/s]

Accuracy: 0.926
Accuracy for label 0: 0.960
Accuracy for label 1: 0.917
Accuracy for label 2: 0.950
Accuracy for label 3: 0.949
Accuracy for label 4: 0.835
Accuracy for label 5: 0.879

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96       581
           1       0.98      0.92      0.95       695
           2       0.78      0.95      0.86       159
           3       0.92      0.95      0.93       275
           4       0.92      0.83      0.87       224
           5       0.68      0.88      0.77        66

    accuracy                           0.93      2000
   macro avg       0.87      0.91      0.89      2000
weighted avg       0.93      0.93      0.93      2000


Confusion Matrix:
[[558   4   0  12   7   0   0]
 [  5 637  43   2   1   7   0]
 [  0   8 151   0   0   0   0]
 [  8   2   0 261   4   0   0]
 [  7   0   0  10 187  20   0]
 [  3   0   0   0   5  58   0]
 [  0   0   0   0   0   0   0]]



