In [17]:
import torch
import pandas as pd
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import os
from accelerate import Accelerator  
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [2]:
os.environ['HF_TOKEN']="hf_" # give your huggingface access token
os.environ['HUGGINGFACEHUB_API_TOKEN']="hf_" # give your huggingface access token

In [3]:
import sys
print(sys.executable)

/Users/tafazzulnadeem/Desktop/TBED/env/bin/python


In [4]:
# os.environ['HF_HOME']="/DATAX/.cache/huggingface"
os.environ['HF_HOME']="/Users/tafazzulnadeem/.cache/huggingface/hub" # give your cache directory


In [5]:
!echo $HF_HOME
!echo $TRANSFORMERS_CACHE

/Users/tafazzulnadeem/.cache/huggingface/hub



In [6]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-large-emotion-latest", device_map="auto")
model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-large-emotion-latest", device_map="auto")

In [7]:
model

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
 

In [8]:
print(model.hf_device_map)

{'': 'mps'}


In [9]:
df = pd.read_csv('DatasetRelease2/train/track_a/eng.csv')

df.loc[0]

id          eng_train_track_a_00001
text            But not very happy.
Anger                             0
Fear                              0
Joy                               1
Sadness                           1
Surprise                          0
Name: 0, dtype: object

In [10]:
true_labels = []
for row in range(len(df)):
    true_label = [int(df.loc[row]['Joy']), 
                  int(df.loc[row]['Fear']), 
                  int(df.loc[row]['Anger']), 
                  int(df.loc[row]['Sadness']), 
                  int(df.loc[row]['Surprise'])]
    true_labels.append(true_label)
print(true_labels)

[[1, 0, 0, 1, 0], [1, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 1, 1, 1, 1], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 1, 0, 0], [0, 0, 0, 0, 0], [0, 1, 0, 0, 1], [1, 0, 0, 0, 0], [0, 0, 0, 0, 0], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 1, 0, 0, 1], [1, 1, 0, 0, 1], [0, 1, 0, 1, 0], [1, 0, 0, 0, 0], [0, 1, 0, 0, 1], [1, 0, 1, 0, 1], [0, 1, 0, 1, 0], [0, 1, 0, 1, 1], [0, 1, 0, 0, 1], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 1, 0, 1, 1], [0, 1, 0, 1, 0], [0, 1, 0, 1, 0], [0, 1, 0, 1, 0], [0, 1, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 0, 1, 1], [0, 1, 0, 0, 0], [0, 1, 0, 1, 0], [1, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 1, 0, 0, 1], [0, 1, 0, 0, 1], [0, 1, 1, 0, 0], [0, 1, 0, 1, 0], [1, 0, 0, 0, 0], [1, 0, 0, 1, 0], [0, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 0, 1], [0, 1, 0, 0, 0], [0, 1, 0, 0, 1], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1], [0, 0, 0, 0, 0], [0, 1, 1, 0, 0], [0, 1, 0, 0, 

In [11]:
print(len(true_labels))

2768


In [12]:
emotion_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None)
correct = 0
pred_labels = []
for row in tqdm(range(len(df))):
    joy, fear, anger, sadness, surprise = 0,0,0,0,0

        
    # Define some sample text for emotion classification
    text = df.loc[row]["text"]
    # print(text)
    # Get emotion predictions
    predictions = emotion_classifier(text)
    # print(predictions)

    # Display the results
    for prediction in predictions[0]:
        # print(prediction['score'])
        # prediction['score'] = 1 if prediction['score']>0.5 else 0
        if prediction['label'] == 'joy' and prediction['score'] > 0.5:
            # print("Inside")
            joy = 1
        # else: joy = 0
        if prediction['label'] == 'fear' and prediction['score'] > 0.5:
            # print("Inside")
            fear = 1
        # else: fear = 0
        if prediction['label'] == 'anger' and prediction['score'] > 0.5:
            # print("Inside")
            anger = 1
        # else: anger = 0
        if prediction['label'] == 'sadness' and prediction['score'] > 0.5:
            # print("Inside")
            sadness = 1
        # else: sadness = 0
        if prediction['label'] == 'surprise' and prediction['score'] > 0.5:
            # print("Inside")
            surprise = 1
        # else: surprise = 0
    # print(joy, fear, anger, sadness, surprise )
    if (df.loc[row]['Joy'] == joy and 
        df.loc[row]['Fear'] == fear and
        df.loc[row]['Anger'] == anger and
        df.loc[row]['Sadness'] == sadness and 
        df.loc[row]['Surprise'] == surprise):
        correct = correct + 1
    pred_labels.append([joy, fear, anger, sadness, surprise])
print(correct/len(df))

        # print(f"Emotion: {prediction['label']}, Score: {prediction['score']:.4f}")

100%|██████████| 2768/2768 [03:08<00:00, 14.68it/s]

0.2850433526011561





In [13]:
accuracy = (correct/len(df))*100
print(accuracy)

28.50433526011561


In [14]:

# Example true labels and predicted labels for a multi-label problem
# Each sample can belong to multiple classes (represented as 1s in the matrix)


# Calculate precision and recall for the multi-label case
# average='micro': Calculate globally considering all samples together (most common for multi-label)
precision_micro = precision_score(true_labels, pred_labels, average='micro')
recall_micro = recall_score(true_labels, pred_labels, average='micro')

# average='macro': Calculate metrics for each label, then average the results equally
precision_macro = precision_score(true_labels, pred_labels, average='macro')
recall_macro = recall_score(true_labels, pred_labels, average='macro')

# average='weighted': Calculate metrics for each label, then average the results weighted by the number of true instances for each label
precision_weighted = precision_score(true_labels, pred_labels, average='weighted')
recall_weighted = recall_score(true_labels, pred_labels, average='weighted')

print(f"Micro Precision: {precision_micro}")
print(f"Micro Recall: {recall_micro}")

print(f"Macro Precision: {precision_macro}")
print(f"Macro Recall: {recall_macro}")

print(f"Weighted Precision: {precision_weighted}")
print(f"Weighted Recall: {recall_weighted}")


Micro Precision: 0.7105165366034931
Micro Recall: 0.44106113033448674
Macro Precision: 0.7446307946792767
Macro Recall: 0.5051252207159719
Weighted Precision: 0.8082594627767711
Weighted Recall: 0.44106113033448674
Accuracy: 28.50433526011561


In [18]:
accuracy = accuracy_score(true_labels, pred_labels)
micro_f1 = f1_score(true_labels, pred_labels, average='micro')
macro_f1 = f1_score(true_labels, pred_labels, average='macro')
weighted_f1 = f1_score(true_labels, pred_labels, average='weighted')
print(f"Accuracy: {accuracy}")
print(f"Micro F1 Score: {micro_f1}")
print(f"Macro F1 Score: {macro_f1}")
print(f"Weighted F1 Score: {weighted_f1}")

Accuracy: 0.2850433526011561
Micro F1 Score: 0.5442641616851693
Macro F1 Score: 0.5318302204877029
Weighted F1 Score: 0.5010192148121105


In [41]:
'''
anger,0
anticipation,1
disgust,2
fear,3
joy,4
love,5
optimism,6
pessimism,7
sadness,8
surprise,9
trust,10
'''

'\nanger,0\nanticipation,1\ndisgust,2\nfear,3\njoy,4\nlove,5\noptimism,6\npessimism,7\nsadness,8\nsurprise,9\ntrust,10\n'