In [7]:
import pandas as pd

In [8]:
import numpy as np

In [9]:
t5_test = pd.read_csv('test_t5.csv')

t5_test.head()

Unnamed: 0,text,label
0,Winter Blues and WFH question i think the sudd...,anger
1,New Workspace i saw some of your other posts a...,joy
2,Hard to mentally unwind… i go for a long walk ...,joy
3,Would you leave 150k for 90k depends on your e...,joy
4,There’s no magic formula to get remote work so...,fear


In [15]:
lexicon_path = 'NRC-Emotion-Intensity-Lexicon-v1.txt'

def load_nrc_lexicon(file_path):
    lexicon = {}
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) == 3:
                word, emotion, intensity = parts
                intensity = float(intensity)
                if word not in lexicon:
                    lexicon[word] = {}
                lexicon[word][emotion] = intensity
    return lexicon
nrc_lexicon = load_nrc_lexicon(lexicon_path)

In [16]:
def label_text(text, lexicon):
    
    # create the dict to store the score for each emotion
    emotion_scores = {emotion: 0 for emotion in set(e for values in lexicon.values() for e in values)}
    words = text.lower().split()
    
    # calculate the score of emotion based on lexicon
    for word in words:
        if word in lexicon:
            for emotion, intensity in lexicon[word].items():
                emotion_scores[emotion] += intensity
                
    # choose the emotion with the highest score
    max_emotion = max(emotion_scores, key=emotion_scores.get)
    return max_emotion

In [17]:
# predict label by NRC
t5_test['label_nrc'] = t5_test['text'].apply(lambda x: label_text(x, nrc_lexicon))

t5_test.head()

Unnamed: 0,text,label,label_nrc
0,Winter Blues and WFH question i think the sudd...,anger,surprise
1,New Workspace i saw some of your other posts a...,joy,joy
2,Hard to mentally unwind… i go for a long walk ...,joy,joy
3,Would you leave 150k for 90k depends on your e...,joy,sadness
4,There’s no magic formula to get remote work so...,fear,sadness


In [18]:
t5_test.to_csv('t5_nrc_inten_test.csv')

In [19]:
label_counts = t5_test['label_nrc'].value_counts()
label_counts

label_nrc
trust           8034
joy             5861
anticipation    3691
fear            3243
sadness         1911
anger            924
disgust          495
surprise         315
Name: count, dtype: int64

In [23]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
y_true = t5_test['label']
y_pred = t5_test['label_nrc']

accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# show the results
print("Metrics for NRC:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Metrics for NRC:
Accuracy: 0.2211
Precision: 0.5155
Recall: 0.2211
F1-Score: 0.2860


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
