In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import requests

## Predict the NRC emotion by using NRC Emotion Intensity Lexicon model

First, we used the NRC Emotion Intensity Lexicon from https://saifmohammad.com/WebPages/AffectIntensity.htm, which provides real-valued scores of intensity for eight basic emotions. In this model, we calculated the score of emotions in each text, rather than counting the frequency of emotions. This approach helps to eliminate unpredicted labels (like neutral) and improves accuracy. These scores represent the strength or intensity of the emotional association for each word, enabling a more nuanced analysis of text. 

In [5]:
# Load the NRC Emotion Intensity Lexicon
lexicon_path = "https://raw.githubusercontent.com/SaraHoxha/emotion-detection-txa/nrc_emotion/NRC/NRC-Emotion-Intensity-Lexicon-v1.txt"

# Hàm tải tệp từ URL và load nội dung vào từ dữ liệu
def load_nrc_lexicon(url):
    lexicon = {}
    response = requests.get(url)

    if response.status_code == 200:
        # Xử lý tệp từ dữ liệu trả về
        for line in response.text.splitlines():
            parts = line.strip().split('\t')
            if len(parts) == 3:
                word, emotion, intensity = parts
                intensity = float(intensity)
                if word not in lexicon:
                    lexicon[word] = {}
                lexicon[word][emotion] = intensity
        return lexicon
nrc_lexicon = load_nrc_lexicon(lexicon_path)

In [6]:
# Predict emotion for each text
def label_text(text, lexicon):
    
    # create the dict to store the score for each emotion
    emotion_scores = {emotion: 0 for emotion in set(e for values in lexicon.values() for e in values)}
    words = text.lower().split()
    
    # calculate the score of emotion based on lexicon
    for word in words:
        if word in lexicon:
            for emotion, intensity in lexicon[word].items():
                emotion_scores[emotion] += intensity
                
    # choose the emotion with the highest score
    max_emotion = max(emotion_scores, key=emotion_scores.get)
    return max_emotion

## Convert to Parrott's emotion 

After predicting the NRC labels, we mapped our results to Parrott's emotion categories based on the definitions provided in the groups listed at  https://en.wikipedia.org/wiki/Emotion_classification. This mapping was an attempt to align the NRC emotions with Parrott's emotions to observe their correspondence.
In this approach, we highlighted some changes as below:
1. In class "anger" in Parrott's emotion, there are two sub-emotions: "eagerness" and "hope." These emotions share similar meanings with "anticipation" from the NRC lexicon, so we decided to convert it into "joy."
2. In class "anger" in Parrott's emotion, there is the "disgust" in this list, so we decided to convert it into "anger"
3. In class "love" in Parrott's emotion, we added "trust" because it’s an important part of love. Trust helps build strong, loving relationships, so we included it under "love" to show how essential it is.

In [7]:
def map_nrc_to_parrott(nrc_emotion):
    mapping = {
        "anger": "anger",
        "anticipation": "joy",  # convert Anticipation to Joy
        "disgust": "anger",   # convert Disgust to Anger
        "fear": "fear",
        "joy": "joy",
        "sadness": "sadness",
        "surprise": "surprise",
        "trust": "love",         # convert Trust to Love
    }
    return mapping.get(nrc_emotion, None)

## Calculate metrics

After having the emotion with 6 emotions, we implemented to calculate the metrics to compare 2 models together

In [8]:
# Calculate the metrics 
def calculate_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall,
        'f1': f1}

In [9]:
# Save the metrics to the result file
def save_metrics_to_file(metrics, filename):
    metrics_str = (f"Accuracy: {metrics['accuracy']:.4f}\n"
        f"Precision: {metrics['precision']:.4f}\n"
        f"Recall: {metrics['recall']:.4f}\n"
        f"F1-Score: {metrics['f1']:.4f}\n")
    with open(filename, 'w') as file:
        file.write(metrics_str)

# T5 dataset

In this section, we implemented the prediction to the t5 dataset

In [10]:
# Import file
t5_test = pd.read_csv('https://raw.githubusercontent.com/SaraHoxha/emotion-detection-txa/main/Model%20Implementation/data/test_t5.csv')

In [11]:
# Predict label by NRC
t5_test['label_nrc'] = t5_test['text'].apply(lambda x: label_text(x, nrc_lexicon))

In [12]:
# Map the NRC's emotion label to the Parrott's emotion label
t5_test['map_to_parrott'] = t5_test['label_nrc'].apply(map_nrc_to_parrott)

In [13]:
# Count the number of label after predicting
t5_test['map_to_parrott'].value_counts()

map_to_parrott
joy         9561
love        8026
anger       3038
sadness     1911
fear        1624
surprise     314
Name: count, dtype: int64

In [14]:
# Metrics 
y_true_t5 = t5_test['label']
y_pred_t5 = t5_test['map_to_parrott']
t5_metrics_map_to_parrott = calculate_metrics(y_true_t5, y_pred_t5)
t5_metrics_map_to_parrott

{'accuracy': 0.31073792596224564,
 'precision': 0.4444078627270496,
 'recall': 0.31073792596224564,
 'f1': 0.35573164860815665}

In [24]:
save_metrics_to_file(t5_metrics_map_to_parrott, 't5_metrics_nrc.txt')

In [16]:
t5_test.to_csv('t5_NRC_intensity.csv')

# Yangswei_85 dataset

In this section, we implemented the prediction to the Yangswei_85 dataset

In [17]:
# Import file
yangswei_85_test = pd.read_csv('https://raw.githubusercontent.com/SaraHoxha/emotion-detection-txa/main/Model%20Implementation/data/test_yangswei_85.csv')

In [18]:
# Predict label by NRC
yangswei_85_test['label_nrc'] = yangswei_85_test['text'].apply(lambda x: label_text(x, nrc_lexicon))

In [19]:
# Map the NRC's emotion label to the Parrott's emotion label
yangswei_85_test['map_to_parrott'] = yangswei_85_test['label_nrc'].apply(map_nrc_to_parrott)

In [20]:
# Count the number of label after predicting
yangswei_85_test['map_to_parrott'].value_counts()

map_to_parrott
joy         8644
love        7146
anger       2898
sadness     1745
fear        1536
surprise     320
Name: count, dtype: int64

In [21]:
# Metrics 
y_true_yangswei_85 = yangswei_85_test['label']
y_pred_yangswei_85 = yangswei_85_test['map_to_parrott']
yangswei_85_metrics_map_to_parrott = calculate_metrics(y_true_yangswei_85, y_pred_yangswei_85)
yangswei_85_metrics_map_to_parrott

{'accuracy': 0.3315088160078963,
 'precision': 0.5121049128639701,
 'recall': 0.3315088160078963,
 'f1': 0.3974974490104764}

In [25]:
save_metrics_to_file(yangswei_85_metrics_map_to_parrott, 'yangswei_85_metrics_nrc.txt')

In [30]:
yangswei_85_test.to_csv('yangswei_85_test_NRC_intensity.csv')