# Predict the Emotion Labels based on the Basic NRC Emotion Lexicon 

In [19]:
from collections import Counter
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

## Import dataset

In [2]:
# Forward slashes are accepted by Windows as well as POSIX systems; the previous
# backslash-separated path only resolved on Windows, because on Linux/macOS a
# backslash is an ordinary file-name character rather than a separator.
file_path_t5 = "../Model Implementation/data/test_t5.csv"
t5_test = pd.read_csv(file_path_t5)

In [3]:
# Forward slashes are accepted by Windows as well as POSIX systems; the previous
# backslash-separated path only resolved on Windows, because on Linux/macOS a
# backslash is an ordinary file-name character rather than a separator.
file_path_yangswei_85 = "../Model Implementation/data/test_yangswei_85.csv"
yangswei_85_test = pd.read_csv(file_path_yangswei_85)

## NRC Lexicon

This function reads the NRC Emotion Lexicon file and constructs a dictionary where each word is mapped to its associated emotions and scores.

In [4]:
def load_nrc_lexicon(file_path, encoding='utf-8'):
    """Load a tab-separated NRC emotion lexicon into a nested dictionary.

    A usable line consists of exactly three tab-separated fields,
    ``word<TAB>emotion<TAB>score``. Lines that do not split into exactly three
    fields after stripping surrounding whitespace (blank lines, free-text
    headers) are skipped.

    Args:
        file_path: Path of the lexicon text file.
        encoding: Text encoding used to decode the file. It is passed
            explicitly (UTF-8 by default) so the result does not depend on the
            platform's locale encoding.

    Returns:
        dict: ``{word: {emotion: score}}`` where ``score`` is a float. If the
        same (word, emotion) pair occurs more than once, the last line wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the third field of a three-field line is not a number.
    """
    lexicon = {}
    with open(file_path, 'r', encoding=encoding) as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) != 3:
                continue  # not a word/emotion/score record
            word, emotion, intensity = parts
            # setdefault creates the per-word dict the first time a word is seen.
            lexicon.setdefault(word, {})[emotion] = float(intensity)
    return lexicon

In [5]:
# Load the NRC lexicon once; the resulting dict is passed to predict_emotion below.
nrc_file_path = 'NRC-Emotion-Lexicon-Wordlevel-v0.92.txt'  # relative path: resolved against the kernel's current working directory
nrc_lexicon = load_nrc_lexicon(nrc_file_path)

The `predict_emotion` function predicts the emotion of a given text based on the NRC Emotion Lexicon. It accumulates the lexicon scores of every word in the text and returns the emotion with the highest total score together with the scores for all emotions. If no word in the text matches the lexicon, it falls back to the most frequent label predicted so far.

In [6]:
# History of every label returned so far. It lives at notebook level on purpose:
# the fallback in predict_emotion uses the most frequent earlier prediction, so
# the history persists across calls (and across datasets scored in one kernel).
predicted_labels = []

# Predict emotion function
def predict_emotion(text, nrc_lexicon, default_emotion=None):
    """Predict the dominant NRC emotion of a text from lexicon word scores.

    The text is split on whitespace and each token is lower-cased before being
    looked up in the lexicon; punctuation is not stripped, so a token such as
    ``"happy!"`` only matches if the lexicon contains that exact string.
    Lexicon entries for categories outside the eight emotions below are ignored.

    Args:
        text: The text to score. Non-string values (for example NaN from an
            empty CSV cell) are treated as a text with no words.
        nrc_lexicon: Mapping ``{word: {emotion: score}}``.
        default_emotion: Label returned when the text has no lexicon hits and
            no earlier prediction exists to fall back on. Defaults to ``None``.

    Returns:
        tuple: ``(predicted_emotion, emotion_scores)`` where ``emotion_scores``
        maps each of the eight emotions to its accumulated score.

    Side effects:
        Appends the returned label to the notebook-level ``predicted_labels``
        list, unless that label is ``None``.
    """
    emotion_scores = {emotion: 0 for emotion in ['anger', 'fear', 'joy', 'sadness', 'surprise', 'trust', 'anticipation', 'disgust']}  # Define emotion categories

    # A non-string input has no words to score; previously it raised AttributeError.
    words = text.split() if isinstance(text, str) else []

    # Calculate emotion scores for each word in the text
    for word in words:
        word = word.lower()  # Convert to lowercase to match NRC lexicon
        if word in nrc_lexicon:
            for emotion, score in nrc_lexicon[word].items():
                if emotion in emotion_scores:
                    emotion_scores[emotion] += score  # Accumulate score for the emotion

    if sum(emotion_scores.values()) > 0:
        # Ties are resolved in favour of the emotion listed first above.
        predicted_emotion = max(emotion_scores, key=emotion_scores.get)
    elif predicted_labels:
        # No lexicon hit: use the mode of the previous predictions.
        predicted_emotion = Counter(predicted_labels).most_common(1)[0][0]
    else:
        # No lexicon hit and no history yet. The original code left
        # predicted_emotion unbound here and raised UnboundLocalError.
        predicted_emotion = default_emotion

    # Store the predicted label for mode calculation; None is not a label and
    # must not be able to become the mode.
    if predicted_emotion is not None:
        predicted_labels.append(predicted_emotion)

    return predicted_emotion, emotion_scores

In [7]:
# Predict an NRC label for every row of the yangswei_85 test set.
# predict_emotion returns a (label, scores) pair per row; zip(*...) splits those
# pairs into one tuple of labels and one tuple of score dicts, one per column.
# NOTE: predict_emotion appends to the notebook-level `predicted_labels` list, so
# re-running this cell without redefining that list changes the history used by
# its no-match fallback.
yangswei_85_test['predicted_emotion'], yangswei_85_test['emotion_scores'] = zip(*yangswei_85_test['text'].apply(lambda x: predict_emotion(x, nrc_lexicon)))

In [8]:
# Predict an NRC label for every row of the t5 test set.
# predict_emotion returns a (label, scores) pair per row; zip(*...) splits those
# pairs into one tuple of labels and one tuple of score dicts, one per column.
# NOTE: `predicted_labels` is shared with the yangswei_85 cell above, so when the
# cells are run in order the no-match fallback here also counts the labels
# predicted for that dataset.
t5_test['predicted_emotion'], t5_test['emotion_scores'] = zip(*t5_test['text'].apply(lambda x: predict_emotion(x, nrc_lexicon)))

## Convert to Parrott's emotions

After predicting the NRC labels, we mapped our results to Parrott's emotion categories based on the definitions provided in the groups listed at https://en.wikipedia.org/wiki/Emotion_classification. This mapping is an attempt to align the NRC emotions with Parrott's emotions so that their correspondence can be observed.
Three NRC emotions have no category of the same name in Parrott's scheme, so we mapped them as follows:
1. Parrott's "joy" class contains the sub-emotions "eagerness" and "hope". These are close in meaning to "anticipation" in the NRC lexicon, so we map "anticipation" to "joy".
2. Parrott's "anger" class lists "disgust" among its sub-emotions, so we map "disgust" to "anger".
3. We map "trust" to Parrott's "love" class because trust is an important part of love: it helps build strong, loving relationships.

In [9]:
def map_nrc_to_parrott(nrc_emotion):
    """Translate one NRC emotion label into the Parrott category used here.

    Args:
        nrc_emotion: An NRC emotion name in lower case, e.g. ``"trust"``.

    Returns:
        The corresponding Parrott label, or ``None`` when ``nrc_emotion`` is
        not one of the eight NRC emotions listed below.
    """
    parrott_by_nrc = dict(
        anger="anger",
        anticipation="joy",   # Anticipation is folded into Joy
        disgust="anger",      # Disgust is folded into Anger
        fear="fear",
        joy="joy",
        sadness="sadness",
        surprise="surprise",
        trust="love",         # Trust is folded into Love
    )
    if nrc_emotion in parrott_by_nrc:
        return parrott_by_nrc[nrc_emotion]
    return None

In [10]:
# Map each predicted NRC label of the t5 test set to its Parrott label.
# Labels that map_nrc_to_parrott does not know become None.
t5_test['map_to_parrott'] = t5_test['predicted_emotion'].apply(map_nrc_to_parrott)

In [11]:
# Map each predicted NRC label of the yangswei_85 test set to its Parrott label.
# Labels that map_nrc_to_parrott does not know become None.
yangswei_85_test['map_to_parrott'] = yangswei_85_test['predicted_emotion'].apply(map_nrc_to_parrott)

In [12]:
# Count how many t5 test rows were assigned each Parrott label.
t5_test['map_to_parrott'].value_counts()

map_to_parrott
joy         10488
love         5736
anger        3536
fear         2342
sadness      1818
surprise      554
Name: count, dtype: int64

In [13]:
# Count how many yangswei_85 test rows were assigned each Parrott label.
yangswei_85_test['map_to_parrott'].value_counts()

map_to_parrott
joy         9534
love        5114
anger       3318
fear        2101
sadness     1683
surprise     539
Name: count, dtype: int64

## Calculate metrics

In [14]:
# Calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score predictions with accuracy and weighted precision/recall/F1.

    Args:
        y_true: Reference labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.

    Returns:
        dict: Keys ``'accuracy'``, ``'precision'``, ``'recall'`` and ``'f1'``.
        The last three come from scikit-learn's
        ``precision_recall_fscore_support`` called with ``average='weighted'``.
    """
    overall_accuracy = accuracy_score(y_true, y_pred)
    weighted = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    # The fourth element of the returned tuple (support) is not needed here.
    weighted_precision, weighted_recall, weighted_f1 = weighted[:3]
    return dict(
        accuracy=overall_accuracy,
        precision=weighted_precision,
        recall=weighted_recall,
        f1=weighted_f1,
    )

In [15]:
# Results of t5 dataset: compare the dataset's `label` column with the mapped
# predictions. Presumably `label` holds Parrott-style labels; verify against the CSV.
y_true_t5 = t5_test['label']
y_pred_t5 = t5_test['map_to_parrott']
t5_metrics_map_to_parrott = calculate_metrics(y_true_t5, y_pred_t5)
# Bare expression so the notebook displays the metrics dict.
t5_metrics_map_to_parrott

{'accuracy': 0.3301462776824385,
 'precision': 0.4256384563291098,
 'recall': 0.3301462776824385,
 'f1': 0.3628026551610573}

In [16]:
# Results of yangswei_85 dataset: compare the dataset's `label` column with the
# mapped predictions. Presumably `label` holds Parrott-style labels; verify against the CSV.
y_true_yangswei_85_test = yangswei_85_test['label']
y_pred_yangswei_85_test = yangswei_85_test['map_to_parrott']
yangswei_85_metrics_map_to_parrott = calculate_metrics(y_true_yangswei_85_test, y_pred_yangswei_85_test)
# Bare expression so the notebook displays the metrics dict.
yangswei_85_metrics_map_to_parrott

{'accuracy': 0.3637220153438916,
 'precision': 0.5081070224323526,
 'recall': 0.3637220153438916,
 'f1': 0.41795279824908305}

In [17]:
# Save the metrics to the result file
def save_metrics_to_file(metrics, filename):
    metrics_str = (f"Accuracy: {metrics['accuracy']:.4f}\n"
        f"Precision: {metrics['precision']:.4f}\n"
        f"Recall: {metrics['recall']:.4f}\n"
        f"F1-Score: {metrics['f1']:.4f}\n")
    with open(filename, 'w') as file:
        file.write(metrics_str)

In [18]:
# Write each dataset's metrics to a text file in the current working directory;
# save_metrics_to_file opens the file in 'w' mode, so earlier results are overwritten.
save_metrics_to_file(t5_metrics_map_to_parrott, 't5_metrics_nrc.txt')
save_metrics_to_file(yangswei_85_metrics_map_to_parrott, 'yangswei_85_metrics_nrc.txt')