In [31]:
from nrclex import NRCLex
import pandas as pd

In [32]:
# Load test_t5.csv into a DataFrame; later cells read its 'text' and 'label' columns.
t5_test = pd.read_csv('test_t5.csv')

In [33]:
# Load test_yangswei_85.csv into a DataFrame; later cells read its 'text' and 'label' columns.
yangswei_85_test = pd.read_csv('test_yangswei_85.csv')

# Predict NRC emotions and map them to Parrott's emotion groups

Based on Parrott's grouping of emotions (see https://en.wikipedia.org/wiki/Emotion_classification), I map each NRC emotion to a Parrott emotion to observe how well the two label sets correspond.

In [34]:
def predict_emotion_nrclex_ignore_sentiments(text):
    """Return the dominant NRC emotion detected in ``text``.

    The "positive" and "negative" sentiment entries reported by NRCLex are
    discarded, so only the emotion categories compete for the top score.

    Parameters
    ----------
    text : str
        Raw text to analyse. Missing values (``None``, ``NaN`` or any other
        non-string) and blank strings are treated as carrying no emotion.

    Returns
    -------
    str
        The emotion with the highest raw score, or ``"neutral"`` when no
        emotion could be detected. Ties are resolved in favour of the emotion
        that appears first in NRCLex's raw score dict.
    """
    # Empty CSV cells arrive from pandas as float NaN rather than as strings;
    # short-circuit so non-string or blank values never reach NRCLex.
    if not isinstance(text, str) or not text.strip():
        return "neutral"

    emotion_data = NRCLex(text)
    # don't count "positive" and "negative" sentiment
    filtered_emotions = {emotion: score for emotion, score in emotion_data.raw_emotion_scores.items()
                         if emotion not in ['positive', 'negative']}

    if filtered_emotions:
        # choose the label having the highest score
        return max(filtered_emotions, key=filtered_emotions.get)
    return "neutral"  # return "neutral" if the model could not detect any emotion

In [35]:
def map_nrc_to_parrott(nrc_emotion):
    """Translate an NRC emotion label into the Parrott label used for scoring.

    Labels shared by both schemes pass through unchanged; anticipation, disgust
    and trust are folded into joy, sadness and love respectively. The
    "neutral" placeholder (no emotion predicted) is kept as is.

    Returns ``None`` for any label that is not in the table.
    """
    # NOTE(review): Parrott's tree appears to list disgust under anger rather
    # than sadness -- confirm that disgust -> sadness is the intended choice.
    parrott_by_nrc = dict(
        anger="anger",
        anticipation="joy",
        disgust="sadness",
        fear="fear",
        joy="joy",
        sadness="sadness",
        surprise="surprise",
        trust="love",
        neutral="neutral",
    )
    if nrc_emotion in parrott_by_nrc:
        return parrott_by_nrc[nrc_emotion]
    return None

In [36]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def calculate_metrics(y_true, y_pred):
    """Compute accuracy and support-weighted precision, recall and F1.

    Parameters
    ----------
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_true``.

    Returns
    -------
    dict
        Keys ``'accuracy'``, ``'precision'``, ``'recall'`` and ``'f1'``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # The predictions can contain labels that never occur in y_true (e.g.
    # "neutral"), which makes per-label recall undefined. zero_division=0
    # yields the same numeric result as the default policy (score 0.0) but
    # without emitting a warning on every call.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted', zero_division=0)
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall,
        'f1': f1}

In [37]:
def save_metrics_to_file(metrics, filename):
    """Write the four metrics to ``filename`` as a small text report.

    ``metrics`` must provide the keys 'accuracy', 'precision', 'recall' and
    'f1'. Each value is written on its own line with four decimal places.
    An existing file at ``filename`` is overwritten.
    """
    # Format everything first so a missing key fails before the file is touched.
    report_lines = [
        f"Accuracy: {metrics['accuracy']:.4f}\n",
        f"Precision: {metrics['precision']:.4f}\n",
        f"Recall: {metrics['recall']:.4f}\n",
        f"F1-Score: {metrics['f1']:.4f}\n",
    ]
    with open(filename, 'w') as report_file:
        report_file.writelines(report_lines)

## t5 dataset

In [38]:
# Run the NRCLex predictor on every row's text; the dominant NRC emotion
# (or "neutral" when none is found) is stored in a new 'label_nrc' column.
t5_test['label_nrc'] = t5_test['text'].apply(predict_emotion_nrclex_ignore_sentiments)

In [39]:
# Translate each NRC label into the Parrott label set via map_nrc_to_parrott.
t5_test['map_to_parrott'] = t5_test['label_nrc'].apply(map_nrc_to_parrott)

In [40]:
# Distribution of the mapped predictions; the bare name on the last line
# makes the notebook display the counts.
label_counts = t5_test['map_to_parrott'].value_counts()
label_counts

map_to_parrott
joy         8230
love        5815
neutral     3716
anger       2515
sadness     2159
fear        1548
surprise     491
Name: count, dtype: int64

In [26]:
# Reference labels ('label' column) and mapped NRC predictions for the t5 test set.
y_true_t5 = t5_test['label']
y_pred_t5 = t5_test['map_to_parrott']

In [27]:
# Accuracy plus weighted precision/recall/F1 of the mapped predictions on t5;
# the bare name on the last line displays the resulting dict.
t5_metrics_map_to_parrott = calculate_metrics(y_true_t5, y_pred_t5)
t5_metrics_map_to_parrott

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.26268693307183133,
 'precision': 0.42847967489226385,
 'recall': 0.26268693307183133,
 'f1': 0.31598684885365796}

In [41]:
# Persist the t5 metrics as a text report next to the notebook.
save_metrics_to_file(t5_metrics_map_to_parrott, 't5_metrics_map_to_parrott.txt')

## yangswei_85 dataset

In [42]:
# Run the NRCLex predictor on every row's text; the dominant NRC emotion
# (or "neutral" when none is found) is stored in a new 'label_nrc' column.
yangswei_85_test['label_nrc'] = yangswei_85_test['text'].apply(predict_emotion_nrclex_ignore_sentiments)

In [43]:
# Translate each NRC label into the Parrott label set via map_nrc_to_parrott.
yangswei_85_test['map_to_parrott'] = yangswei_85_test['label_nrc'].apply(map_nrc_to_parrott)

In [None]:
# Reference labels ('label' column) and mapped NRC predictions for the yangswei_85 test set.
y_true_yangswei_85= yangswei_85_test['label']
y_pred_yangswei_85 = yangswei_85_test['map_to_parrott']

In [None]:
# Score the yangswei_85 predictions against the yangswei_85 reference labels.
# (The t5 series were passed here before, which merely reproduced the t5 metrics.)
yangswei_85_metrics_map_to_parrott = calculate_metrics(y_true_yangswei_85, y_pred_yangswei_85)
yangswei_85_metrics_map_to_parrott