In [1]:
from nrclex import NRCLex
import pandas as pd

## Predict emotion with the NRC lexicon (ignoring the positive/negative sentiment scores)

### T5 dataset

In [2]:
# Load the T5 test CSV from the working directory; later cells read its
# 'text' and 'label' columns.
t5_test = pd.read_csv('test_t5.csv')

In [3]:
def predict_emotion_nrclex_ignore_sentiments(text):
    """Return the dominant NRC emotion label for ``text``.

    The text is scored with ``NRCLex`` and its raw per-emotion scores are
    inspected, skipping the "positive" and "negative" sentiment entries so
    that only emotion categories compete.

    Args:
        text: The document to classify. Anything that is not a non-blank
            string (``None``, NaN, ``""``, whitespace only) is treated as
            containing no emotion.

    Returns:
        The emotion key with the highest raw score, or ``"neutral"`` when
        there is no usable text or no emotion was detected. On a tie,
        ``max`` keeps the first tied key in the dict's iteration order.
    """
    # pandas represents empty CSV cells as float NaN; there is nothing to
    # score in that case, so short-circuit instead of handing a non-string
    # value to NRCLex.
    if not isinstance(text, str) or not text.strip():
        return "neutral"

    emotion_data = NRCLex(text)
    # don't count "positive" and "negative" sentiment
    filtered_emotions = {
        emotion: score
        for emotion, score in emotion_data.raw_emotion_scores.items()
        if emotion not in ('positive', 'negative')
    }

    if not filtered_emotions:
        # return "neutral" if the lexicon could not detect any emotion
        return "neutral"

    # choose the label having the highest score
    return max(filtered_emotions, key=filtered_emotions.get)

In [4]:
# Label every T5 text with its dominant NRC emotion (or "neutral") and store
# the result in the 'label_nrc' column of the same frame.
t5_test['label_nrc'] = t5_test['text'].apply(predict_emotion_nrclex_ignore_sentiments)

In [26]:
# Persist the T5 frame (with its 'label_nrc' column) as CSV.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default — pass index=False if downstream readers should not see it.
t5_test.to_csv('t5_test_nrc.csv')

### Yangswei_85 dataset

In [5]:
# Load the Yangswei_85 test CSV from the working directory; later cells read
# its 'text' and 'label' columns.
yangswei_85_test = pd.read_csv('test_yangswei_85.csv')

In [6]:
# Label every Yangswei_85 text with its dominant NRC emotion (or "neutral")
# and store the result in the 'label_nrc' column of the same frame.
yangswei_85_test['label_nrc'] = yangswei_85_test['text'].apply(predict_emotion_nrclex_ignore_sentiments)

In [9]:
# Preview the first rows to eyeball the dataset 'label' next to 'label_nrc'.
yangswei_85_test.head()

Unnamed: 0,text,label,label_nrc
0,RTO is the new war on the middle class don't f...,joy,fear
1,How do you continue with life outside of work ...,joy,anticipation
2,Very desperate for a job would you know a pers...,fear,anticipation
3,What time do you start working most days quest...,joy,anticipation
4,What are good job sites to find LEGIT remote w...,joy,trust


In [8]:
# Distribution of the predicted NRC labels; "neutral" counts the texts for
# which no emotion was detected.
label_counts = yangswei_85_test['label_nrc'].value_counts()
label_counts

label_nrc
anticipation    5776
trust           5199
neutral         3363
anger           2357
joy             1689
sadness         1510
fear            1395
disgust          521
surprise         479
Name: count, dtype: int64

In [25]:
# Persist the Yangswei_85 frame (with its 'label_nrc' column) as CSV.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default — pass index=False if downstream readers should not see it.
yangswei_85_test.to_csv('yangswei_85_test_nrc.csv')

### Calculate metrics

In [13]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def calculate_metrics(y_true, y_pred):
    """Compute accuracy and support-weighted precision, recall and F1.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # When a label occurs on only one side (e.g. it is predicted but never
    # present in y_true) its per-label precision or recall is 0/0.
    # zero_division=0 scores such labels as 0 — the same value the default
    # 'warn' mode uses — without emitting a warning on every call.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted', zero_division=0
    )
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall,
            'f1': f1}

In [17]:
# Pair each dataset's own labels (y_true_*) with its NRC predictions (y_pred_*).
y_true_t5, y_pred_t5 = t5_test['label'], t5_test['label_nrc']
y_true_yangswei_85, y_pred_yangswei_85 = (
    yangswei_85_test['label'],
    yangswei_85_test['label_nrc'],
)

In [20]:
# Score the NRC predictions against the T5 labels; the bare name on the last
# line displays the resulting dict as the cell output.
t5_metrics = calculate_metrics(y_true_t5, y_pred_t5)
t5_metrics

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.12351883631609055,
 'precision': 0.47043178625411086,
 'recall': 0.12351883631609055,
 'f1': 0.1883580531231452}

In [21]:
# Score the NRC predictions against the Yangswei_85 labels; the bare name on
# the last line displays the resulting dict as the cell output.
yangswei_85_metrics = calculate_metrics(y_true_yangswei_85, y_pred_yangswei_85)
yangswei_85_metrics

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.1096056350666248,
 'precision': 0.524316457758694,
 'recall': 0.1096056350666248,
 'f1': 0.16550956179207252}

In [16]:
def save_metrics_to_file(metrics, filename):
    """Write the four summary metrics to ``filename`` as plain text.

    Args:
        metrics: Mapping holding the keys 'accuracy', 'precision', 'recall'
            and 'f1'; each value is rendered with four decimal places.
        filename: Destination path. It is opened in 'w' mode, so any
            existing content is replaced.
    """
    # Build the whole report first so a missing key raises before the
    # destination file is opened.
    report_lines = [
        f"Accuracy: {metrics['accuracy']:.4f}\n",
        f"Precision: {metrics['precision']:.4f}\n",
        f"Recall: {metrics['recall']:.4f}\n",
        f"F1-Score: {metrics['f1']:.4f}\n",
    ]
    report = "".join(report_lines)
    with open(filename, 'w') as handle:
        handle.write(report)

In [22]:
# Write the T5 metrics to a text file in the working directory.
save_metrics_to_file(t5_metrics, 't5_metrics.txt')

In [23]:
# Write the Yangswei_85 metrics to a text file in the working directory.
save_metrics_to_file(yangswei_85_metrics, 'yangswei_85_metrics.txt')