In [1]:
import pandas as pd

In [2]:
import numpy as np

In [4]:
yangswei_85_test = pd.read_csv('test_yangswei_85.csv')

yangswei_85_test.head()

Unnamed: 0,text,label
0,RTO is the new war on the middle class don't f...,joy
1,How do you continue with life outside of work ...,joy
2,Very desperate for a job would you know a pers...,fear
3,What time do you start working most days quest...,joy
4,What are good job sites to find LEGIT remote w...,joy


In [5]:
lexicon_path = 'NRC-Emotion-Intensity-Lexicon-v1.txt'

def load_nrc_lexicon(file_path):
    lexicon = {}
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) == 3:
                word, emotion, intensity = parts
                intensity = float(intensity)
                if word not in lexicon:
                    lexicon[word] = {}
                lexicon[word][emotion] = intensity
    return lexicon
nrc_lexicon = load_nrc_lexicon(lexicon_path)

In [6]:
def label_text(text, lexicon):
    
    # create the dict to store the score for each emotion
    emotion_scores = {emotion: 0 for emotion in set(e for values in lexicon.values() for e in values)}
    words = text.lower().split()
    
    # calculate the score of emotion based on lexicon
    for word in words:
        if word in lexicon:
            for emotion, intensity in lexicon[word].items():
                emotion_scores[emotion] += intensity
                
    # choose the emotion with the highest score
    max_emotion = max(emotion_scores, key=emotion_scores.get)
    return max_emotion

In [7]:
# predict label by NRC
yangswei_85_test['label_nrc'] = yangswei_85_test['text'].apply(lambda x: label_text(x, nrc_lexicon))

yangswei_85_test.head()

Unnamed: 0,text,label,label_nrc
0,RTO is the new war on the middle class don't f...,joy,fear
1,How do you continue with life outside of work ...,joy,joy
2,Very desperate for a job would you know a pers...,fear,anticipation
3,What time do you start working most days quest...,joy,anticipation
4,What are good job sites to find LEGIT remote w...,joy,trust


In [8]:
yangswei_85_test.to_csv('yangswei_85_nrc_inten_test.csv')

In [9]:
label_counts = yangswei_85_test['label_nrc'].value_counts()
label_counts

label_nrc
trust           7154
joy             5202
anticipation    3451
surprise        1928
sadness         1730
fear            1534
anger            858
disgust          432
Name: count, dtype: int64

In [15]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
y_true = yangswei_85_test['label']
y_pred = yangswei_85_test['label_nrc']

accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# show the results
print("Metrics for NRC:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
output_path = r"E:\Data Science and Business Informatics\Text Analytics\Project\metrics_yangswei_85_nrc.txt"

results = (
    "Metrics for NRC:\n"
    f"Accuracy: {accuracy:.4f}\n"
    f"Precision: {precision:.4f}\n"
    f"Recall: {recall:.4f}\n"
    f"F1-Score: {f1:.4f}\n"
)
with open(output_path, "w") as file:
    file.write(results)

print(f"saved to {output_path}")

saved to E:\Data Science and Business Informatics\Text Analytics\Project\metrics_yangswei_85_nrc.txt


In [19]:
def map_nrc_to_parrott(nrc_emotion):
    mapping = {
        "anger": "anger",
        "anticipation": "joy",  # convert Anticipation to Joy
        "disgust": "sadness",   # convert Disgust to Sadness
        "fear": "fear",
        "joy": "joy",
        "sadness": "sadness",
        "surprise": "surprise",
        "trust": "love",         # convert Trust to Love
        "neutral" : "neutral" # keep the same for unpredicted labels
    }
    return mapping.get(nrc_emotion, None)

In [20]:
yangswei_85_test['map_to_parrott'] = yangswei_85_test['label_nrc'].apply(map_nrc_to_parrott)

In [21]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def calculate_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall,
        'f1': f1}

In [22]:
y_true_yangswei_85= yangswei_85_test['label']
y_pred_yangswei_85 = yangswei_85_test['map_to_parrott']

In [23]:
yangswei_85_metrics_map_to_parrott = calculate_metrics(y_true_yangswei_85, y_pred_yangswei_85)
yangswei_85_metrics_map_to_parrott

{'accuracy': 0.3179146664273857,
 'precision': 0.5281876110726033,
 'recall': 0.3179146664273857,
 'f1': 0.3886600752755366}

In [27]:
def save_metrics_to_file(metrics, filename):
    metrics_str = (f"Accuracy: {metrics['accuracy']:.4f}\n"
        f"Precision: {metrics['precision']:.4f}\n"
        f"Recall: {metrics['recall']:.4f}\n"
        f"F1-Score: {metrics['f1']:.4f}\n")
    with open(filename, 'w') as file:
        file.write(metrics_str)

In [28]:
save_metrics_to_file(yangswei_85_metrics_map_to_parrott, 'yangswei_85_metrics.txt')

In [29]:
yangswei_85_test.to_csv('yangswei_85_test_NRC_map_to_parrott.csv')