In [1]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, log_loss, matthews_corrcoef,
    balanced_accuracy_score
)
from transformers import pipeline
import pandas as pd
import numpy as np

In [2]:
# Disable column-width truncation so long text cells are rendered in full
pd.options.display.max_colwidth = None

In [3]:
# Load the test split from its CSV file (path is relative to the notebook)
df = pd.read_csv(filepath_or_buffer='../../dataset/test.csv')

# Preview the frame with a fresh 0..n-1 index; the stored `df` itself is left untouched
display(df.reset_index(drop=True))

Unnamed: 0,text,label,label_name
0,im feeling rather rotten so im not very ambitious right now,0,sadness
1,im updating my blog because i feel shitty,0,sadness
2,i never make her separate from me because i don t ever want her to feel like i m ashamed with her,0,sadness
3,i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived,1,joy
4,i was feeling a little vain when i did this one,0,sadness
...,...,...,...
1995,i just keep feeling like someone is being unkind to me and doing me wrong and then all i can think of doing is to get back at them and the people they are close to,3,anger
1996,im feeling a little cranky negative after this doctors appointment,3,anger
1997,i feel that i am useful to my people and that gives me a great feeling of achievement,1,joy
1998,im feeling more comfortable with derby i feel as though i can start to step out my shell,1,joy


In [4]:
def predict(classifier: "transformers.Pipeline", texts: list[str]) -> pd.DataFrame:
    """Classify each text and return the top prediction per text as a DataFrame.

    Args:
        classifier: A callable (e.g. a text-classification pipeline) that takes
            the list of texts and returns one result per text. Each result may
            be either a list whose first element is the top prediction dict
            (the shape produced when ``top_k=1`` is passed explicitly) or a
            bare prediction dict. A prediction dict has ``"label"`` and
            ``"score"`` keys.
        texts: The input sentences to classify.

    Returns:
        A DataFrame with one row per input text and the columns ``text``,
        ``predicted_label`` (integer class id), ``predicted_label_name``
        (emotion name) and ``score``.

    Raises:
        ValueError: If the classifier returns a different number of results
            than there are input texts.
        KeyError: If a predicted label is neither a known ``LABEL_<id>`` key
            nor a known emotion name.
    """
    # Map the model's generic label keys to emotion names
    id2label = {
        "LABEL_0": "sadness",
        "LABEL_1": "joy",
        "LABEL_2": "love",
        "LABEL_3": "anger",
        "LABEL_4": "fear",
        "LABEL_5": "surprise"
    }
    # Integer id for every generic key, plus a reverse lookup so that a
    # checkpoint emitting emotion names directly still resolves to an id
    key2id = {key: int(key.split("_")[1]) for key in id2label}
    name2id = {name: key2id[key] for key, name in id2label.items()}

    # Run classification
    outputs = list(classifier(texts))
    if len(outputs) != len(texts):
        raise ValueError(
            f"Classifier returned {len(outputs)} results for {len(texts)} texts"
        )

    texts_list, labels, label_names, scores = [], [], [], []
    for text, output in zip(texts, outputs):
        # Accept both the nested ([{...}]) and the flat ({...}) result shape
        top = output if isinstance(output, dict) else output[0]
        label_key = top["label"]

        if label_key in key2id:
            label_id, label_name = key2id[label_key], id2label[label_key]
        elif label_key in name2id:
            label_id, label_name = name2id[label_key], label_key
        else:
            raise KeyError(
                f"Unexpected label {label_key!r}; expected one of "
                f"{sorted(key2id)} or {sorted(name2id)}"
            )

        texts_list.append(text)
        labels.append(label_id)
        label_names.append(label_name)
        scores.append(top["score"])

    results = {
        'text': texts_list,
        'predicted_label': labels,
        'predicted_label_name': label_names,
        'score': scores
    }
    return pd.DataFrame(results)

In [5]:
def evaluate_model(actual: list[int], predicted: list[int]) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Compute summary classification metrics and a confusion matrix.

    Args:
        actual: Ground-truth integer class ids.
        predicted: Predicted integer class ids, aligned with ``actual``.

    Returns:
        A ``(metrics_df, cm_df)`` tuple. ``metrics_df`` has the columns
        ``Metric``, ``Value`` (rounded to 3 decimals) and ``Description`` for
        accuracy and the support-weighted precision, recall and F1.
        ``cm_df`` is the 6x6 confusion matrix indexed by class id 0-5 on both
        axes (as passed to ``confusion_matrix``: first argument on the index,
        second on the columns).
    """
    # Fixed class ids; used to pin the confusion-matrix shape and to label it
    ordered_labels = np.array([0, 1, 2, 3, 4, 5])

    # Calculate metrics. zero_division=0 is applied uniformly so a class with
    # no predictions/samples scores 0 without emitting a warning.
    accuracy = accuracy_score(actual, predicted)
    precision = precision_score(actual, predicted, average='weighted', zero_division=0)
    recall = recall_score(actual, predicted, average='weighted', zero_division=0)
    f1 = f1_score(actual, predicted, average='weighted', zero_division=0)

    # Pass labels explicitly: otherwise the matrix only covers the classes that
    # occur in the data and no longer matches the 6 index/column labels below.
    cm = confusion_matrix(actual, predicted, labels=ordered_labels)

    # Use the integer class ids for index and columns
    cm_df = pd.DataFrame(cm, columns=ordered_labels, index=ordered_labels)

    # Create a dictionary of metrics and descriptions
    metrics_dict = {
        "Metric": [
            "Accuracy",
            "Precision",
            "Recall (Sensitivity)",
            "F1 Score",
        ],
        "Value": [
            accuracy,
            precision,
            recall,
            f1,
        ],
        "Description": [
            "Ratio of correct predictions to total predictions",
            "True Positives / (True Positives + False Positives)",
            "True Positives / (True Positives + False Negatives)",
            "Harmonic mean of precision and recall",
        ],
    }

    # Convert to DataFrame
    metrics_df = pd.DataFrame(metrics_dict)

    # Round the values for display
    metrics_df["Value"] = metrics_df["Value"].apply(lambda x: round(x, 3))

    return metrics_df, cm_df

In [6]:
# Identifier of the model being evaluated
use_model = "Swoodplays/emotion-classification"

# Build the text-classification pipeline, requesting a single label per text
classifier = pipeline(task="text-classification", model=use_model, top_k=1)

# Classify every test sentence, then pair gold labels with predicted ones
pred_df = predict(classifier, texts=df["text"].to_list())
actual = df["label"].to_list()
predicted = pred_df["predicted_label"].to_list()

# Compute the summary metrics and the confusion matrix
metrics_df, cm_df = evaluate_model(actual, predicted)

# Show both result tables, each under a heading that names the model
print(f"Evaluation Metrics for {use_model}:")
display(metrics_df)
print(f"Confusion Matrix for {use_model}:")
display(cm_df)
print()




Device set to use cpu


Evaluation Metrics for Swoodplays/emotion-classification:


Unnamed: 0,Metric,Value,Description
0,Accuracy,0.929,Ratio of correct predictions to total predictions
1,Precision,0.931,True Positives / (True Positives + False Positives)
2,Recall (Sensitivity),0.929,True Positives / (True Positives + False Negatives)
3,F1 Score,0.93,Harmonic mean of precision and recall


Confusion Matrix for Swoodplays/emotion-classification:


Unnamed: 0,0,1,2,3,4,5
0,564,3,2,8,4,0
1,1,646,39,3,0,6
2,0,19,139,1,0,0
3,9,3,0,254,9,0
4,6,0,0,7,202,9
5,2,1,0,0,10,53



