In [1]:
#%python -m venv myenv

# Activate the virtual environment
#source myenv/bin/activate  # On Unix/Linux
# %./myenv/Scripts/activate  # On Windows

# Uses Cuda Toolkit 12.4 (Download from Nvidia Website if you own a NVIDIA Graphics card)

# Uncomment Below if running this file as main
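# (--index-url replaces PyPI for the whole command, so the CUDA wheels and the PyPI-only packages are installed separately.)
# %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --quiet
# %pip install tf-keras transformers datasets evaluate scikit-learn transformers[torch] --quiet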

import pandas as pd
import torch
from collections import Counter
from datasets import load_dataset
from sklearn.preprocessing import LabelEncoder 
from transformers import AutoTokenizer
from transformers import AutoConfig

# Report whether a CUDA device is usable and which CUDA version this torch build targets.
cuda_available = torch.cuda.is_available()
cuda_version = torch.version.cuda
print(f"\nIs Cuda Available: {cuda_available}")
print(f"Cuda Version: {cuda_version}")


Is Cuda Available: True
Cuda Version: 12.4


In [2]:
# Load your CSV file
df = pd.read_csv("../data/raw/reddit_mental_health_dataset.csv")

# Replace common encoding issues.
# Each pair is (broken sequence, intended text), applied top to bottom as literal
# (non-regex) replacements. "&amp;" must come before the bare "&amp" fallback:
# replacing only "&amp" would turn the HTML entity "&amp;" into "&;".
text_replacements = [
    ("â€™", "'"),
    ("â€œ", '"'),
    ("â€�", '"'),
    ("â€“", "-"),
    ("â€˜", "'"),
    ("Ã", " "),
    ("&amp;", "&"),
    ("&amp", "&"),
]
for broken, intended in text_replacements:
    df['text'] = df['text'].str.replace(broken, intended, regex=False)

df = df[df['text'].notna()]                # Drop NaN values
df = df[df['text'].str.strip() != ""]      # Drop empty/whitespace-only strings

# Save cleaned data
df.to_csv("../data/processed/cleaned_reddit_mental_health_dataset.csv", index=False)

In [3]:
# Fine-Tuning a pre-trained emotion classifier on the Custom Emotion Dataset

# Load and Split Dataset: 80% Training and 20% Testing
dataset = load_dataset("csv", data_files="../data/processed/cleaned_reddit_mental_health_dataset.csv")
# A fixed seed keeps the train/test membership identical across kernel restarts,
# so the label distribution and evaluation metrics below can be reproduced.
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)

Generating train split: 0 examples [00:00, ? examples/s]

# Pretrained LLM Model #1: "SchuylerH/bert-multilingual-go-emtions"

" A fine-tuned BERT model for cross-language emotion classification on the GoEmotions dataset. This model is unique as it has been trained on a multilingual dataset comprising of English and Chinese texts. It is capable of classifying text into one of 28 different emotion categories.

The 28 emotion categories, according to the GoEmotions taxonomy, are: 'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', and 'neutral'."

## Model Performance

- Accuracy: 85.95%
- Precision: 91.99% 
- Recall: 89.56% 
- F1 Score: 90.17%

## Training

"The dataset is split into two parts:

Labeled data: Used for initial training. It includes both English and machine translated Chinese samples. This labeled data is further split into a training set (80%) and a validation set (20%).
Unlabeled data: Used for making predictions and adding confidently predicted samples to the training data. It includes both English and machine translated Chinese samples."

"The model is trained for a total of 20 epochs (10 epochs for each stage). Precision, recall, and F1 score are logged during training."

Dataset(s) used: 
1) google-research-datasets/go_emotions with 58k curated Reddit comments

Reference: SchuylerH, "BERT-Multilingual Go Emotions". https://huggingface.co/SchuylerH/bert-multilingual-go-emtions/, 2023.

# Pretrained LLM Model #2: "j-hartmann/emotion-english-distilroberta-base"

" With this model, you can classify emotions in English text data. The model was trained on 6 diverse datasets (see Appendix below) and predicts Ekman's 6 basic emotions, plus a neutral class:

 - anger 
 - disgust 
 - fear 
 - joy 
 - neutral 
 - sadness 
 - surprise 

The model is a fine-tuned checkpoint of DistilRoBERTa-base. "

## Model Performance

- Accuracy: ~66% (random-chance baseline of 1/7 = 14%).

## Training

"The model is trained on a balanced subset from the datasets listed above (2,811 observations per emotion, i.e., nearly 20k observations in total). 80% of this balanced subset is used for training and 20% for evaluation"

Dataset(s) used: 
1) Crowdflower (2016)
2) Emotion Dataset, Elvis et al. (2018)
3) GoEmotions, Demszky et al. (2020)
4) ISEAR, Vikash (2018)
5) MELD, Poria et al. (2019)
6) SemEval-2018, EI-reg, Mohammad et al. (2018)


Reference: Jochen Hartmann, "Emotion English DistilRoBERTa-base". https://huggingface.co/j-hartmann/emotion-english-distilroberta-base/, 2022.

Between the two pre-trained LLMs, we decided to explore the "SchuylerH/bert-multilingual-go-emtions" model because it covers a broader set of emotion classes (28 vs. 7) and reports better overall performance metrics than its counterpart.

In [4]:
# model_name = "j-hartmann/emotion-english-distilroberta-base"
model_name = "SchuylerH/bert-multilingual-go-emtions"

config = AutoConfig.from_pretrained(model_name)
config.num_labels = 28
id2label = config.id2label
label2id = config.label2id
config.problem_type = "single_label_classification"

# Adjusting label2id to correct values for emotions.
# The label2id shipped with the checkpoint is inconsistent with id2label (it needed
# 'anger' and 'optimism' patched and a stray 'LABEL_2' key removed). Instead of
# hard-coding those three fixes - which raises KeyError as soon as the upstream config
# is corrected - rebuild the mapping in place as the exact inverse of id2label.
label2id.clear()
label2id.update({label: idx for idx, label in id2label.items()})
config.label2id = label2id

print(id2label)
print(label2id)

{0: 'admiration', 1: 'amusement', 2: 'anger', 3: 'annoyance', 4: 'approval', 5: 'caring', 6: 'confusion', 7: 'curiosity', 8: 'desire', 9: 'disappointment', 10: 'disapproval', 11: 'disgust', 12: 'embarrassment', 13: 'excitement', 14: 'fear', 15: 'gratitude', 16: 'grief', 17: 'joy', 18: 'love', 19: 'nervousness', 20: 'optimism', 21: 'pride', 22: 'realization', 23: 'relief', 24: 'remorse', 25: 'sadness', 26: 'surprise', 27: 'neutral'}
{'admiration': 0, 'amusement': 1, 'disapproval': 10, 'disgust': 11, 'embarrassment': 12, 'excitement': 13, 'fear': 14, 'gratitude': 15, 'grief': 16, 'joy': 17, 'love': 18, 'nervousness': 19, 'anger': 2, 'pride': 21, 'realization': 22, 'relief': 23, 'remorse': 24, 'sadness': 25, 'surprise': 26, 'neutral': 27, 'annoyance': 3, 'approval': 4, 'caring': 5, 'confusion': 6, 'curiosity': 7, 'desire': 8, 'disappointment': 9, 'optimism': 20}


In [5]:
# Map Custom Labels within processed cleaned reddit mental health dataset to potential emotion classification
def map_labels(example):
    """Attach an emotion class id to one dataset row.

    Reads the integer code in ``example["target"]``, translates it to an emotion
    name, looks that name up in the module-level ``label2id`` and stores the id
    under ``example["label"]``. The same (mutated) example is returned.
    A target outside 0-4 raises ``KeyError``.
    """
    emotion_by_target = dict([
        (0, "nervousness"),  # Stress
        (1, "sadness"),      # Depression
        (2, "neutral"),      # Bipolar (mixed, acceptable)
        (3, "confusion"),    # Personality disorder
        (4, "nervousness"),  # Anxiety
    ])
    emotion_name = emotion_by_target[example["target"]]
    example["label"] = label2id[emotion_name]
    return example

# Mapping split dataset with these labels
split_dataset = split_dataset.map(map_labels)

# Printing out emotion distribution of training data
train_labels = split_dataset["train"]["label"]
train_label_counts = Counter(train_labels)
print(train_label_counts)

Map:   0%|          | 0/4485 [00:00<?, ? examples/s]

Map:   0%|          | 0/1122 [00:00<?, ? examples/s]

Counter({19: 1794, 25: 964, 6: 864, 27: 863})


In [6]:
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(example):
    """Tokenize the ``text`` field of a batch with truncation and ``max_length`` padding."""
    encoded = tokenizer(
        example['text'],
        padding="max_length",
        truncation=True,
    )
    return encoded

tokenized_dataset = split_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/4485 [00:00<?, ? examples/s]

Map:   0%|          | 0/1122 [00:00<?, ? examples/s]

In [None]:
# Load Pretrained Model
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
import evaluate

model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training Setup

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    ``eval_pred`` unpacks into (predictions, labels); the predicted class of each
    row is the arg-max over axis 1 of the predictions.
    """
    logits, references = eval_pred
    predicted_ids = logits.argmax(axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

training_args = TrainingArguments(
    output_dir="../models/custom-emotion-model/results",
    eval_strategy="epoch",
    save_strategy="epoch",             # Must match eval_strategy for load_best_model_at_end
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    num_train_epochs=5,
    weight_decay=0.01,
    warmup_steps=100,
    lr_scheduler_type="linear",
    # Mixed precision needs a CUDA device; fall back to full precision on CPU so the
    # notebook still runs where the device selection above picked "cpu".
    fp16=torch.cuda.is_available(),
    logging_dir="../models/custom-emotion-model/logs",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    # The string "none" disables every logging integration; on the v4 releases this
    # notebook targets, passing None selects the library default ("all" installed).
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    # `tokenizer=` is deprecated for Trainer.__init__ (it triggers a FutureWarning);
    # `processing_class` is its replacement.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    # Stop when accuracy has not improved for 2 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

# # Train the Model
# trainer.train()




  trainer = Trainer(



Here are the training/evaluation results of the fine-tuned model:

| Epoch | Training Loss | Validation Loss | Accuracy |
|:-----:|:-------------:|:---------------:|:--------:|
| 1     | No log        | 0.709411         | 0.745989 |
| 2     | 1.192300      | 0.585536         | 0.785205 |
| 3     | 1.192300      | 0.577091         | 0.809269 |
| 4     | 0.349500      | 0.627049         | 0.834225 |
| 5     | 0.349500      | 0.713840         | 0.836007 |

The model begins to overfit from epoch 4 onward: the training loss keeps decreasing (1.19 → 0.35) while the validation loss rises again (0.577 → 0.627 → 0.714), even though accuracy still improves slightly.

In [8]:
# # Save the Fine-Tuned Model and Tokenizer
# model.save_pretrained("../models/custom-emotion-model")
# tokenizer.save_pretrained("../models/custom-emotion-model")

In [9]:
# Example Test(s)

# Loading Model from models/custom-emotion-model (if not trained yourself) [Otherwise comment/uncomment this section]
model_path = "../models/custom-emotion-model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

example_texts = [
    "I am feeling anxious",  # Correctly classifies emotion
    "I am feeling angry",    # Incorrectly classifies emotion
]

for text in example_texts:
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no_grad skips building the autograd graph for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_label = outputs.logits.argmax().item()

    print(outputs.logits)
    print(predicted_label)
    print("Predicted Emotion:", id2label[predicted_label])

tensor([[-5.7229, -3.8040, -4.8093, -4.8766, -3.8949, -2.7586, -1.5936, -3.8327,
         -1.7292, -1.7711, -4.2872, -2.5619, -1.4044, -2.8216,  0.4875, -3.2747,
         -2.2845, -3.5477, -5.4368,  6.5878, -1.9467, -3.2876, -2.8828, -1.9580,
         -4.1615, -0.6223, -3.1728, -2.1710]], grad_fn=<AddmmBackward0>)
19
Predicted Emotion: nervousness
tensor([[-7.1455, -5.6492, -4.6773, -5.1987, -5.9807, -4.6578,  0.1507, -4.5959,
         -4.0462, -2.3464, -4.9018, -2.1462, -2.2715, -5.8753, -1.5151, -5.8423,
         -3.9764, -6.1414, -6.1403,  4.7096, -3.8348, -5.4640, -4.3893, -4.7504,
         -5.6529, -0.9728, -5.7294, -1.8183]], grad_fn=<AddmmBackward0>)
19
Predicted Emotion: nervousness


After fine-tuning the above model on the cleaned Reddit mental health dataset and evaluating its training loss, validation loss, and accuracy, our group decided to use the pre-trained model as-is, without fine-tuning, as our use-case model, since it was trained on a far greater breadth of data than the Reddit mental health dataset.

In addition, this decision is supported by the fact that the fine-tuned model is more likely to misclassify emotions it did not see during fine-tuning, as illustrated in the example test(s) above, where "I am feeling angry" is predicted as nervousness.

As such, it would benefit us greatly to utilize these pre-trained results for our project workflow's emotion detection based on a user's (Log Tracker) notes.

In [10]:
# Use the pre-trained classification model "SchuylerH/bert-multilingual-go-emtions" as-is (no further fine-tuning) to identify emotions of a user for project workflow
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Load model
# Pick the device first, then load the tokenizer/model pair and move the model onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

emotion_model_name = "SchuylerH/bert-multilingual-go-emtions"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
emotion_model.to(device)

# Emotion label mapping: position in this list is the class index of the emotion.
emotion_label_names = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]

# Superclass emotion grouping
positive_emotions = {
    "admiration", "amusement", "approval", "caring", "excitement", "gratitude",
    "joy", "love", "optimism", "pride", "relief",
}
negative_emotions = {
    "anger", "disappointment", "disapproval", "disgust", "embarrassment", "fear",
    "grief", "remorse", "sadness", "annoyance", "nervousness",
}
neutral_emotions = {
    "neutral", "confusion", "curiosity", "desire", "realization", "surprise",
}


def _group_indices(group):
    """Return the class indices (ascending) of the labels that belong to ``group``."""
    return [position for position, name in enumerate(emotion_label_names) if name in group]


# Assign indices for each emotion into corresponding group
positive_idx = _group_indices(positive_emotions)
negative_idx = _group_indices(negative_emotions)
neutral_idx = _group_indices(neutral_emotions)

In [11]:
# Duplicate normalized superclass probabilities across 28 emotion labels for their corresponding superclass emotion grouping
def expand_superclass_probs(superclass_probs, num_labels=28):
    """Spread three superclass scores evenly over the per-emotion label slots.

    ``superclass_probs`` is indexed as 0 = negative, 1 = neutral, 2 = positive.
    It is passed through a softmax, and each resulting value is divided by the
    size of its group and written to every label index of that group
    (``negative_idx`` / ``neutral_idx`` / ``positive_idx``).

    Returns a tensor of length ``num_labels`` created with ``torch.zeros``
    defaults; slots that belong to no group stay 0.
    """
    # Normalize the probability values
    # NOTE(review): if callers already pass probabilities (as the example usage does),
    # this softmax flattens them rather than just normalizing - confirm the intent.
    normalized = F.softmax(superclass_probs, dim=0)

    expanded = torch.zeros(num_labels)
    groups_in_input_order = (negative_idx, neutral_idx, positive_idx)
    for position, members in enumerate(groups_in_input_order):
        for label_index in members:
            expanded[label_index] = normalized[position] / len(members)

    return expanded

In [12]:
# Apply weights to both probabilities tensors and sum them for the final probability tensor
def merge_probs(primary_probs, secondary_probs, alpha=0.7):
    """Blend two probability tensors as a weighted sum.

    ``primary_probs`` is weighted by ``alpha`` and ``secondary_probs`` by
    ``1 - alpha``; the two weighted terms are added and returned.
    """
    primary_share = alpha * primary_probs
    secondary_share = (1 - alpha) * secondary_probs
    return primary_share + secondary_share

In [13]:
# Classify emotion
def classify_emotion(final_probs):
    """Pick the highest-scoring emotion and name the superclass it belongs to.

    Returns ``(superclass, emotion, score)`` where ``emotion`` is the label at the
    arg-max index of ``final_probs``, ``score`` is the value at that index, and
    ``superclass`` is "Positive" or "Negative" when the index is in the matching
    index list, otherwise "Neutral".
    """
    winner = torch.argmax(final_probs).item()
    emotion = emotion_label_names[winner]

    superclass = "Neutral"
    for group_name, members in (("Positive", positive_idx), ("Negative", negative_idx)):
        if winner in members:
            superclass = group_name
            break

    return superclass, emotion, final_probs[winner].item()

In [14]:
# Inference
def run_emotion_pipeline(text, regressor_probs):
    """Classify the emotion of ``text``, blending in superclass scores from a second model.

    Args:
        text: A single input string to classify.
        regressor_probs: Tensor of three superclass scores ordered
            (negative, neutral, positive), as consumed by ``expand_superclass_probs``.

    Returns:
        dict with keys ``superclass``, ``sub_emotion``, ``confidence`` (the merged
        score of the winning emotion) and ``top_5_emotions`` (a list of
        ``(label, score)`` pairs in descending score order).
    """
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)

    # Inference only: no_grad avoids building an autograd graph for the forward pass.
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    # A single text gives logits of shape [1, num_labels]; drop the batch dimension.
    primary_probs = F.softmax(logits, dim=1).squeeze(0)  # Desired shape: [28]

    # .to(primary_probs) matches both the device and dtype of the model output.
    secondary_probs = expand_superclass_probs(regressor_probs).to(primary_probs)  # Desired shape: [28]
    final_probs = merge_probs(primary_probs, secondary_probs)

    superclass, sub_emotion, confidence = classify_emotion(final_probs)

    # topk already returns the values next to the indices, so no per-element re-indexing is needed.
    top_scores, top_indices = torch.topk(final_probs, 5)
    top_5_emotions = [
        (emotion_label_names[label_index], score)
        for label_index, score in zip(top_indices.tolist(), top_scores.tolist())
    ]

    return {
        "superclass": superclass,
        "sub_emotion": sub_emotion,
        "confidence": confidence,
        "top_5_emotions": top_5_emotions,
    }

In [15]:
# Example usage
text_input = "My day was bad today, I almost got ran over by a car"

# Secondary model probability output (Regressor Model) to be used - Example value for now...
superclass_probs = torch.tensor([0.6, 0.3, 0.1], device=device)  # Negative, Neutral, Positive
result = run_emotion_pipeline(text_input, superclass_probs)

# Print the headline fields first, then one line per top-5 entry.
report_header = [
    "Emotion Classification Result:",
    f"  Superclass: {result['superclass']}",
    f"  Sub-emotion: {result['sub_emotion']} ({result['confidence']:.3f})",
    "  Top 5 Emotions:",
]
print("\n".join(report_header))
for label, prob in result["top_5_emotions"]:
    print(f"    - {label}: {prob:.4f}")

Emotion Classification Result:
  Superclass: Negative
  Sub-emotion: disappointment (0.326)
  Top 5 Emotions:
    - disappointment: 0.3259
    - annoyance: 0.1370
    - disgust: 0.0727
    - sadness: 0.0634
    - relief: 0.0476
