**Nina Dobša, zadnje uređivano 7.7.2025.** 

# Imports

In [1]:
import pandas as pd
from gensim.models import Word2Vec
from transformers import BertTokenizer, BertForMaskedLM
import torch
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate, StratifiedKFold, cross_val_predict
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


# GoEmotions dataset 

In [2]:
# Parquet files of the "simplified" GoEmotions configuration, keyed by split name
splits = dict(
    train='simplified/train-00000-of-00001.parquet',
    validation='simplified/validation-00000-of-00001.parquet',
    test='simplified/test-00000-of-00001.parquet',
)

# Read the three splits (train, validation, test - in that order) from the hf:// dataset location
goemotions_root = "hf://datasets/google-research-datasets/go_emotions/"
goemotions_train, goemotions_validation, goemotions_test = (
    pd.read_parquet(goemotions_root + splits[split_name])
    for split_name in ("train", "validation", "test")
)

In [3]:
# Stack the three splits row-wise into a single frame with a fresh 0..n-1 index
all_splits = [goemotions_train, goemotions_validation, goemotions_test]
goemotions_data = pd.concat(all_splits, ignore_index=True)

In [4]:
# (rows, columns) of the combined frame
goemotions_data.shape

(54263, 3)

Cleaning and labeling emotions with 0 and 1

In [5]:
# GoEmotions label id -> emotion name (the id is the position in this tuple)
emotion_dictionary = dict(enumerate((
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral',
)))


# The five emotions modelled in this notebook, mapped to their label ids
target_emotions = dict(anger=2, sadness=25, joy=17, disgust=11, surprise=26)

In [6]:
# One binary indicator column per target emotion (anger, sadness, joy, disgust, surprise):
# 1 when the emotion's label id appears in the row's 'labels', otherwise 0
for emotion, label in target_emotions.items():
    goemotions_data[emotion] = goemotions_data['labels'].map(
        lambda row_labels: int(label in row_labels)
    )

In [7]:
# The 'id' and 'labels' columns are no longer needed, so remove them
columns_to_drop = ["id", "labels"]
goemotions_data = goemotions_data.drop(columns_to_drop, axis=1)

In [8]:
# Preview the first rows: text plus the five 0/1 emotion columns
goemotions_data.head()

Unnamed: 0,text,anger,sadness,joy,disgust,surprise
0,My favourite food is anything I didn't have to...,0,0,0,0,0
1,"Now if he does off himself, everyone will thin...",0,0,0,0,0
2,WHY THE FUCK IS BAYLESS ISOING,1,0,0,0,0
3,To make her feel threatened,0,0,0,0,0
4,Dirty Southern Wankers,0,0,0,0,0


## Word2Vec model

In [9]:
# Load the fine-tuned Word2Vec models from disk, skip-gram (SG) first
model_path_SG = "fine_tuned_word2vec_sg/fine_tuned_word2vec_sg.model"
word2vec_model_SG = Word2Vec.load(model_path_SG)

# ... then the continuous bag-of-words (CBOW) variant
model_path_CBOW = "fine_tuned_word2vec_cbow/fine_tuned_word2vec_cbow.model"
word2vec_model_CBOW = Word2Vec.load(model_path_CBOW)

In [10]:
# Function that returns the word2vec embedding for a given sentence
def get_word2vec_embedding(sentence, word2vec_model):
    """Return a sentence embedding: the mean of its in-vocabulary word vectors.

    Parameters
    ----------
    sentence : str
        Text to embed. It is tokenized by splitting on whitespace only; no
        lowercasing or punctuation stripping is applied.
    word2vec_model
        Model exposing ``wv`` (keyed word vectors supporting ``in``, indexing
        and a ``vectors`` array) and ``vector_size``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``word2vec_model.vector_size``. When none of the
        tokens is in the vocabulary, a zero vector in the same dtype as the
        model's word vectors.
    """
    keyed_vectors = word2vec_model.wv

    # Keep only the tokens the model has a vector for
    word_embeddings = [keyed_vectors[word] for word in sentence.split() if word in keyed_vectors]

    if not word_embeddings:
        # Build the fallback in the vectors' own dtype: a default float64 zero vector
        # would force an upcast when the embeddings are later stacked into one matrix.
        return np.zeros(word2vec_model.vector_size, dtype=keyed_vectors.vectors.dtype)

    # Average the word vectors element-wise to get the sentence embedding
    return np.mean(word_embeddings, axis=0)

In [11]:
# Store the skip-gram sentence embedding of every text in a new
# 'word2vec_embedding_SG' column of the combined goemotions_data frame
def _embed_with_sg(sentence):
    return get_word2vec_embedding(sentence, word2vec_model_SG)

goemotions_data['word2vec_embedding_SG'] = goemotions_data['text'].map(_embed_with_sg)

In [12]:
# Store the CBOW sentence embedding of every text in a new
# 'word2vec_embedding_CBOW' column of the combined goemotions_data frame
def _embed_with_cbow(sentence):
    return get_word2vec_embedding(sentence, word2vec_model_CBOW)

goemotions_data['word2vec_embedding_CBOW'] = goemotions_data['text'].map(_embed_with_cbow)

In [13]:
# Preview the last rows, now including both Word2Vec embedding columns
goemotions_data.tail()

Unnamed: 0,text,anger,sadness,joy,disgust,surprise,word2vec_embedding_SG,word2vec_embedding_CBOW
54258,Thanks. I was diagnosed with BP 1 after the ho...,0,0,0,0,0,"[-0.011911079, 0.1247393, -0.03292984, -0.0216...","[-0.35409313, 0.18309589, 0.7523857, -0.554011..."
54259,Well that makes sense.,0,0,0,0,0,"[-0.053056713, 0.366973, -0.03590046, -0.05008...","[-0.49808195, -0.40611854, 0.55714554, -0.2184..."
54260,Daddy issues [NAME],0,0,0,0,0,"[-0.09085079, 0.40177393, -0.027788045, 0.1444...","[0.10302511, 0.25298855, 0.85893273, 0.8379355..."
54261,So glad I discovered that subreddit a couple m...,0,0,0,0,0,"[0.095128104, 0.26241234, 0.091321245, 0.07458...","[0.3417902, -0.33821112, 1.1506916, -0.2599661..."
54262,"Had to watch ""Elmo in Grouchland"" one time too...",0,0,0,0,0,"[0.020561595, 0.23708051, -0.13485207, -0.0036...","[0.07777702, -0.30598238, 0.44885072, -0.04225..."


## ML algorithms for word2vec embeddings

In [14]:
# Random Forest used for every experiment below: 100 trees, fixed seed
rf_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**rf_params)

In [15]:
# Stratified 5-fold splitter; rows are shuffled with a fixed seed before splitting
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

### Skip gram method

In [16]:
# Feature matrix: one row per sentence, built from the skip-gram sentence embeddings
X_SG = np.array(list(goemotions_data['word2vec_embedding_SG']))

# Binary targets, one Series per emotion
y_anger, y_joy, y_sadness, y_disgust, y_surprise = (
    goemotions_data[emotion_column]
    for emotion_column in ('anger', 'joy', 'sadness', 'disgust', 'surprise')
)

In [17]:
# 5-fold cross-validation on the skip-gram features, one binary task per emotion
scoring_metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
(
    scores_anger_sg,
    scores_joy_sg,
    scores_sadness_sg,
    scores_disgust_sg,
    scores_surprise_sg,
) = (
    cross_validate(clf, X_SG, y_target, cv=kf, scoring=scoring_metrics)
    for y_target in (y_anger, y_joy, y_sadness, y_disgust, y_surprise)
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
# Report mean +/- standard deviation across the folds, grouped by metric
for metric in scoring_metrics:
    for emotion_name, cv_scores in (
        ("anger", scores_anger_sg),
        ("joy", scores_joy_sg),
        ("sadness", scores_sadness_sg),
        ("disgust", scores_disgust_sg),
        ("surprise", scores_surprise_sg),
    ):
        fold_scores = cv_scores[f'test_{metric}']
        print(f"{metric} {emotion_name}: {np.mean(fold_scores):.4f} +/- {np.std(fold_scores):.4f}")
    print("\n")

accuracy anger: 0.9636 +/- 0.0002
accuracy joy: 0.9671 +/- 0.0005
accuracy sadness: 0.9700 +/- 0.0001
accuracy disgust: 0.9812 +/- 0.0002
accuracy surprise: 0.9750 +/- 0.0002


precision_macro anger: 0.7048 +/- 0.0202
precision_macro joy: 0.7270 +/- 0.0941
precision_macro sadness: 0.7311 +/- 0.0516
precision_macro disgust: 0.6783 +/- 0.1714
precision_macro surprise: 0.5220 +/- 0.0430


recall_macro anger: 0.5131 +/- 0.0011
recall_macro joy: 0.5119 +/- 0.0050
recall_macro sadness: 0.5066 +/- 0.0020
recall_macro disgust: 0.5024 +/- 0.0015
recall_macro surprise: 0.5005 +/- 0.0009


f1_macro anger: 0.5166 +/- 0.0021
f1_macro joy: 0.5151 +/- 0.0095
f1_macro sadness: 0.5055 +/- 0.0040
f1_macro disgust: 0.5001 +/- 0.0030
f1_macro surprise: 0.4951 +/- 0.0018




### CBOW method

In [19]:
# Feature matrix: one row per sentence, built from the CBOW sentence embeddings
cbow_embeddings = list(goemotions_data['word2vec_embedding_CBOW'])
X_CBOW = np.array(cbow_embeddings)

In [20]:
# 5-fold cross-validation on the CBOW features, one binary task per emotion
scoring_metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
(
    scores_anger_cbow,
    scores_joy_cbow,
    scores_sadness_cbow,
    scores_disgust_cbow,
    scores_surprise_cbow,
) = (
    cross_validate(clf, X_CBOW, y_target, cv=kf, scoring=scoring_metrics)
    for y_target in (y_anger, y_joy, y_sadness, y_disgust, y_surprise)
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
# Report mean +/- standard deviation across the folds, grouped by metric
for metric in scoring_metrics:
    for emotion_name, cv_scores in (
        ("anger", scores_anger_cbow),
        ("joy", scores_joy_cbow),
        ("sadness", scores_sadness_cbow),
        ("disgust", scores_disgust_cbow),
        ("surprise", scores_surprise_cbow),
    ):
        fold_scores = cv_scores[f'test_{metric}']
        print(f"{metric} {emotion_name}: {np.mean(fold_scores):.4f} +/- {np.std(fold_scores):.4f}")
    print("\n")

accuracy anger: 0.9635 +/- 0.0001
accuracy joy: 0.9670 +/- 0.0004
accuracy sadness: 0.9699 +/- 0.0001
accuracy disgust: 0.9812 +/- 0.0002
accuracy surprise: 0.9750 +/- 0.0002


precision_macro anger: 0.6880 +/- 0.0184
precision_macro joy: 0.7045 +/- 0.0844
precision_macro sadness: 0.7031 +/- 0.0467
precision_macro disgust: 0.5907 +/- 0.0972
precision_macro surprise: 0.5203 +/- 0.0416


recall_macro anger: 0.5109 +/- 0.0022
recall_macro joy: 0.5091 +/- 0.0043
recall_macro sadness: 0.5065 +/- 0.0024
recall_macro disgust: 0.5024 +/- 0.0021
recall_macro surprise: 0.5005 +/- 0.0009


f1_macro anger: 0.5124 +/- 0.0042
f1_macro joy: 0.5098 +/- 0.0082
f1_macro sadness: 0.5055 +/- 0.0047
f1_macro disgust: 0.5001 +/- 0.0043
f1_macro surprise: 0.4951 +/- 0.0018




## BERT model

In [22]:
# Load the fine-tuned BERT masked-LM model and its tokenizer from the same local directory
model_path = "fine_tuned_bert"
bert_model = BertForMaskedLM.from_pretrained(model_path)
bert_tokenizer = BertTokenizer.from_pretrained(model_path)

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


In [23]:
# Function that returns bert embedding for an input sentence
def get_bert_embedding_sentence(sentence, model, tokenizer):
    """Embed text as the attention-mask-weighted mean of the last-layer token states.

    Parameters
    ----------
    sentence : str or list of str
        Text to embed. A list is embedded as one padded batch.
    model
        Called as ``model(**inputs, output_hidden_states=True)``; the result must
        expose ``hidden_states`` (a sequence of per-layer tensors).
    tokenizer
        Called with ``return_tensors='pt'``, ``truncation=True`` and ``padding=True``;
        the result must be a mapping that contains ``attention_mask``.

    Returns
    -------
    numpy.ndarray
        Shape ``(hidden_size,)`` for a single sentence, ``(batch, hidden_size)``
        for a batch of more than one sentence.
    """
    # Tokenize into PyTorch tensors, truncating over-long input and padding a batch
    # to a common length
    inputs = tokenizer(
        sentence,
        return_tensors='pt',
        truncation=True,
        padding=True,
    )

    # Inference only: no gradients are needed
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

    # hidden_states holds one tensor per layer; index -1 selects the last one
    last_hidden = outputs.hidden_states[-1]

    # Mean-pool over real tokens only. Padded positions have mask 0 and must not
    # contribute; for a single un-padded sentence the mask is all ones, so this
    # equals the plain mean over tokens.
    mask = inputs["attention_mask"].unsqueeze(-1).to(last_hidden.dtype)
    token_sum = (last_hidden * mask).sum(dim=1)
    token_count = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
    pooled = token_sum / token_count

    # Drop the batch axis when there is exactly one sentence
    sentence_embedding = pooled.squeeze(0).numpy()

    return sentence_embedding

In [24]:
# Embed every sentence in goemotions_data with get_bert_embedding_sentence
bert_model.eval()  # switch the model to evaluation mode before embedding

def _embed_with_bert(sentence):
    return get_bert_embedding_sentence(sentence, bert_model, bert_tokenizer)

goemotions_data["bert_embedding"] = goemotions_data["text"].map(_embed_with_bert)

## ML algorithms for BERT embeddings

In [25]:
# Feature matrix: one row per sentence, built from the BERT sentence embeddings
X_BERT = np.array(list(goemotions_data['bert_embedding']))

# Binary targets, one Series per emotion
y_anger, y_joy, y_sadness, y_disgust, y_surprise = (
    goemotions_data[emotion_column]
    for emotion_column in ('anger', 'joy', 'sadness', 'disgust', 'surprise')
)

In [26]:
# 5-fold cross-validation on the BERT features, one binary task per emotion
scoring_metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']

(
    scores_anger_bert,
    scores_joy_bert,
    scores_sadness_bert,
    scores_disgust_bert,
    scores_surprise_bert,
) = (
    cross_validate(clf, X_BERT, y_target, cv=kf, scoring=scoring_metrics)
    for y_target in (y_anger, y_joy, y_sadness, y_disgust, y_surprise)
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [27]:
# Report mean +/- standard deviation across the folds, grouped by metric
for metric in scoring_metrics:
    for emotion_name, cv_scores in (
        ("anger", scores_anger_bert),
        ("joy", scores_joy_bert),
        ("sadness", scores_sadness_bert),
        ("disgust", scores_disgust_bert),
        ("surprise", scores_surprise_bert),
    ):
        fold_scores = cv_scores[f'test_{metric}']
        print(f"{metric} {emotion_name}: {np.mean(fold_scores):.4f} +/- {np.std(fold_scores):.4f}")
    print("\n")

accuracy anger: 0.9642 +/- 0.0003
accuracy joy: 0.9671 +/- 0.0002
accuracy sadness: 0.9702 +/- 0.0001
accuracy disgust: 0.9814 +/- 0.0001
accuracy surprise: 0.9755 +/- 0.0000


precision_macro anger: 0.8040 +/- 0.1019
precision_macro joy: 0.7328 +/- 0.1434
precision_macro sadness: 0.8601 +/- 0.1118
precision_macro disgust: 0.7207 +/- 0.2040
precision_macro surprise: 0.6878 +/- 0.1871


recall_macro anger: 0.5078 +/- 0.0039
recall_macro joy: 0.5035 +/- 0.0019
recall_macro sadness: 0.5030 +/- 0.0010
recall_macro disgust: 0.5039 +/- 0.0037
recall_macro surprise: 0.5015 +/- 0.0014


f1_macro anger: 0.5063 +/- 0.0076
f1_macro joy: 0.4988 +/- 0.0038
f1_macro sadness: 0.4985 +/- 0.0019
f1_macro disgust: 0.5031 +/- 0.0073
f1_macro surprise: 0.4968 +/- 0.0028


