<a href="https://colab.research.google.com/github/Muhammad0isah/Emotion-Similarity/blob/main/emotions_classes_similarities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab only: mount Google Drive at /content/drive so the dataset CSV stored
# under /content/drive/MyDrive can be read by the loading cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [24]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the annotated ABSA dataset. The CSV is read from the mounted Google
# Drive, so the Drive mount cell has to run before this one.
DATASET_CSV = '/content/drive/MyDrive/LCF-ATEPC-Restaurant/LCF-ATEPC-Restaurant/Annotated ABSA with Emotions Dataset.csv'
data = pd.read_csv(DATASET_CSV)

# Load the pre-trained DistilBERT emotion classifier and its matching tokenizer
model_name = "bhadresh-savani/distilbert-base-uncased-emotion"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Group the review sentences by emotion class: one dict key per class, mapped
# to the list of sentences labelled with that class.
# Rows with a missing label or sentence are dropped first; a missing sentence
# is a float NaN rather than text and would otherwise be handed to the
# tokenizer as-is.
# Repeated sentences are intentionally kept (they are not de-duplicated), so a
# sentence that appears in several rows weighs proportionally more in any
# per-class average computed from these lists.
labelled_rows = data.dropna(subset=["Emotion Class", "Review Sentence"])
grouped_sentences = (
    labelled_rows.groupby("Emotion Class")["Review Sentence"].apply(list).to_dict()
)



In [29]:
# Show which emotion classes were found in the dataset (the dict keys only).
# NOTE(review): the saved output of this cell also contains a dict_values(...)
# dump, which this statement does not print -- the saved output looks stale;
# re-run the cell before sharing the notebook.
print(grouped_sentences.keys())

dict_keys(['Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise'])
dict_values([['But the staff was so horrible to us.', "i've noticed megadeth, mr. scruff, st. germain, traditional vietnamese songs, black sabbath, jay-z, and daft punk all being played.", 'All the money went into the interior decoration, none of it went to the chefs.', 'All the money went into the interior decoration, none of it went to the chefs.', 'The seats are uncomfortable if you are sitting against the wall on wooden benches.', 'Two words: Free wine.', 'The price is reasonable although the service is poor.', 'The price is reasonable although the service is poor.', "At the end you're left with a mild broth with noodles that you can slurp out of a cup.", 'Dip the ingredients in with your chopsticks, swirl them around, and eat.', 'Dip the ingredients in with your chopsticks, swirl them around, and eat.', 'The atmosphere is unheralded, the service impecible, and the food magnificant.', 'The atmosphere is unheralde

In [26]:
def get_emotion_logits(sentence):
    """Run the emotion classifier on ``sentence`` and return its raw logits.

    The text is tokenized with the module-level ``tokenizer`` (PyTorch
    tensors, truncation and padding enabled) and passed through the
    module-level ``model``. The notebook uses the returned logits as a
    stand-in for a sentence embedding.

    Args:
        sentence: Text to classify; forwarded unchanged to ``tokenizer``.

    Returns:
        The ``logits`` attribute of the model output, computed with gradient
        tracking disabled.
    """
    encoded = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
    # Inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        return model(**encoded).logits

# Compute one mean logit vector per emotion class.
# These vectors are the classifier's output logits (used here in place of
# embeddings), so the similarities below compare average predicted-score
# profiles of each class.
# NOTE(review): the dataset's classes include 'Disgust'; confirm against
# model.config.id2label how the model's own label set relates to them.
emotion_logits = {}
for emotion, sentences in grouped_sentences.items():
    # One forward pass per sentence; each result is concatenated along dim 0.
    embeddings = [get_emotion_logits(sentence) for sentence in sentences]
    # Average over sentences. keepdim=True keeps the leading axis so the
    # result stays 2-D, which is what cosine_similarity is given below.
    mean_embedding = torch.cat(embeddings, dim=0).mean(dim=0, keepdim=True)
    emotion_logits[emotion] = mean_embedding.numpy()

# Compute the cosine similarity of every unordered pair of emotion classes
similarities = {}
emotions = list(emotion_logits.keys())
for i, emotion1 in enumerate(emotions):
    # Only pair with classes that come later, so each pair appears once and
    # no class is compared with itself.
    for emotion2 in emotions[i + 1:]:
        similarity = cosine_similarity(emotion_logits[emotion1], emotion_logits[emotion2])[0][0]
        similarities[f"{emotion1} vs {emotion2}"] = similarity

# Print the similarity results.
# Four decimals: several pairs differ only in the third decimal place, and
# the notebook's recorded output uses this precision.
for pair, sim in similarities.items():
    print(f"Similarity between {pair}: {sim:.4f}")


Similarity between Anger vs Disgust: 0.9852
Similarity between Anger vs Fear: 0.9888
Similarity between Anger vs Joy: 0.5542
Similarity between Anger vs Sadness: 0.9688
Similarity between Anger vs Surprise: 0.8330
Similarity between Disgust vs Fear: 0.9727
Similarity between Disgust vs Joy: 0.6805
Similarity between Disgust vs Sadness: 0.9359
Similarity between Disgust vs Surprise: 0.9088
Similarity between Fear vs Joy: 0.5079
Similarity between Fear vs Sadness: 0.9782
Similarity between Fear vs Surprise: 0.7984
Similarity between Joy vs Sadness: 0.3945
Similarity between Joy vs Surprise: 0.9140
Similarity between Sadness vs Surprise: 0.7090
