In [1]:
import pandas as pd
import numpy as np
import nltk
import transformers
from transformers import AutoTokenizer
import emoji
from sklearn.metrics import accuracy_score, precision_score, recall_score
from textblob import TextBlob



In [2]:
pip install emoji

Collecting emoji
  Downloading emoji-2.8.0-py2.py3-none-any.whl (358 kB)
Installing collected packages: emoji
Successfully installed emoji-2.8.0
Note: you may need to restart the kernel to use updated packages.


In [7]:
pip install textblob

Collecting textblob
  Downloading textblob-0.17.1-py2.py3-none-any.whl (636 kB)
Installing collected packages: textblob
Successfully installed textblob-0.17.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load the tokenizer for the 'bert-base-uncased' checkpoint; the following cell uses it
# to check how many emojis survive an encode/decode round trip.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [3]:
# Measure how many emojis the tokenizer cannot round-trip: an emoji counts as
# "not identified" when it is missing from the text decoded from its own token ids.
emoji_list = list(emoji.EMOJI_DATA.keys())
cnt = 0
for e in emoji_list:
    # Encode without special tokens rather than trying to strip them afterwards:
    # str.strip("</s>") removes any of the characters '<', '/', 's', '>' from both
    # ends (not the literal "</s>" marker), so it could also eat real text.
    tokenized = tokenizer.decode(tokenizer.encode(e, add_special_tokens=False)).strip()
    if e not in tokenized:
        cnt += 1
print(f"{cnt/len(emoji_list)} of the emojis are not identified by this tokenizer.")

0.9980984576378619 of the emojis are not identified by this tokenizer.


In [4]:
def emoji2description(text):
    """Replace every emoji in `text` with its English description.

    Each emoji's English name (e.g. ':loudly_crying_face:') is turned into plain
    words ('loudly crying face'). Descriptions are padded with spaces so that
    consecutive emojis, or an emoji directly touching a word, do not fuse into a
    single token such as 'faceloudly'. Whitespace in the result is then
    normalised: runs of whitespace become one space and the ends are trimmed.

    Parameters
    ----------
    text : str
        Input text, possibly containing emojis.

    Returns
    -------
    str
        The text with emojis spelled out as words.
    """
    def _describe(chars, data_dict):
        # ':loudly_crying_face:' -> ' loudly crying face '
        words = data_dict['en'].replace('_', ' ').strip(':')
        return f' {words} '

    replaced = emoji.replace_emoji(text, replace=_describe)
    # Collapse the padding added around each description (and any other repeated whitespace).
    return ' '.join(replaced.split())

In [5]:
# Hand-labelled sentiment ('positive' / 'neutral' / 'negative') for common face emojis.
# The mapping is assembled from consecutive runs of same-label emojis; the runs are
# listed in the order the entries should appear in the resulting dict.
emoji_sentiment_dict = {
    symbol: label
    for label, symbols in [
        ('positive', [
            '😀', '😃', '😄', '😁', '😆', '😅', '😂', '🤣', '😊', '😇',
            '🙂', '🙃', '😉', '😌', '😍', '😘', '😗', '😙', '😚', '😋',
            '😛', '😜', '😝', '😎', '😏',
        ]),
        ('neutral', ['😐', '😑']),
        ('negative', [
            '😒', '😓', '😔', '😕', '😖', '😞', '😟', '😠', '😡', '🤬',
            '😢', '😣', '😤', '😥', '😦', '😧', '😨', '😩', '😪', '😫',
            '😬', '😭',
        ]),
        ('neutral', ['😮', '😯']),
        ('negative', ['😰', '😱', '😲']),
        ('neutral', ['😳', '😴']),
        ('negative', ['😵']),
        ('neutral', ['😶']),
        ('negative', [
            '😷', '🤒', '🤕', '🤢', '🤮', '🤧', '🥵', '🥶', '🥴',
            '😵‍💫',  # Example of a complex emoji
        ]),
        # Add more emojis and sentiments as needed
    ]
    for symbol in symbols
}

In [20]:
def analyzesent(text):
    """Classify the sentiment of `text` after spelling out its emojis as words.

    The text is first passed through `emoji2description`, then analysed with
    TextBlob. The sign of the polarity score decides the label.

    Parameters
    ----------
    text : str
        Input text, possibly containing emojis.

    Returns
    -------
    dict
        ``{"Sentiment": "Positive" | "Negative" | "Neutral", "Subjectivity": float}``
        where "Sentiment" is "Positive" for polarity > 0, "Negative" for
        polarity < 0 and "Neutral" for polarity == 0.
    """
    described = emoji2description(text)
    # Analyse once: polarity and subjectivity both come from the same result,
    # instead of building a second TextBlob and re-running the analysis.
    sentiment = TextBlob(described).sentiment
    polarity = sentiment.polarity

    if polarity > 0:
        label = "Positive"
    elif polarity < 0:
        label = "Negative"
    else:
        label = "Neutral"

    return {
        "Sentiment": label,
        "Subjectivity": sentiment.subjectivity
    }
# Quick check of analyzesent on a single sentence that ends with emojis.
text_1 = "I love the movie so much 😭😭😭"
sentiment_result = analyzesent(text_1)
print(sentiment_result)
# Polarity captures the sentiment of the text and lies in [-1, 1]: -1 is a highly
# negative sentiment and 1 is a highly positive one.
# Subjectivity captures whether the text is factual information or a personal opinion
# and lies in [0, 1]: values close to 0 indicate factual information, values close to 1
# indicate a personal opinion.
    

{'Sentiment': 'Negative', 'Subjectivity': 0.6}


In [16]:
# Small hand-labelled evaluation set: one record per (text, gold sentiment) pair.
labeled_dataset = [
    {"text": sample_text, "true_sentiment": gold_label}
    for sample_text, gold_label in [
        ("I love this product! It's amazing. 😍", "Positive"),
        ("The service was terrible. I'm very disappointed. 😡", "Negative"),
        ("This is an okay movie, not great but not terrible. 😐", "Neutral"),
        ("The food was excellent, and the staff was friendly. 😃", "Positive"),
        ("I'm indifferent towards this book. 😑", "Neutral"),
        ("The weather today is awful. ☔", "Negative"),
        ("I had a fantastic day! 🎉", "Positive"),
        # Add more samples here...
    ]
]
# Parallel lists: entry i of each list belongs to sample i of labeled_dataset.
predicted_sentiments, true_sentiments = [], []
for item in labeled_dataset:
    text, true_sentiment = item["text"], item["true_sentiment"]
    sentiment_result = analyzesent(text)
    # Keep the predicted label next to the gold label for the metric calls below.
    predicted_sentiments.append(sentiment_result["Sentiment"])
    true_sentiments.append(true_sentiment)
    
    

# Calculate metrics.
# Precision and recall are averaged per label, weighted by each label's support.
# zero_division=0 scores a label with no predicted (precision) or no true (recall)
# samples as 0 explicitly, instead of raising UndefinedMetricWarning on small datasets.
accuracy = accuracy_score(true_sentiments, predicted_sentiments)
precision = precision_score(true_sentiments, predicted_sentiments, average='weighted', zero_division=0)
recall = recall_score(true_sentiments, predicted_sentiments, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")


Accuracy: 0.8571428571428571
Precision: 0.8928571428571429
Recall: 0.8571428571428571
