<a href="https://colab.research.google.com/github/CodingWeeb-Gaurav/Text-Emotion-Analysis-SVM/blob/main/Emotion_Detection_from_Text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Basic Libraries
import pandas as pd
import numpy as np

# Visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt

# Text Libraries
import nltk
import string
import re

# Feature Extraction Libraries
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split

# Classifier Model libraries
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn import tree
# from sklearn.pipeline import Pipeline

# Performance Matrix libraries
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay

# other
import warnings
warnings.filterwarnings("ignore")

# Drive Mount
# from google.colab import drive
# drive.mount('/content/drive')

# 1. Dataset

In [5]:
# Load datasets
df1 = pd.read_csv('/content/text_emotions.csv')
df2 = pd.read_csv('/content/tweet_emotions.csv')
# df2 = df2.rename(columns={'tweet_id': 'id'})
# Create unified dataframe
df = pd.concat([
    df1[['content', 'sentiment']],
    df2[['content', 'sentiment']]
], ignore_index=True)

# emotion mapping and merge or drop rare emotions
emotion_map = {
    'neutral': ['neutral', 'empty', 'boredom'],
    # 'worry': ['worry', 'fear'], #dropped
    'happy': ['happiness', 'joy'],
    'sad': ['sadness'],# From both datasets
    'love': ['love'],
    #'surprise': ['surprise'],# Ignore karo
    'anger': ['anger', 'hate']
}

# Reverse mapping for implementation
reverse_map = {}
for new_cat, source_cats in emotion_map.items():
    for source_cat in source_cats:
        reverse_map[source_cat] = new_cat
df['sentiment'] = df['sentiment'].map(reverse_map)

# Drop rows of unmapped emotions
df = df.dropna(subset=['sentiment'])

# Verify results
print("Final Emotion Distribution:")
print(df['sentiment'].value_counts())

df

Final Emotion Distribution:
sentiment
happy      11970
sad        10962
neutral     9644
love        5483
anger       4142
Name: count, dtype: int64


Unnamed: 0,content,sentiment
0,i didnt feel humiliated,sad
1,i can go from feeling so hopeless to so damned...,sad
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
59995,@JohnLloydTaylor,neutral
59996,Happy Mothers Day All my love,love
59997,Happy Mother's Day to all the mommies out ther...,love
59998,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...,happy


## 2.1 Remove Punctuations

In [6]:
emojis = pd.read_csv('/content/emojis.txt',sep=',',header=None)
emojis_dict = {i:j for i,j in zip(emojis[0],emojis[1])}
pattern = '|'.join(sorted(re.escape(k) for k in emojis_dict))

def replace_emojis(text):
    text = re.sub(pattern,lambda m: emojis_dict.get(m.group(0)), text, flags=re.IGNORECASE)
    return text

def remove_punct(text):
    text = replace_emojis(text)
    text  = "".join([char for char in text if char not in string.punctuation])
    text = re.sub('[0-9]+', '', text)
    return text

df['Tweet_punct'] = df['content'].apply(lambda x: remove_punct(x))
df.head(10)

Unnamed: 0,content,sentiment,Tweet_punct
0,i didnt feel humiliated,sad,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sad,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy,anger,i am feeling grouchy
5,ive been feeling a little burdened lately wasn...,sad,ive been feeling a little burdened lately wasn...
8,i have been with petronas for years i feel tha...,happy,i have been with petronas for years i feel tha...
9,i feel romantic too,love,i feel romantic too
10,i feel like i have to make the suffering i m s...,sad,i feel like i have to make the suffering i m s...
11,i do feel that running is a divine experience ...,happy,i do feel that running is a divine experience ...


## 2.2 Tokenization

In [7]:
def tokenization(text):
    text = text.lower()
    text = re.split('\W+', text)

    return text

df['Tweet_tokenized'] = df['Tweet_punct'].apply(lambda x: tokenization(x.lower()))
df.head(10)

Unnamed: 0,content,sentiment,Tweet_punct,Tweet_tokenized
0,i didnt feel humiliated,sad,i didnt feel humiliated,"[i, didnt, feel, humiliated]"
1,i can go from feeling so hopeless to so damned...,sad,i can go from feeling so hopeless to so damned...,"[i, can, go, from, feeling, so, hopeless, to, ..."
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong,"[im, grabbing, a, minute, to, post, i, feel, g..."
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...,"[i, am, ever, feeling, nostalgic, about, the, ..."
4,i am feeling grouchy,anger,i am feeling grouchy,"[i, am, feeling, grouchy]"
5,ive been feeling a little burdened lately wasn...,sad,ive been feeling a little burdened lately wasn...,"[ive, been, feeling, a, little, burdened, late..."
8,i have been with petronas for years i feel tha...,happy,i have been with petronas for years i feel tha...,"[i, have, been, with, petronas, for, years, i,..."
9,i feel romantic too,love,i feel romantic too,"[i, feel, romantic, too]"
10,i feel like i have to make the suffering i m s...,sad,i feel like i have to make the suffering i m s...,"[i, feel, like, i, have, to, make, the, suffer..."
11,i do feel that running is a divine experience ...,happy,i do feel that running is a divine experience ...,"[i, do, feel, that, running, is, a, divine, ex..."


In [15]:
def clean_text(text):
    text = remove_punct(text)
    text = tokenization(text)

    return text

# 80% Train, 20% test
X_train, X_test, y_train, y_test = train_test_split(df['content'], df['sentiment'],test_size=0.2, random_state = 0)

In [17]:
countVectorizer1 = CountVectorizer(analyzer=clean_text)
countVector1 = countVectorizer1.fit_transform(X_train)

countVector2 = countVectorizer1.transform(X_test)

tfidf_transformer_xtrain = TfidfTransformer()
x_train = tfidf_transformer_xtrain.fit_transform(countVector1)

tfidf_transformer_xtest = TfidfTransformer()
x_test = tfidf_transformer_xtrain.fit_transform(countVector2)

In [18]:
svm = SGDClassifier()
svm.fit(x_train, y_train)

y_pred = svm.predict(x_test)

svm_acc = accuracy_score(y_pred, y_test)
svm_prec = precision_score(y_test, y_pred, average='macro')
svm_recal = recall_score(y_test, y_pred, average='macro')
svm_cm = confusion_matrix(y_test,y_pred)
svm_f1 = f1_score(y_test, y_pred, average='macro')

print('Accuracy:', '{0:.3f}'.format(svm_acc*100))
print('Precision:', '{0:.3f}'.format(svm_prec*100))
print('Recall:', '{0:.3f}'.format(svm_recal*100))
print('F1-score:', '{0:.3f}'.format(svm_f1*100))
print(classification_report(y_test,y_pred))

Accuracy: 67.172
Precision: 70.149
Recall: 64.631
F1-score: 66.504
              precision    recall  f1-score   support

       anger       0.87      0.61      0.72       838
       happy       0.70      0.72      0.71      2433
        love       0.66      0.49      0.56      1122
     neutral       0.54      0.69      0.61      1878
         sad       0.74      0.71      0.73      2170

    accuracy                           0.67      8441
   macro avg       0.70      0.65      0.67      8441
weighted avg       0.69      0.67      0.67      8441



In [25]:

def predict_emotion(text_input):
    """
    Predict emotion from user-provided text using the trained SVM model
    """
    # 1. Apply the same preprocessing pipeline
    cleaned_text = remove_punct(text_input)
    tokenized = tokenization(cleaned_text)
    cleaned_processed = ' '.join(tokenized)  # Convert tokens back to string for vectorizer

    # 2. Transform using the same vectorizers
    count_vector = countVectorizer1.transform([cleaned_processed])
    tfidf_vector = tfidf_transformer_xtrain.transform(count_vector)

    # 3. Predict
    prediction = svm.predict(tfidf_vector)[0]
    probabilities = svm.decision_function(tfidf_vector)[0]

    # 4. Get confidence scores (convert decision scores to pseudo-probabilities)
    confidences = 1 / (1 + np.exp(-probabilities))  # Sigmoid transformation
    emotion_confidence = dict(zip(svm.classes_, confidences))

    # 5. Format results
    result = {
        'predicted_emotion': prediction,
        'confidence_scores': {emotion: f"{score:.2%}"
                            for emotion, score in sorted(emotion_confidence.items(),
                                                       key=lambda x: x[1],
                                                       reverse=True)}
    }

    return result

# Interactive testing loop
print("Emotion Detection Demo")
print("Type 'quit' to exit\n")

while True:
    user_input = input("Enter text to analyze: ")
    if user_input.lower() == 'quit':
        break

    prediction = predict_emotion(user_input)

    print(f"\nPredicted Emotion: {prediction['predicted_emotion'].upper()}")
    print("Confidence Scores:")
    for emotion, score in prediction['confidence_scores'].items():
        print(f"{emotion}: {score}", end = '. ')
    print('\n')
    print("-" * 50 + "")

Emotion Detection Demo
Type 'quit' to exit

Enter text to analyze: This is a hopeless situation for everyone

Predicted Emotion: SAD
Confidence Scores:
sad: 62.78%. neutral: 32.34%. love: 24.72%. anger: 24.17%. happy: 12.99%. 

--------------------------------------------------
Enter text to analyze: My parents are affectionate towards me

Predicted Emotion: LOVE
Confidence Scores:
love: 53.02%. neutral: 31.81%. anger: 26.58%. sad: 17.95%. happy: 12.15%. 

--------------------------------------------------
Enter text to analyze: I saw four cars in the garage

Predicted Emotion: NEUTRAL
Confidence Scores:
neutral: 40.71%. anger: 27.82%. happy: 26.50%. sad: 25.63%. love: 25.41%. 

--------------------------------------------------
Enter text to analyze: Everyone in the batch passed with flying colors

Predicted Emotion: HAPPY
Confidence Scores:
happy: 36.82%. neutral: 36.65%. sad: 27.76%. anger: 24.24%. love: 23.82%. 

--------------------------------------------------
Enter text to anal

In [24]:
My parents are affectionate towards me
This is a hopeless situation for everyone

SyntaxError: invalid syntax (<ipython-input-24-4e7e705b2e6f>, line 1)