<a href="https://colab.research.google.com/github/SarthakAgase/AI-Speech-Emotion-Detection/blob/main/ML_Text_To_Emotion_with_all_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Emotion Detection Models




## Data Preprocessing

### Importing the libraries

In [1]:
import pandas as pd
import numpy as np
import re, nltk

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report


nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

stop_words = set(stopwords.words('english'))
stop_words.remove("no")
stop_words.remove("not")
stop_words.remove("nor")
stop_words = [x.lower() for x in stop_words]

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [2]:
!pip install lightgbm
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.2


### Pre-Defined Dictionary Words

In [3]:
contractions = {
        "ai\snot": "am not",
        "wo\snot": "will not",
        "would\snot": "would not",
        "should\snot": "should not",
        "isn\snot": "is not",
        "aren\snot": "are not",
        "wasn\snot": "was not",
        "weren\snot": "were not",
        "haven\snot": "have not",
        "hasn\snot": "has not",
        "hadn\snot": "had not",
        "don\snot": "do not",
        "doesn\snot": "does not",
        "didn\snot": "did not",
        "can\snot": "can not",
        "cannot": "can not",
        "couldn\snot": "could not",
        "shouldn\snot": "should not",
        "mightn\snot": "might not",
        "mustn\snot": "must not",
        "shan\snot": "shall not",
        "won\snot": "will not",
        "ain\snot": "am not",
        "I\sam": "I am",
        "you\sare": "you are",
        "he\sis": "he is",
        "she\sis": "she is",
        "it\sis": "it is",
        "we\sare": "we are",
        "they\sare": "they are",
        "I\swould": "I would",
        "you\swould": "you would",
        "he\swould": "he would",
        "she\swould": "she would",
        "it\swould": "it would",
        "we\swould": "we would",
        "they\swould": "they would",
        "I\scould": "I could",
        "you\scould": "you could",
        "he\scould": "he could",
        "she\scould": "she could",
        "it\scould": "it could",
        "we\scould": "we could",
        "they\scould": "they could",
        "I\shave": "I have",
        "you\shave": "you have",
        "he\shas": "he has",
        "she\shas": "she has",
        "it\shas": "it has",
        "we\shave": "we have",
        "they\shave": "they have",
        "I\swill": "I will",
        "you\swill": "you will",
        "he\swill": "he will",
        "she\swill": "she will",
        "it\swill": "it will",
        "we\swill": "we will",
        "they\swill": "they will",
        "I\smust": "I must",
        "you\smust": "you must",
        "he\smust": "he must",
        "she\smust": "she must",
        "it\smust": "it must",
        "we\smust": "we must",
        "they\smust": "they must",
        "I\sshall": "I shall",
        "you\sshall": "you shall",
        "he\sshall": "he shall",
        "she\sshall": "she shall",
        "it\sshall": "it shall",
        "we\sshall": "we shall",
        "they\sshall": "they shall",
        "haven\s't": "have not",
        "hasn\s't": "has not",
        "hadn\s't": "had not",
        "don\s't": "do not",
        "doesn\s't": "does not",
        "didn\s't": "did not",
        "can\s't": "can not",
        "cannot": "can not",
        "couldn\s't": "could not",
        "shouldn\s't": "should not",
        "mightn\s't": "might not",
        "mustn\s't": "must not",
        "shan\s't": "shall not",
        "won\s't": "will not",
        "ain\s't": "am not",
        "aren\s't": "are not",
        "wasn\s't": "was not",
        "weren\s't": "were not",
        "I\sdidn't": "I did not",
        "you\sdidn't": "you did not",
        "he\sdidn't": "he did not",
        "she\sdidn't": "she did not",
        "it\sdidn't": "it did not",
        "we\sdidn't": "we did not",
        "they\sdidn't": "they did not",
        "I\scannot": "I can not",
        "you\scannot": "you can not",
        "he\scannot": "he can not",
        "she\scannot": "she can not",
        "it\scannot": "it can not",
        "we\scannot": "we can not",
        "they\scannot": "they can not",
        "I\swon't": "I will not",
        "you\swon't": "you will not",
        "he\swon't": "he will not",
        "she\swon't": "she will not",
        "it\swon't": "it will not",
        "we\swon't": "we will not",
        "they\swon't": "they will not",
        "I\shasn't": "I has not",
        "you\shasn't": "you has not",
        "he\shasn't": "he has not",
        "she\shasn't": "she has not",
        "it\shasn't": "it has not",
        "we\shasn't": "we has not",
        "they\shasn't": "they has not"
}

nots = {
    'not sad': 'Happy', 'not bad': 'Happy', 'not boring': 'Happy', 'not wrong': 'Happy', 'not bored': 'Happy',
        'not jealous': 'Happy', 'not happy': 'Sad', 'not well': 'Sad', 'not suitable': 'Angry',
        'not right': 'Angry', 'not good': 'Sad', 'not excited': 'Angry', 'not funny ': 'Sad', 'not kind': 'Sad',
        'not proud': 'Angry', 'not cool': 'Angry', 'not funny': 'Angry', 'not kind': 'Angry', 'not open': 'Angry',
        'not safe': 'Fear', 'not enough': 'Empty', 'not know': 'Sad', 'not knowing': 'Sad', 'not believe': 'Angry',
        'not believing': 'Angry', 'not understand': 'Sad', 'not understanding': 'Sad', 'no doubt': 'Happy',
        'not think': 'Sad', 'not thinking': 'Sad', 'not recognise': 'Sad', 'not recognising': 'Sad',
        'not forget': 'Angry', 'not forgetting': 'Angry', 'not remember': 'Sad', 'not remembering': 'Sad',
        'not imagine': 'Sad', 'not imagining': 'Sad', 'not mean': 'Sad', 'not meaning': 'Sad',
        'not agree': 'Angry', 'not agreeing': 'Sad', 'not disagree': 'Happy', 'not disagreeing': 'Happy',
        'not deny': 'Sad', 'not denying': 'Sad', 'not promise': 'Angry', 'not promising': 'Angry',
        'not satisfy': 'Sad', 'not satisfying': 'Sad', 'not realise': 'Sad', 'not realising': 'Sad',
        'not appear': 'Angry', 'not appearing': 'Angry', 'not please': 'Sad', 'not pleasing': 'Sad',
        'not impress': 'Sad', 'not impressing': 'Sad', 'not surprise': 'Sad', 'not surprising': 'Sad',
        'not concern': 'Sad', 'not concerning': 'Sad', 'not have': 'Sad', 'not having': 'Sad',
        'not own': 'Sad', 'not owning': 'Sad', 'not possess': 'Sad', 'not possessing': 'Sad',
        'not lack': 'Sad', 'not lacking': 'Sad', 'not consist': 'Sad', 'not consisting': 'Sad',
        'not involve': 'Sad', 'not involving': 'Sad', 'not include': 'Sad', 'not including': 'Sad',
        'not contain': 'Sad', 'not containing': 'Sad', 'not love': 'Sad', 'not like': 'Angry',
        'not hate': 'Happy', 'not hating': 'Happy', 'not adore': 'Sad', 'not adoring': 'Sad',
        'not prefer': 'Sad', 'not preferring': 'Sad', 'not care': 'Angry', 'not mind': 'Angry',
        'not minding': 'Sad', 'not want': 'Angry', 'not wanting': 'Sad', 'not need': 'Angry',
        'not needing': 'Angry', 'not desire': 'Sad', 'not desiring': 'Sad', 'not wish': 'Sad',
        'not wishing': 'Sad', 'not hope': 'Sad', 'not hoping': 'Sad', 'not appreciate': 'Sad',
        'not appreciating': 'Sad', 'not value': 'Sad', 'not valuing': 'Sad', 'not owe': 'Sad',
        'not owing': 'Sad', 'not seem': 'Sad', 'not seeming': 'Sad', 'not fit': 'Sad', 'not fitting': 'Sad',
        'not depend': 'Sad', 'not depending': 'Sad', 'not matter': 'Sad', 'not afford': 'Sad',
        'not affording': 'Sad', 'not aim': 'Sad', 'not aiming': 'Sad', 'not attempt': 'Angry',
        'not attempting': 'Angry', 'not ask': 'Angry', 'not asking': 'Angry', 'not arrange': 'Angry',
        'not arranging': 'Angry', 'not beg': 'Angry', 'not begging': 'Angry', 'not begin': 'Angry',
        'not beginning': 'Angry', 'not caring': 'Angry', 'not choose': 'Angry', 'not choosing': 'Angry',
        'not claim': 'Angry', 'not claiming': 'Angry', 'not consent': 'Angry', 'not consenting': 'Angry',
        'not continue': 'Angry', 'not continuing': 'Angry', 'not dare': 'Angry', 'not daring': 'Angry',
        'not decide': 'Sad', 'not deciding': 'Sad', 'not demand': 'Angry', 'not demanding': 'Angry',
        'not deserve': 'Angry', 'not deserving': 'Angry', 'not expect': 'Angry', 'not expecting': 'Angry',
        'not fail': 'Happy', 'not failing': 'Happy', 'not get': 'Sad', 'not getting': 'Sad',
        'not hesitate': 'Sad', 'not hesitating': 'Sad', 'not hurry': 'Happy', 'not hurrying': 'Happy',
        'not intend': 'Sad', 'not intending': 'Sad', 'not learn': 'Angry', 'not learning': 'Angry',
        'not liking': 'Angry', 'not loving': 'Sad', 'not manage': 'Angry', 'not managing': 'Angry',
        'not neglect': 'Sad', 'not neglecting': 'Sad', 'not offer': 'Angry', 'not offering': 'Angry',
        'not plan': 'Angry', 'not planing': 'Angry', 'not prepare': 'Angry', 'not preparing': 'Angry',
        'not pretend': 'Angry', 'not pretending': 'Angry', 'not proceed': 'Angry', 'not proceeding': 'Angry',
        'not propose': 'Angry', 'not proposing': 'Sad', 'not refuse': 'Sad', 'not refusing': 'Sad',
        'not start': 'Sad', 'not starting': 'Sad', 'not stop': 'Happy', 'not stopping': 'Happy',
        'not struggle': 'Angry', 'not struggling': 'Angry', 'not swear': 'Angry', 'not swearing': 'Angry',
        'not threaten': 'Happy', 'not threatening': 'Happy', 'not try': 'Angry', 'not trying': 'Angry',
        'not volunteer': 'Angry', 'not volunteering': 'Angry', 'not wait': 'Angry', 'not waiting': 'Angry',
        'not feel': 'Sad', 'not feeling': 'Sad', "not able": "Sad", "not do": "Sad",
        'not apologize': 'Sad', 'not apologizing': 'Sad', 'not forgive': 'Angry', 'not forgiving': 'Angry',
        'not trust': 'Angry', 'not trusting': 'Angry', 'not regret': 'Angry', 'not regretting': 'Angry',
        'not rejoice': 'Sad', 'not rejoicing': 'Sad', 'not admire': 'Sad', 'not admiring': 'Sad',
        'not compliment': 'Sad', 'not complimenting': 'Sad', 'not criticize': 'Happy', 'not criticizing': 'Happy',
        'not encourage': 'Angry', 'not encouraging': 'Angry', 'not insult': 'Sad', 'not insulting': 'Sad',
        'not praise': 'Angry', 'not praising': 'Angry', 'not support': 'Angry', 'not supporting': 'Angry',
        'not blame': 'Sad', 'not blaming': 'Sad', 'not defend': 'Sad', 'not defending': 'Sad',
        'not appreciate': 'Sad', 'not appreciating': 'Sad', 'not enjoy': 'Sad', 'not enjoying': 'Sad',
        'not like': 'Angry', 'not liking': 'Angry', 'not love': 'Sad', 'not loving': 'Sad',
        'not prefer': 'Sad', 'not preferring': 'Sad', 'not want': 'Angry', 'not wanting': 'Sad',
        'not believe': 'Angry', 'not believing': 'Angry', 'not doubt': 'Happy', 'not doubting': 'Happy',
        'not imagine': 'Sad', 'not imagining': 'Sad', 'not realize': 'Sad', 'not realizing': 'Sad',
        'not remember': 'Sad', 'not remembering': 'Sad', 'not recognize': 'Sad', 'not recognizing': 'Sad',
        'not consider': 'Sad', 'not considering': 'Sad', 'not think': 'Sad', 'not thinking': 'Sad',
        'not forget': 'Angry', 'not forgetting': 'Angry', 'not ignore': 'Angry', 'not ignoring': 'Angry',
        'not overlook': 'Angry', 'not overlooking': 'Angry', 'not understand': 'Sad', 'not understanding': 'Sad',
        'not hear': 'Angry', 'not hearing': 'Angry', 'not listen': 'Angry', 'not listening': 'Angry',
        'not look': 'Angry', 'not looking': 'Angry', 'not smell': 'Angry', 'not smelling': 'Angry',
        'not taste': 'Angry', 'not tasting': 'Angry', 'not touch': 'Angry', 'not touching': 'Angry',
        'not feel': 'Sad', 'not feeling': 'Sad', 'not sense': 'Sad', 'not sensing': 'Sad',
        'not suppose': 'Angry', 'not supposing': 'Angry', 'not expect': 'Angry', 'not expecting': 'Angry',
        'not wait': 'Angry', 'not waiting': 'Angry', 'not long': 'Angry', 'not longing': 'Angry',
        'not yearn': 'Angry', 'not yearning': 'Angry', 'not wish': 'Sad', 'not wishing': 'Sad',
        'not hope': 'Sad', 'not hoping': 'Sad', 'not desire': 'Sad', 'not desiring': 'Sad',
        'not miss': 'Angry', 'not missing': 'Angry', 'not need': 'Angry', 'not needing': 'Angry',
        'not want': 'Angry', 'not wanting': 'Sad', 'not require': 'Angry', 'not requiring': 'Angry',
        'not demand': 'Angry', 'not demanding': 'Angry', 'not insist': 'Angry', 'not insisting': 'Angry',
        'not force': 'Angry', 'not forcing': 'Angry', 'not push': 'Angry', 'not pushing': 'Angry',
        'not pull': 'Angry', 'not pulling': 'Angry', 'not drag': 'Angry', 'not dragging': 'Angry',
        'not carry': 'Angry', 'not carrying': 'Angry', 'not lift': 'Angry', 'not lifting': 'Angry',
        'not drop': 'Angry', 'not dropping': 'Angry', 'not throw': 'Angry', 'not throwing': 'Angry',
        'not catch': 'Angry', 'not catching': 'Angry', 'not capture': 'Angry', 'not capturing': 'Angry',
        'not grab': 'Angry', 'not grabbing': 'Angry', 'not touch': 'Angry', 'not touching': 'Angry',
        'not reach': 'Angry', 'not reaching': 'Angry', 'not approach': 'Angry', 'not approaching': 'Angry',
        'not avoid': 'Angry', 'not avoiding': 'Angry', 'not evade': 'Angry', 'not evading': 'Angry',
        'not elude': 'Angry', 'not eluding': 'Angry', 'not escape': 'Angry', 'not escaping': 'Angry',
        'not run': 'Angry', 'not running': 'Angry', 'not jog': 'Angry', 'not jogging': 'Angry',
        'not walk': 'Angry', 'not walking': 'Angry', 'not crawl': 'Angry', 'not crawling': 'Angry',
        'not sneak': 'Angry', 'not sneaking': 'Angry', 'not tiptoe': 'Angry', 'not tiptoeing': 'Angry',
        'not dance': 'Angry', 'not dancing': 'Angry', 'not stomp': 'Angry', 'not stomping': 'Angry',
        'not shake': 'Angry', 'not shaking': 'Angry', 'not tremble': 'Angry', 'not trembling': 'Angry',
        'not shiver': 'Angry', 'not shivering': 'Angry', 'not quiver': 'Angry', 'not quivering': 'Angry',
        'not vibrate': 'Angry', 'not vibrating': 'Angry', 'not pulsate': 'Angry', 'not pulsating': 'Angry',
        'not throb': 'Angry', 'not throbbing': 'Angry', 'not beat': 'Angry', 'not beating': 'Angry',
        'not palpitate': 'Angry', 'not palpitating': 'Angry', 'not pump': 'Angry', 'not pumping': 'Angry',
        'not glide': 'Angry', 'not gliding': 'Angry', 'not slide': 'Angry', 'not sliding': 'Angry',
        'not slip': 'Angry', 'not slipping': 'Angry', 'not skid': 'Angry'
}

shortcuts = {
    'u': 'you', 'y': 'why', 'r': 'are', 'doin': 'doing', 'hw': 'how', 'k': 'okay', 'm': 'am',
    'b4': 'before',
                   'idc': "i do not care", 'ty': 'thank you', 'wlcm': 'welcome', 'bc': 'because', '<3': 'love',
                   'xoxo': 'love',
                   'ttyl': 'talk to you later', 'gr8': 'great', 'bday': 'birthday', 'awsm': 'awesome', 'gud': 'good',
                   'h8': 'hate',
                   'lv': 'love', 'dm': 'direct message', 'rt': 'retweet', 'wtf': 'hate', 'idgaf': 'hate',
                   'irl': 'in real life', 'yolo': 'you only live once', "don't": "do not", 'g8': 'great',
                   "won't": "will not", 'tbh': 'to be honest', 'caj': 'casual', 'Ikr': 'I know, right?',
                   'omw': 'on my way',
                   'ofc': 'of course', 'Idc': "I don't care", 'Irl': 'In real life', 'tbf': 'To be fair',
                   'obvs': 'obviously', 'v': 'very', 'atm': 'at the moment',
                   'col': 'crying out loud', 'gbu': 'god bless you', 'gby': 'god bless you', 'gotcha': 'I got you',
                   'hehe': 'laughing', 'haha': 'laughing', 'hf': 'have fun',
                   'hry': 'hurry', 'hw': 'hardwork', 'idc': 'i don’t care', 'ikr': 'i know right', 'k': 'ok',
                   'lmao': 'laughing my ass off', 'lol': 'laughing out loud',
                   'n1': 'nice one', 'na': 'not available', 'qt': 'cutie', 'qtpi': 'cutie pie',
                   'rip': 'rest in peace',
                   'sry': 'sorry', 'tc': 'take care',
                   'thnks': 'thanks', 'thx': 'thanks', 'thnk': 'thanks', 'ttyl': 'talk to you later', 'txt': 'text',
                   'ugh': 'disgusted', 'w8': 'wait', "not sad": "happy"
}

### Importing the dataset

In [4]:
dataset = pd.read_csv("/content/final_dataset.csv", header = None)
X = dataset[0].values
y = dataset[1].values

In [5]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)
emotions = le.inverse_transform([0,1,2,3,4,5])
print(len(y))
print(emotions)

19999
['anger' 'fear' 'joy' 'love' 'sadness' 'surprise']


### Cleaning Texts

In [6]:
def remove_contradictions(text):
    if "n't" in text:
        text = text.replace("n't", " not")
    for pattern, replacement in contractions.items():
        text = re.sub(pattern, replacement, text)
    return text

In [7]:
def removing_not(text):
        f = re.findall("not\s\w+", text)
        for i in f:
            try:
                text = text.replace(i, nots[i])
            except:
                pass
        text = text.lower()
        return text

In [8]:
def removing_shortcuts(text):
    full_words = []

    for token in text:
        if token in shortcuts.keys():
            token = shortcuts[token]
        full_words.append(token)
    text = " ".join(full_words)
    return text

In [9]:
def removing_stopwords(text):
    return [word for word in text if not word in stop_words]

In [10]:
corpus = []
for i in X:
  review = re.sub("[^a-zA-Z]"," ",i)
  review = review.lower()
  review = re.sub(r'http\S+|www.\S+', '', review)
  review = remove_contradictions(review)
  review = removing_not(review)
  review = review.split()
  review = removing_shortcuts(review)
  review = ' '.join([i for i in review.split() if not i.isdigit()])
  review = word_tokenize(review)
  review = removing_stopwords(review)
  lemma = WordNetLemmatizer()
  review = [lemma.lemmatize(word) for word in review]
  review = " ".join(review)
  corpus.append(review)

### Creating the Bag of Words

In [11]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=2000)
X = cv.fit_transform(corpus).toarray()
print(len(X),len(X[0]))

19999 2000


### Splitting the dataset into the Training set and Test set

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

## Classification Models

### MLP Model

In [13]:
from sklearn.neural_network import MLPClassifier
MLP_classifier = MLPClassifier(max_iter = 1000)
MLP_classifier.fit(X_train, y_train)
y_pred = MLP_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 83.8000%
Classification Report:
              precision    recall  f1-score   support

       anger       0.83      0.83      0.83       538
        fear       0.77      0.77      0.77       505
         joy       0.86      0.88      0.87      1294
        love       0.72      0.69      0.71       342
     sadness       0.90      0.89      0.90      1174
    surprise       0.69      0.67      0.68       147

    accuracy                           0.84      4000
   macro avg       0.80      0.79      0.79      4000
weighted avg       0.84      0.84      0.84      4000



### GB Model

In [14]:
from sklearn.ensemble import GradientBoostingClassifier
GB_classifier = GradientBoostingClassifier()
GB_classifier.fit(X_train, y_train)
y_pred = GB_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 83.6250%
Classification Report:
              precision    recall  f1-score   support

       anger       0.94      0.75      0.83       538
        fear       0.90      0.72      0.80       505
         joy       0.74      0.96      0.83      1294
        love       0.86      0.73      0.79       342
     sadness       0.94      0.84      0.88      1174
    surprise       0.71      0.74      0.73       147

    accuracy                           0.84      4000
   macro avg       0.85      0.79      0.81      4000
weighted avg       0.85      0.84      0.84      4000



### LightGBM Model

In [15]:
from lightgbm import LGBMClassifier
LGBM_classifier = LGBMClassifier()
LGBM_classifier.fit(X_train, y_train)
y_pred = LGBM_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.199085 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3207
[LightGBM] [Info] Number of data points in the train set: 15999, number of used features: 1199
[LightGBM] [Info] Start training from score -1.997338
[LightGBM] [Info] Start training from score -2.147658
[LightGBM] [Info] Start training from score -1.073979
[LightGBM] [Info] Start training from score -2.510931
[LightGBM] [Info] Start training from score -1.241482
[LightGBM] [Info] Start training from score -3.331143
Accuracy = 87.1500%
Classification Report:
              precision    recall  f1-score   support

       anger       0.87      0.86      0.86       538
        fear       0.84      0.80      0.82       505
         joy       0.86      0.91      0.89      1294
        love       0.76      0.80      0.78       342
     sa

### CatBoost Model

In [16]:
from catboost import CatBoostClassifier
CB_classifier = CatBoostClassifier(logging_level='Silent')
CB_classifier.fit(X_train, y_train)
y_pred = CB_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 88.9500%
Classification Report:
              precision    recall  f1-score   support

       anger       0.91      0.88      0.89       538
        fear       0.85      0.82      0.84       505
         joy       0.87      0.95      0.91      1294
        love       0.89      0.72      0.80       342
     sadness       0.93      0.93      0.93      1174
    surprise       0.78      0.69      0.73       147

    accuracy                           0.89      4000
   macro avg       0.87      0.83      0.85      4000
weighted avg       0.89      0.89      0.89      4000



### XGBoost Model

In [17]:
from xgboost import XGBClassifier
XGBC_classifier = XGBClassifier()
XGBC_classifier.fit(X_train, y_train)
y_pred = XGBC_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 88.7250%
Classification Report:
              precision    recall  f1-score   support

       anger       0.90      0.88      0.89       538
        fear       0.86      0.84      0.85       505
         joy       0.88      0.91      0.90      1294
        love       0.79      0.82      0.81       342
     sadness       0.95      0.91      0.93      1174
    surprise       0.75      0.78      0.77       147

    accuracy                           0.89      4000
   macro avg       0.86      0.86      0.86      4000
weighted avg       0.89      0.89      0.89      4000



### Extra Trees Model

In [18]:
from sklearn.ensemble import ExtraTreesClassifier
ET_classifier = ExtraTreesClassifier()
ET_classifier.fit(X_train, y_train)
y_pred = ET_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 88.3500%
Classification Report:
              precision    recall  f1-score   support

       anger       0.89      0.89      0.89       538
        fear       0.83      0.83      0.83       505
         joy       0.92      0.89      0.91      1294
        love       0.80      0.81      0.80       342
     sadness       0.92      0.93      0.92      1174
    surprise       0.66      0.75      0.70       147

    accuracy                           0.88      4000
   macro avg       0.84      0.85      0.84      4000
weighted avg       0.89      0.88      0.88      4000



### Random Forest Classification


In [19]:
from sklearn.ensemble import RandomForestClassifier
random_forest_classifier = RandomForestClassifier()
random_forest_classifier.fit(X_train, y_train)
y_pred = random_forest_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 87.4250%
Classification Report:
              precision    recall  f1-score   support

       anger       0.88      0.89      0.88       538
        fear       0.82      0.82      0.82       505
         joy       0.92      0.87      0.89      1294
        love       0.78      0.80      0.79       342
     sadness       0.91      0.93      0.92      1174
    surprise       0.65      0.74      0.69       147

    accuracy                           0.87      4000
   macro avg       0.83      0.84      0.83      4000
weighted avg       0.88      0.87      0.87      4000



### Decision Tree Model

In [20]:
from sklearn.tree import DecisionTreeClassifier
decision_classifier = DecisionTreeClassifier()
decision_classifier.fit(X_train, y_train)
y_pred = decision_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 83.5000%
Classification Report:
              precision    recall  f1-score   support

       anger       0.78      0.86      0.82       538
        fear       0.78      0.81      0.80       505
         joy       0.89      0.79      0.84      1294
        love       0.74      0.78      0.76       342
     sadness       0.89      0.91      0.90      1174
    surprise       0.64      0.78      0.70       147

    accuracy                           0.83      4000
   macro avg       0.79      0.82      0.80      4000
weighted avg       0.84      0.83      0.84      4000



### Logistic Regression model

In [21]:
from sklearn.linear_model import LogisticRegression
logistic_classifier = LogisticRegression(max_iter = 1000)
logistic_classifier.fit(X_train, y_train)
y_pred = logistic_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 88.5250%
Classification Report:
              precision    recall  f1-score   support

       anger       0.89      0.87      0.88       538
        fear       0.85      0.81      0.83       505
         joy       0.90      0.92      0.91      1294
        love       0.81      0.78      0.80       342
     sadness       0.92      0.93      0.93      1174
    surprise       0.71      0.73      0.72       147

    accuracy                           0.89      4000
   macro avg       0.85      0.84      0.84      4000
weighted avg       0.88      0.89      0.88      4000



### Kernal SVM

In [22]:
from sklearn.svm import SVC
kernal_classifier = SVC(kernel = 'rbf')
kernal_classifier.fit(X_train, y_train)
y_pred = kernal_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 83.8250%
Classification Report:
              precision    recall  f1-score   support

       anger       0.89      0.76      0.82       538
        fear       0.83      0.73      0.78       505
         joy       0.79      0.94      0.86      1294
        love       0.88      0.58      0.70       342
     sadness       0.87      0.92      0.90      1174
    surprise       0.87      0.50      0.63       147

    accuracy                           0.84      4000
   macro avg       0.86      0.74      0.78      4000
weighted avg       0.84      0.84      0.83      4000



### Support Vector Machine

In [23]:
from sklearn.svm import SVC
svm_classifier = SVC(kernel = 'linear')
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 87.0000%
Classification Report:
              precision    recall  f1-score   support

       anger       0.86      0.86      0.86       538
        fear       0.84      0.83      0.83       505
         joy       0.88      0.90      0.89      1294
        love       0.77      0.79      0.78       342
     sadness       0.92      0.91      0.92      1174
    surprise       0.71      0.71      0.71       147

    accuracy                           0.87      4000
   macro avg       0.83      0.83      0.83      4000
weighted avg       0.87      0.87      0.87      4000



### K-Nearest Neighbors (K-NN)

In [24]:
from sklearn.neighbors import KNeighborsClassifier
K_nearest_classifier = KNeighborsClassifier()
K_nearest_classifier.fit(X_train, y_train)
y_pred = K_nearest_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 56.2500%
Classification Report:
              precision    recall  f1-score   support

       anger       0.36      0.61      0.45       538
        fear       0.52      0.48      0.49       505
         joy       0.62      0.65      0.63      1294
        love       0.61      0.24      0.34       342
     sadness       0.69      0.62      0.65      1174
    surprise       0.52      0.25      0.34       147

    accuracy                           0.56      4000
   macro avg       0.55      0.47      0.49      4000
weighted avg       0.59      0.56      0.56      4000



### Naive Bayes model

In [25]:
from sklearn.naive_bayes import GaussianNB
naive_classifier = GaussianNB()
naive_classifier.fit(X_train,y_train)
y_pred = naive_classifier.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 39.7000%
Classification Report:
              precision    recall  f1-score   support

       anger       0.38      0.46      0.42       538
        fear       0.35      0.58      0.44       505
         joy       0.84      0.33      0.47      1294
        love       0.24      0.71      0.35       342
     sadness       0.78      0.26      0.39      1174
    surprise       0.13      0.52      0.20       147

    accuracy                           0.40      4000
   macro avg       0.45      0.48      0.38      4000
weighted avg       0.62      0.40      0.42      4000



## Voting Classifier Model

In [None]:
from sklearn.ensemble import VotingClassifier

from sklearn.neural_network import MLPClassifier
MLP_classifier = MLPClassifier(max_iter = 1000)

from sklearn.ensemble import GradientBoostingClassifier
GB_classifier = GradientBoostingClassifier()

from lightgbm import LGBMClassifier
LGBM_classifier = LGBMClassifier()

from xgboost import XGBClassifier
XGBC_classifier = XGBClassifier()

from sklearn.ensemble import ExtraTreesClassifier
ET_classifier = ExtraTreesClassifier()

from sklearn.ensemble import RandomForestClassifier
random_forest_classifier = RandomForestClassifier()

from sklearn.tree import DecisionTreeClassifier
decision_classifier = DecisionTreeClassifier()

from sklearn.linear_model import LogisticRegression
logistic_classifier = LogisticRegression(max_iter = 1000)

from sklearn.svm import SVC
kernal_classifier = SVC(kernel = 'rbf')

from sklearn.svm import SVC
svm_classifier = SVC(kernel = 'linear')


ensemble = VotingClassifier(
    estimators=[
        ('MLPC', MLP_classifier),
        ('GBC', GB_classifier),
        ('LGBMC', LGBM_classifier),
        ('XGBC', XGBC_classifier),
        ('ETC', ET_classifier),
        ('RFC', random_forest_classifier),
        ('DTC', decision_classifier),
        ('LC', logistic_classifier),
        ('KSVMC', kernal_classifier),
        ('SVMC', svm_classifier)
        ],
    voting='hard')
ensemble.fit(X_train, y_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.199581 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3165
[LightGBM] [Info] Number of data points in the train set: 15999, number of used features: 1183
[LightGBM] [Info] Start training from score -1.997799
[LightGBM] [Info] Start training from score -2.126995
[LightGBM] [Info] Start training from score -1.090024
[LightGBM] [Info] Start training from score -2.480603
[LightGBM] [Info] Start training from score -1.236950
[LightGBM] [Info] Start training from score -3.352345


In [None]:
y_pred = ensemble.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, y_pred)*100:2.4f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=emotions))

Accuracy = 90.4250%
Classification Report:
              precision    recall  f1-score   support

       anger       0.89      0.90      0.89       539
        fear       0.87      0.86      0.87       466
         joy       0.92      0.93      0.92      1381
        love       0.82      0.83      0.83       302
     sadness       0.95      0.92      0.93      1153
    surprise       0.80      0.85      0.82       159

    accuracy                           0.90      4000
   macro avg       0.87      0.88      0.88      4000
weighted avg       0.90      0.90      0.90      4000



In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [None]:
classifiers = [['Neural Network :', MLPClassifier(max_iter = 1000)],
               ['GradientBoostingClassifier: ', GradientBoostingClassifier()],
               ['LGBM_classifier: ', LGBMClassifier()],
               ['CatBoost :', CatBoostClassifier(logging_level='Silent')],
               ['XGB :', XGBClassifier()],
               ['ExtraTreesClassifier :', ExtraTreesClassifier()],
               ['RandomForest :',RandomForestClassifier()],
               ['DecisionTree :',DecisionTreeClassifier()],
               ['LogisticRegression :', LogisticRegression(max_iter = 1000)],
               ['Kernel SVM :', SVC(kernel = 'rbf')],
               ['Linear SVM :', SVC(kernel = "linear")],
               ['SVM :', SVC()],
               ['KNeighbours :', KNeighborsClassifier()],
               ['Naive Bayes :', GaussianNB()],
               ['AdaBoostClassifier :', AdaBoostClassifier()],
               ]

for name,classifier in classifiers:
    classifier = classifier
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print(name, accuracy_score(y_test, y_pred))

Neural Network : 0.85575
GradientBoostingClassifier:  0.85175
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.121051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3165
[LightGBM] [Info] Number of data points in the train set: 15999, number of used features: 1183
[LightGBM] [Info] Start training from score -1.997799
[LightGBM] [Info] Start training from score -2.126995
[LightGBM] [Info] Start training from score -1.090024
[LightGBM] [Info] Start training from score -2.480603
[LightGBM] [Info] Start training from score -1.236950
[LightGBM] [Info] Start training from score -3.352345
LGBM_classifier:  0.889
CatBoost : 0.90375
XGB : 0.89975
ExtraTreesClassifier : 0.89475
RandomForest : 0.88625
DecisionTree : 0.8355
LogisticRegression : 0.8925
Kernel SVM : 0.85525
Linear SVM : 0.877
SVM : 0.85525
KNeighbours : 0.568
Naive Bayes : 0.406
AdaBoostClassifier : 0.361


## Predict Single Result

In [26]:
def predict_emotion(text, classifier):
  text = text.replace("\n"," ")
  review = re.sub("[^a-zA-Z]"," ",text)
  review = review.lower()
  review = re.sub(r'http\S+|www.\S+', '', review)
  review = remove_contradictions(review)
  review = removing_not(review)
  review = review.split()
  review = removing_shortcuts(review)
  review = ' '.join([i for i in review.split() if not i.isdigit()])
  review = word_tokenize(review)
  review = removing_stopwords(review)
  lemma = WordNetLemmatizer()
  review = [lemma.lemmatize(word) for word in review]
  review = " ".join(review)
  test = cv.transform([review]).toarray()
  result = classifier.predict(test)
  result = le.inverse_transform(result)
  return result

In [28]:
text = '''Namaskar, I am Yash Vaidya. Welcome to Lok Satta's Podcast.
Friends, many things that happen around us are precious to us.
That's why Lok Satta has brought to you the Kutuhal Podcast.
Today's podcast is about plastic in the body of marine life.
In 1965, a plastic bag was found on a device used by fishermen in the Irish Sea.
The world's largest marine organization has declared that this is the first case of plastic waste in the sea water.
Since then, plastic waste has been going into the sea for decades.
According to surveys carried out by various institutions on the international level,
there is a total of 20 crore metric tons of plastic waste in the sea today.
Every year, there is a total of 381 lakh tons of plastic waste in the sea.
A large part of the sea shore is made up of broken and unusable nets of fish.
Plastic nets, small and large fish,
the bodies of many species of marine animals,
the face of which gets clogged, clogged, and becomes a physical torture.
A survey has shown that 10 lakh sea birds and 1 lakh marine animals die of plastic waste every year.
Plastic is stuck in fish of the same species on all three sides.
Due to the collision of the waves and sunlight in the sea,
slowly plastic bottles or bags become small pieces and eventually turn into very fine particles.
In recent times, the particles of very fine plastic in the sea
have entered the human body through the sea and are causing harm to the health.
In 2014, PVC and polythene were found in the rocks of the sea in Hawaii.
These rocks have been named plastic lomerits.
The Coastal Survey of Kolkata has found large amounts of such rocks in the Andaman Sea in May 2022.
These rocks have created a threat to the lives of the residents.
Research is still underway on this.
In the end, it is necessary for all ordinary citizens to come together to fight this infinite monster of plastic.
Plastic waste thrown into the sea is thrown out by the sea many times.
We have seen this on the shore many times.
Now we should respect this sea.
So you were listening to Lok Satta's Kutuhal Podcast.
To hear such new topics, visit loksatta.com's audio section.
And for the notification of new episodes, don't forget to like and follow Lok Satta's Podcast on your favorite audio platform.
'''

print(predict_emotion(text, ensemble))
print(predict_emotion(text, GB_classifier))
print(predict_emotion(text, LGBM_classifier))
print(predict_emotion(text, CB_classifier))
print(predict_emotion(text, XGBC_classifier))
print(predict_emotion(text, ET_classifier))
print(predict_emotion(text, random_forest_classifier))
print(predict_emotion(text, decision_classifier))
print(predict_emotion(text, K_nearest_classifier))
print(predict_emotion(text, kernal_classifier))
print(predict_emotion(text, svm_classifier))
print(predict_emotion(text, naive_classifier))
print(predict_emotion(text, logistic_classifier))

['joy']
['joy']
['joy']
['joy']
['joy']
['joy']
['anger']


  y = column_or_1d(y, warn=True)


['anger']
['joy']
['joy']
['sadness']
['fear']
