In [1]:
import numpy as np 
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import re
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from wordcloud import WordCloud

In [2]:
from keras.models import Sequential
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

In [3]:
# Load the ';'-separated "text;label" file. It has no header row, so the
# column names are supplied explicitly.
train_data = pd.read_csv(
    "emotions_dataset.txt",
    sep=";",
    header=None,
    names=["Comment", "Emotions"],
    encoding="utf-8",
)

In [4]:
# Show the loaded frame (Comment text plus its Emotions label).
train_data

Unnamed: 0,Comment,Emotions
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had a very brief time in the beanbag an...,sadness
15996,i am now turning and i feel pathetic that i am...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such a rude comment and i...,anger


In [5]:
# Longest comment measured in characters (len of each string), not in tokens.
max(map(len, train_data['Comment']))

300

In [6]:
# Fetch the WordNet corpus used by the WordNetLemmatizer in the cleaning functions.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to C:\Users\ANSHUL
[nltk_data]     KUMAR\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [8]:
# The stop-word corpus has to be present locally before it can be read; the
# notebook only downloaded WordNet above, so fetch it here as well (NLTK
# reports "already up-to-date" and does nothing when the data is installed).
nltk.download('stopwords')

# Stored as a set because the cleaning functions test every token for
# membership; a set lookup is O(1) whereas a list lookup scans the whole list.
stopwords = set(nltk.corpus.stopwords.words('english'))

In [10]:
# Encode the emotion names as integer class ids. `lb` is kept so predictions
# can be mapped back to names with `lb.inverse_transform`.
# NOTE: the "Emotions" column is overwritten in place, so re-running this cell
# would fit the encoder on the integer codes instead of the label strings.
lb = LabelEncoder()
train_data["Emotions"] = lb.fit_transform(train_data["Emotions"])

In [11]:
# Check the first rows: "Emotions" now holds the integer codes produced by `lb`.
train_data.head()

Unnamed: 0,Comment,Emotions
0,i didnt feel humiliated,4
1,i can go from feeling so hopeless to so damned...,4
2,im grabbing a minute to post i feel greedy wrong,0
3,i am ever feeling nostalgic about the fireplac...,3
4,i am feeling grouchy,0


In [27]:
def text_cleaning(df, column, vocab_size, max_len):
    """Clean a text column and turn it into padded integer sequences.

    Each entry of ``df[column]`` is processed as follows:

    1. every character that is not an ASCII letter is replaced by a space;
    2. the text is lower-cased and split on whitespace;
    3. tokens found in the module-level ``stopwords`` collection are dropped
       and the remaining tokens are lemmatized with ``WordNetLemmatizer``;
    4. the cleaned sentence is encoded with Keras ``one_hot`` (a hashing
       encoder, so distinct words may share an index) using ``vocab_size``;
    5. the sequences are left-padded ('pre') to ``max_len``.

    Parameters
    ----------
    df : mapping or DataFrame
        Object such that ``df[column]`` is an iterable of strings.
    column : str
        Name of the text column.
    vocab_size : int
        Passed to ``one_hot`` as ``n``.
    max_len : int
        Passed to ``pad_sequences`` as ``maxlen``.

    Returns
    -------
    The array returned by ``pad_sequences``: one row per input text, each of
    length ``max_len``.

    NOTE(review): ``one_hot`` is documented as using Python's built-in
    ``hash``, which is salted per interpreter process unless PYTHONHASHSEED
    is fixed. Confirm the indices are reproducible wherever the saved model
    is later served; a stable alternative is ``hashing_trick(...,
    hash_function='md5')``, but it must be switched for training and
    inference together.
    """
    lemmatizer = WordNetLemmatizer()
    # Build the lookup once per call: membership against a set is O(1),
    # whereas the stop-word list would be scanned for every single token.
    stop_words = set(stopwords)

    corpus = []
    for text in df[column]:
        tokens = re.sub("[^a-zA-Z]", " ", text).lower().split()
        kept = [lemmatizer.lemmatize(tok) for tok in tokens if tok not in stop_words]
        corpus.append(" ".join(kept))

    # Each element of `corpus` is a whole cleaned sentence; one_hot splits it
    # into words itself and returns one integer per word.
    encoded = [one_hot(input_text=sentence, n=vocab_size) for sentence in corpus]
    return pad_sequences(sequences=encoded, maxlen=max_len, padding='pre')

# vocab_size and max_len must stay in sync with the Embedding layer's
# input_dim / input_length and with the values used at prediction time.
x_train = text_cleaning(
    df=train_data,
    column="Comment",
    vocab_size=11000,
    max_len=300,
)

# One-hot encode the integer emotion ids for categorical cross-entropy.
y_train = to_categorical(train_data["Emotions"])

    

In [17]:
# Sanity check: one row per comment, each padded to max_len.
x_train.shape

(16000, 300)

In [18]:
# NOTE(review): this empty model is superseded by the cell that rebuilds
# `model` with its layers further down; this instance is never trained.
model = Sequential()




In [19]:
# Inspect the encoded sequences: the leading zeros come from 'pre' padding.
x_train

array([[   0,    0,    0, ..., 6907, 3331, 4123],
       [   0,    0,    0, ..., 3527, 6098, 7104],
       [   0,    0,    0, ..., 3331, 2180,  478],
       ...,
       [   0,    0,    0, ..., 9935, 1674, 6372],
       [   0,    0,    0, ..., 8444, 4649, 5039],
       [   0,    0,    0, ..., 3331,  629, 2804]])

In [20]:
# Sanity check: one one-hot row per comment, one column per emotion class.
y_train.shape

(16000, 6)

In [22]:
# Embedding -> LSTM -> Dense classifier over the six emotion classes.
# input_dim / input_length mirror the vocab_size / max_len used to build x_train.
model = Sequential([
    Embedding(input_dim=11000, output_dim=150, input_length=300),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(64, activation='sigmoid'),
    Dropout(0.2),
    Dense(6, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Trains on the full training set; no validation data is passed here.
model.fit(x_train, y_train, epochs=10, batch_size=64, verbose=1)


Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1ee192a28d0>

In [34]:
def cleaning_text(text, vocab_size=11000, max_len=300):
    """Preprocess one raw sentence the same way the training corpus was.

    This used to be a copy of ``text_cleaning`` with the vocabulary size and
    sequence length hard-coded. It now delegates to ``text_cleaning`` so the
    training and inference pipelines cannot drift apart.

    Parameters
    ----------
    text : str
        The raw sentence to encode.
    vocab_size : int, optional
        Hashing range passed on to ``text_cleaning``. Defaults to 11000, the
        value used to build ``x_train``.
    max_len : int, optional
        Padded sequence length passed on to ``text_cleaning``. Defaults to
        300, the value used to build ``x_train``.

    Returns
    -------
    The padded array produced by ``text_cleaning`` for a single-row input
    (one row of length ``max_len``).
    """
    # Wrap the sentence in a one-row frame so it goes through the exact same
    # code path as the training data.
    single_row = pd.DataFrame({"Comment": [text]})
    return text_cleaning(single_row, "Comment", vocab_size=vocab_size, max_len=max_len)
    

In [43]:
def prediction_emotion(input):
    """Predict the emotion of a raw sentence and print it with its score.

    The sentence is preprocessed with ``cleaning_text``, scored by the
    module-level ``model``, and the highest-scoring class index is mapped back
    to its label with the module-level encoder ``lb``.

    Parameters
    ----------
    input : str
        The raw sentence to classify. (The name shadows the built-in
        ``input``; it is kept because it is part of the call signature.)

    Returns
    -------
    None. The result is printed as ``"<labels array> : <max probability>"``.
    """
    final_text = cleaning_text(input)

    # Run the network once and derive both the label and its score from the
    # same output; the previous version called model.predict twice.
    probabilities = model.predict(final_text)
    result = lb.inverse_transform(np.argmax(probabilities, axis=-1))
    probability = np.max(probabilities)
    print(f"{result} : {probability}")

In [54]:
# NOTE(review): the recorded output classifies this negated sentence as 'love'.
# Presumably "not" is removed as a stop word during cleaning, so the negation
# never reaches the model — confirm against the NLTK English stop-word list.
prediction_emotion("I am not loving to do this")

['love'] : 0.8849857449531555


In [49]:
# Persist the trained network to disk (the .h5 suffix presumably selects Keras' HDF5 format — confirm).
model.save('model_emotion.h5')

  saving_api.save_model(


In [51]:
# Persist what inference needs besides the model: the fitted label encoder and
# the preprocessing settings (these must equal the values used for training).
vocab_info = {'vocab_size': 11000 , 'max_len' : 300}

for path, artifact in (('lb1.pkl', lb), ('vocab_info.pkl', vocab_info)):
    with open(path, 'wb') as f:
        pickle.dump(artifact, f)