In [45]:
import pandas as pd
from sklearn.pipeline import make_pipeline

In [46]:
# Load the emotion-classification dataset from a CSV file in the working directory.
data = pd.read_csv('Emotion_classify_Data.csv')
# Preview the first rows to see the columns (Comment text, Emotion label).
data.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [47]:
# List the distinct emotion labels present in the dataset.
data['Emotion'].unique()

array(['fear', 'anger', 'joy'], dtype=object)

In [48]:
# Count missing values per column to confirm there are no nulls to handle.
data.isnull().sum()

Comment    0
Emotion    0
dtype: int64

# Preprocessing

In [49]:
# preprocessing
import re
def preprocessing(text):
    """Normalise a raw string before tokenization.

    Each non-word character (punctuation, symbols, whitespace; underscores
    and alphanumerics are kept) is replaced by a space, every run of
    whitespace is then collapsed to a single space, and the result is
    lower-cased. Leading/trailing whitespace is collapsed to one space,
    not stripped.
    """
    cleaned = text
    # Order matters: punctuation is turned into spaces first, then the
    # resulting whitespace runs are squeezed.
    for pattern in (r'\W', r'\s+'):
        cleaned = re.sub(pattern, ' ', cleaned)
    return cleaned.lower()

In [50]:
# Sanity check: punctuation becomes spaces, whitespace runs collapse, text is lower-cased.
preprocessing("   My name is Georgino   ......  i love football !!!")

' my name is georgino i love football '

In [51]:
# Normalise every comment; the 'Comment' column is overwritten with the cleaned text.
data['Comment'] = data['Comment'].apply(preprocessing)

# Tokenization, stemming and lemmatization

In [52]:
# Tokenization setup: fetch the NLTK resources used by the cells below —
# 'punkt' for nltk.word_tokenize, 'stopwords' for the stop-word list and
# 'wordnet' for WordNetLemmatizer.
# NOTE(review): recent NLTK releases may additionally need 'punkt_tab' for
# word_tokenize — confirm against the installed version.
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Gorgino\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Gorgino\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Gorgino\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [53]:
# English stop words shipped with NLTK; `tokenize` filters tokens against this list.
from nltk.corpus import stopwords
stop_words = stopwords.words('english')
# Display the list for inspection.
stop_words

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [54]:
# Stemming: strip suffixes to reduce a word to its stem.

from nltk.stem import PorterStemmer
stemmer = PorterStemmer()

# Quick check on a single word.
stemmer.stem('playing')

'play'

In [55]:
# Lemmatization: map a word to its dictionary form (lemma) using
# grammatical information from WordNet.

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# Quick check on a single word.
lemmatizer.lemmatize("studies")

'study'

In [56]:
def tokenize(text):
    """Tokenize, filter, lemmatize and stem a string.

    The text is split with ``nltk.word_tokenize``; tokens found in
    ``stop_words`` (exact match) are dropped; each remaining token is
    lemmatized and then stemmed. The processed tokens are returned joined
    by single spaces.
    """
    processed = []
    for token in nltk.word_tokenize(text):
        if token in stop_words:
            continue
        # Lemmatize first, then stem the lemma.
        processed.append(stemmer.stem(lemmatizer.lemmatize(token)))
    return ' '.join(processed)

In [57]:
# Sanity check: stop words are removed and the remaining tokens are lemmatized and stemmed.
tokenize("my name is Georgino ,i am watching tv , i am happy . i have many studies .")

'name georgino , watch tv , happi . mani studi .'

In [58]:
# Show the cleaned comments as they look before tokenization, for comparison with the next cells.
print("Data before tokenize\n")
data['Comment'].head()

Data before tokenize



0    i seriously hate one subject to death but now ...
1                   im so full of life i feel appalled
2    i sit here to write i start to dig out my feel...
3    ive been really angry with r and i feel like a...
4    i feel suspicious if there is no one outside l...
Name: Comment, dtype: object

In [59]:
# Replace each comment with its stop-word-filtered, lemmatized and stemmed form.
# NOTE(review): the column is overwritten, so re-running this cell would tokenize
# already-stemmed text — rerun from the data-loading cell instead.
data['Comment'] = data['Comment'].apply(tokenize)

In [60]:
# Show the same rows after tokenization.
print("Data after tokenize\n")
data['Comment'].head()

Data after tokenize



0      serious hate one subject death feel reluct drop
1                              im full life feel appal
2    sit write start dig feel think afraid accept p...
3    ive realli angri r feel like idiot trust first...
4    feel suspici one outsid like raptur happen someth
Name: Comment, dtype: object

In [61]:
from sklearn.preprocessing import LabelEncoder

# Encode the string emotion labels as integers; the fitted encoder keeps the
# original labels in `le.classes_`, indexed by their integer code.
le = LabelEncoder()
data['Emotion'] = le.fit_transform(data['Emotion'])

# Report the mapping the encoder actually learned rather than a hard-coded
# one, so the printout stays correct if the dataset's label set changes.
print("\n".join(
    f"Label: {label} = value: {value}"
    for value, label in enumerate(le.classes_)
))

Label: anger = value: 0
Label: fear = value: 1
Label: joy = value: 2


In [62]:
# Preview the frame after tokenization and label encoding.
data.head()

Unnamed: 0,Comment,Emotion
0,serious hate one subject death feel reluct drop,1
1,im full life feel appal,0
2,sit write start dig feel think afraid accept p...,1
3,ive realli angri r feel like idiot trust first...,2
4,feel suspici one outsid like raptur happen someth,1


# Split the Data

In [63]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data['Comment'],
    data['Emotion'],
    test_size=0.2,
    random_state=42,
)

# Logistic Regression

In [64]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score 

# TF-IDF features feeding a logistic-regression classifier, fitted on the training split.
log_reg = make_pipeline(TfidfVectorizer(), LogisticRegression())
log_reg.fit(X_train, y_train)

# Test split: predictions, accuracy and macro-averaged precision.
y_log_pred = log_reg.predict(X_test)
log_reg_acc = accuracy_score(y_test, y_log_pred)
log_reg_prec = precision_score(y_test, y_log_pred, average='macro')

# Training split: the same metrics, for comparison against the test numbers.
yt_log_pred = log_reg.predict(X_train)
tr_log_reg_acc = accuracy_score(y_train, yt_log_pred)
tr_log_reg_prec = precision_score(y_train, yt_log_pred, average='macro')

# Report train metrics first, then test metrics.
print("accuracy score on train data is ", tr_log_reg_acc)
print("precision score on train data is ", tr_log_reg_prec)
print("accuracy score on test data is ", log_reg_acc)
print("precision score on test data is ", log_reg_prec)

accuracy score on train data is  0.9793640766477153
precision score on train data is  0.9794326442988702
accuracy score on test data is  0.9259259259259259
precision score on test data is  0.9258979865506665


# Naive Bayes

In [65]:
from sklearn.naive_bayes import MultinomialNB

# TF-IDF features feeding a multinomial naive Bayes classifier, fitted on the training split.
naive_bayes = make_pipeline(TfidfVectorizer(), MultinomialNB())
naive_bayes.fit(X_train, y_train)

# Test split: predictions, accuracy and macro-averaged precision.
naive_bayes_pred = naive_bayes.predict(X_test)
naive_bayes_acc = accuracy_score(y_test, naive_bayes_pred)
naive_bayes_prec = precision_score(y_test, naive_bayes_pred, average='macro')

# Training split: the same metrics, for comparison against the test numbers.
naive_bayes_tr_pred = naive_bayes.predict(X_train)
naive_bayes_tr_acc = accuracy_score(y_train, naive_bayes_tr_pred)
naive_bayes_tr_prec = precision_score(y_train, naive_bayes_tr_pred, average='macro')

# Report train metrics first, then test metrics.
print("accuracy score on train data is ", naive_bayes_tr_acc)
print("precision score on train data is ", naive_bayes_tr_prec)
print("accuracy score on test data is ", naive_bayes_acc)
print("precision score on test data is ", naive_bayes_prec)

accuracy score on train data is  0.976416087597389
precision score on train data is  0.9765993495178797
accuracy score on test data is  0.8998316498316499
precision score on test data is  0.9003105989072626


In [66]:
# final function that Predict the emotion 
def predict_emotion(text):
    """Predict the emotion of a raw sentence with both trained models.

    The text goes through `preprocessing` and `tokenize`, is classified by
    the `log_reg` and `naive_bayes` pipelines, and each integer prediction
    is mapped back to its original label through the fitted encoder `le`.

    Returns:
        A tuple ``(logistic_regression_label, naive_bayes_label)``.
    """
    # Both pipelines expect an iterable of documents, hence the one-item list.
    sample = [tokenize(preprocessing(text))]

    labels = []
    for model in (log_reg, naive_bayes):
        encoded = model.predict(sample)[0]
        # Turn the integer class back into its emotion name.
        labels.append(le.inverse_transform([encoded])[0])

    return labels[0], labels[1]

In [69]:
# Three fixed example sentences plus one typed in by the user at run time.
input1 = "I really hate the school and i have a lot of homeworks !!!"
input2 = "I am so full of energy , i feel excited to start my day"
input3 = "I watched a horror movie ; I start to dig out my feelings..."
user_input = input("")

# Classify each sentence in order; every call yields
# (logistic regression label, naive Bayes label).
(
    (inp1_log_reg, inp1_naive_bayes),
    (inp2_log_reg, inp2_naive_bayes),
    (inp3_log_reg, inp3_naive_bayes),
    (inp4_log_reg, inp4_naive_bayes),
) = map(predict_emotion, (input1, input2, input3, user_input))

# Print one result line per input.
print(
    f"input 1 - Logistic Regression: {inp1_log_reg}, Naive Bayes: {inp1_naive_bayes}",
    f"input 2 - Logistic Regression: {inp2_log_reg}, Naive Bayes: {inp2_naive_bayes}",
    f"input 3 - Logistic Regression: {inp3_log_reg}, Naive Bayes: {inp3_naive_bayes}",
    f"input 4 - Logistic Regression: {inp4_log_reg}, Naive Bayes: {inp4_naive_bayes}",
    sep="\n",
)

my favorite player scored yesterday !!
input 1 - Logistic Regression: anger, Naive Bayes: anger
input 2 - Logistic Regression: joy, Naive Bayes: joy
input 3 - Logistic Regression: fear, Naive Bayes: fear
input 4 - Logistic Regression: joy, Naive Bayes: joy
