## Go To:
### <a href = "#logistic">Logistic Regression</a>
### <a href = "#nb">Naive Bayes</a>
### <a href = "#function">Final Function</a>

In [44]:
import pandas as pd    # Handle csv files
import numpy as np     # Mathematical operations
import nltk            # for text processing
from sklearn.preprocessing import LabelEncoder
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer # For word vectorization
from sklearn.model_selection import train_test_split # To split the data into train and test
from sklearn.linear_model import LogisticRegression # To perform logistic regression
from sklearn.metrics import accuracy_score # Evaluate our models
from sklearn.naive_bayes import MultinomialNB # Naive Bayes

import warnings
warnings.filterwarnings("ignore")

In [17]:
# Load the labelled comments from a CSV file given by a relative path.
df = pd.read_csv('Emotion_classify_Data.csv')  # DataFrame with the columns previewed below
df.head()  # display first 5 rows

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [18]:
# Summary statistics per column (count / unique / top / freq, as shown in the output below).
df.describe()

Unnamed: 0,Comment,Emotion
count,5937,5937
unique,5934,3
top,i feel like a tortured artist when i talk to her,anger
freq,2,2000


In [19]:
# Print the column dtypes and non-null counts to confirm nothing is missing.
df.info() # Check for nulls and datatype

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5937 entries, 0 to 5936
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Comment  5937 non-null   object
 1   Emotion  5937 non-null   object
dtypes: object(2)
memory usage: 92.9+ KB


In [20]:
# Encode the emotion labels as integers rather than words so the classifiers can learn from them.
# NOTE(review): df["Emotion"] is overwritten in place, so re-running this cell refits the
# encoder on the already-encoded integers instead of the original label strings.
encoder = LabelEncoder()
df["Emotion"] = encoder.fit_transform(df["Emotion"])

#### Label encoding: 0 = "anger", 1 = "fear", 2 = "joy".

In [21]:
# Preview the frame again: the Emotion column now holds the integer codes.
df.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,1
1,im so full of life i feel appalled,0
2,i sit here to write i start to dig out my feel...,1
3,ive been really angry with r and i feel like a...,2
4,i feel suspicious if there is no one outside l...,1


In [22]:
# Fetch every NLTK resource the notebook needs so it runs on a fresh environment.
nltk.download("stopwords")  # stop-word lists that preprocess() filters out
# nltk.word_tokenize (used in preprocess) needs the Punkt tokenizer data; recent NLTK
# releases look it up as "punkt_tab", older ones as "punkt", so request both.
nltk.download("punkt")
nltk.download("punkt_tab")
stopwords.words("english")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Skully\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [23]:
# Holds the English stop words once they have been read, so the corpus file is
# loaded a single time instead of once per token.
_ENGLISH_STOPWORDS_CACHE = []


def _english_stopwords():
    """Return the NLTK English stop words as a frozenset, reading the corpus only once."""
    if not _ENGLISH_STOPWORDS_CACHE:
        _ENGLISH_STOPWORDS_CACHE.append(frozenset(stopwords.words('english')))
    return _ENGLISH_STOPWORDS_CACHE[0]


def preprocess(Comment):
    """Normalize one comment into a space-separated string of stemmed tokens.

    Steps: lowercase the text, tokenize it with ``nltk.word_tokenize``, drop
    English stop words and any token that is not purely alphanumeric (e.g.
    punctuation), and reduce the remaining tokens with the Porter stemmer.

    Parameters
    ----------
    Comment : str
        Raw comment text.

    Returns
    -------
    str
        The kept, stemmed tokens joined by single spaces (an empty string if
        every token was filtered out).
    """
    stop_words = _english_stopwords()  # set membership: O(1) per token
    stemmer = PorterStemmer()

    tokens = nltk.word_tokenize(Comment.lower())

    # Keep alphanumeric, non-stop-word tokens and stem them (no lemmatization is applied).
    stemmed_tokens = [
        stemmer.stem(token)
        for token in tokens
        if token.isalnum() and token not in stop_words
    ]

    return ' '.join(stemmed_tokens)

In [24]:
# Clean every comment with preprocess() and store the result back in the same column.
# NOTE(review): the raw text is overwritten, so re-running this cell preprocesses the
# already-preprocessed strings again.
df["Comment"]=df["Comment"].apply(preprocess) # replaces each comment with its preprocessed form
df.head()

Unnamed: 0,Comment,Emotion
0,serious hate one subject death feel reluct drop,1
1,im full life feel appal,0
2,sit write start dig feel think afraid accept p...,1
3,ive realli angri r feel like idiot trust first...,2
4,feel suspici one outsid like raptur happen someth,1


In [26]:
# Build the bag-of-words features: fit the vocabulary on all comments and count tokens per comment.
count_vectorizer = CountVectorizer()  # Initialize count vectorizer
# NOTE(review): .toarray() converts the vectorizer output to a dense array (int64 per the
# output below); this may use a lot of memory if the vocabulary is large.
x = count_vectorizer.fit_transform(df["Comment"]).toarray() # vectorize the comments and transform them into arrays
x

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [30]:
# We prepared the data above so now we prepare the labels
y = df["Emotion"].values  # We used .values to extract the values into a numpy array
y

array([1, 0, 1, ..., 2, 1, 0])

In [32]:
# Hold out part of the data for evaluation; random_state is fixed so the split is reproducible.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size = 0.2,random_state = 42) # 20% of data for test

<a id = "logistic"></a> 
### Logistic Regression

In [36]:
# Logistic regression with default hyper-parameters, trained on the training split.
# NOTE(review): warnings are silenced globally in the import cell, so any warning
# raised during fitting would not be shown here.
model = LogisticRegression()    # Initializing model
model.fit(x_train,y_train)    # Training

LogisticRegression()

In [40]:
# Predict the emotion codes for the held-out test features with logistic regression.
prediction = model.predict(x_test) # Test the model

In [43]:
# Compare the predictions with the true test labels.
accuracy = accuracy_score(y_test,prediction) # Evaluate the model
print("Model accuracy: ",accuracy)

Model accuracy:  0.9410774410774411


<a id = "nb"></a>
### Naive Bayes

In [45]:
# Multinomial Naive Bayes with default hyper-parameters, trained on the same training split.
model2 = MultinomialNB()        # Initializing model
model2.fit(x_train,y_train)   # Training

MultinomialNB()

In [46]:
# Predict on the test features with Naive Bayes.
# Note: this reuses the name `prediction`, replacing the logistic-regression predictions.
prediction = model2.predict(x_test) # Test the model

In [47]:
# Accuracy of the Naive Bayes predictions; `accuracy` is overwritten with this model's score.
accuracy = accuracy_score(y_test,prediction) # Evaluate the model
print("Model accuracy: ",accuracy)

Model accuracy:  0.8947811447811448


<a id = "function"></a>
### Final Function

In [75]:
def sentimentor(text):
    """Print the emotion each trained classifier predicts for ``text``.

    The text is cleaned with ``preprocess``, vectorized with the fitted
    ``count_vectorizer``, and classified by the logistic-regression model
    (``model``) and the Naive Bayes model (``model2``). The predicted integer
    code is translated back to its label with the fitted ``encoder`` rather
    than a hard-coded mapping, so the printed name always matches the encoding
    used for training.

    Parameters
    ----------
    text : str
        Raw sentence to classify.

    Returns
    -------
    None
        The predictions are printed, one line per model.
    """
    transformed_text = preprocess(text)  # same cleaning as the training data
    vectorized_text = count_vectorizer.transform([transformed_text]).toarray()

    classifiers = (
        ("Logistic Regression", model),
        ("Naive Bayes", model2),
    )
    for display_name, classifier in classifiers:
        # predict() returns an array with one entry for the single input row
        label_code = classifier.predict(vectorized_text)[0]
        emotion = encoder.inverse_transform([label_code])[0]
        print(f"{display_name} thinks the emotion is {emotion}")

In [80]:
# Hand-written sample sentences for trying out sentimentor().
text1 = "i love cooking then eating what i cooked as it makes me feel good and rewarded!"
text2 = "I am afraid all my work is going in vayne as I fade away in a world of deprecating emotions"
text3 = "I hate all this social media conundrums!"

In [83]:
# Run both classifiers on the first sample sentence.
sentimentor(text1)

Logistic Regression thinks the emotion is joy
Naive Bayes thinks the emotion is joy
