### Import Libraries

In [7]:
import warnings
warnings.filterwarnings("ignore")

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
from numpy import array
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import re
import unicodedata
from bs4 import BeautifulSoup
from textblob import TextBlob
import spacy
import pickle
nlp = spacy.load("en_core_web_sm")

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

### Data Load

In [3]:
data = pd.read_csv("text_to_emotion.csv")
data.head()
data.shape

Unnamed: 0,text,emotion
0,i feel cold,ANGER
1,i feel the cold i can say he sends it,ANGER
2,i remember feeling like my blood had run cold ...,ANGER
3,i hate too is stepping outside in the cold and...,ANGER
4,i don't think i am anti social i just don't re...,ANGER


(30000, 2)

In [5]:
data["emotion"].value_counts()

SADNESS     5000
SURPRISE    5000
ANGER       5000
LOVE        5000
JOY         5000
FEAR        5000
Name: emotion, dtype: int64

### Data Cleaning

In [8]:
contractions = { 
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he would",
"he'd've": "he would have",
"he'll": "he will",
"he'll've": "he will have",
"he's": "he is",
"how'd": "how did",
"how'd'y": "how do you",
"how'll": "how will",
"how's": "how does",
"i'd": "i would",
"i'd've": "i would have",
"i'll": "i will",
"i'll've": "i will have",
"i'm": "i am",
"i've": "i have",
"isn't": "is not",
"it'd": "it would",
"it'd've": "it would have",
"it'll": "it will",
"it'll've": "it will have",
"it's": "it is",
"let's": "let us",
"ma'am": "madam",
"mayn't": "may not",
"might've": "might have",
"mightn't": "might not",
"mightn't've": "might not have",
"must've": "must have",
"mustn't": "must not",
"mustn't've": "must not have",
"needn't": "need not",
"needn't've": "need not have",
"o'clock": "of the clock",
"oughtn't": "ought not",
"oughtn't've": "ought not have",
"shan't": "shall not",
"sha'n't": "shall not",
"shan't've": "shall not have",
"she'd": "she would",
"she'd've": "she would have",
"she'll": "she will",
"she'll've": "she will have",
"she's": "she is",
"should've": "should have",
"shouldn't": "should not",
"shouldn't've": "should not have",
"so've": "so have",
"so's": "so is",
"that'd": "that would",
"that'd've": "that would have",
"that's": "that is",
"there'd": "there would",
"there'd've": "there would have",
"there's": "there is",
"they'd": "they would",
"they'd've": "they would have",
"they'll": "they will",
"they'll've": "they will have",
"they're": "they are",
"they've": "they have",
"to've": "to have",
"wasn't": "was not",
" u ": " you ",
" ur ": " your ",
" n ": " and ",
"won't": "would not",
'dis': 'this',
'bak': 'back',
'brng': 'bring'}



def cont_to_exp(x):
    if type(x) is str:
        for key in contractions:
            value = contractions[key]
            x = x.replace(key, value)
        return x
    else:
        return x


def remove_accented_chars(x):
    x = unicodedata.normalize('NFKD', x).encode('ascii', 'ignore').decode('utf-8', 'ignore')
    return x

def make_to_base(x):
    x = str(x)
    x_list = []
    doc = nlp(x)
    
    for token in doc:
        lemma = token.lemma_
        if lemma == '-PRON-' or lemma == 'be':
            lemma = token.text

        x_list.append(lemma)
    return ' '.join(x_list)

In [11]:
data.head()

Unnamed: 0,text,emotion
0,i feel cold,ANGER
1,i feel the cold i can say he sends it,ANGER
2,i remember feeling like my blood had run cold ...,ANGER
3,i hate too is stepping outside in the cold and...,ANGER
4,i do not think i am anti social i just do not ...,ANGER


In [12]:
data["text"] = data["text"].apply(lambda x: str(x).lower() )
data["text"] = data["text"].apply(lambda x: cont_to_exp(x) )
data["text"] = data["text"].apply(lambda x: remove_accented_chars(x) )
data["text"] = data["text"].apply(lambda x: re.sub(r'[^\w ]+',"",x))
data["text"] = data["text"].apply(lambda x: make_to_base(x))
data["text"] = data["text"].apply(lambda x: TextBlob(x).correct().raw_sentences[0])

In [13]:
data.head()

Unnamed: 0,text,emotion
0,i feel cold,ANGER
1,i feel the cold i can say he send it,ANGER
2,i remember feel like my blood have run cold an...,ANGER
3,i hate too is step outside in the cold and fee...,ANGER
4,i do not think i am anti social i just do not ...,ANGER


### Load Glove vector

In [32]:
glove_vect = {}

with open("glove/glove.6B.100d.txt",encoding="utf-8") as f:
    for line in f:
        val = line.split()
        word = val[0]
        vect = np.asarray(val[1:])
        glove_vect[word] = vect
    

In [33]:
len(glove_vect)
glove_vect.get("theeeeeeee")

400000

### Text to glove

In [34]:
vec_shape = 100

In [35]:
def get_vec(row):
    arr = np.zeros(vec_shape)
    text = str(row).split()
    for t in text:
        try:
            vec = glove_vect.get(t).astype(float)
            arr = arr +vec
        except:
            pass
    arr.reshape(1,-1)[0] # 1d to 2d array by reshape, hence take [0]
    return arr/len(text)

In [36]:
data["vec"] = data["text"].apply(lambda x: get_vec(x)) 

###  Train Test Split

In [37]:
X = data["vec"]
y = data["emotion"]

In [38]:
X.shape
y.shape

(30000,)

(30000,)

In [39]:
X = np.concatenate(X,axis =0).reshape(-1,vec_shape)
X.shape
y.shape

(30000, 100)

(30000,)

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)

In [41]:
X_train.shape
X_test.shape
y_train.shape
y_test.shape

(24000, 100)

(6000, 100)

(24000,)

(6000,)

### Model Training

In [42]:
#LogReg

clf = LogisticRegression(solver = 'liblinear', multi_class='auto')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

LogisticRegression(solver='liblinear')

              precision    recall  f1-score   support

       ANGER       0.68      0.65      0.66      1000
        FEAR       0.71      0.68      0.70      1000
         JOY       0.73      0.74      0.74      1000
        LOVE       0.79      0.83      0.81      1000
     SADNESS       0.73      0.72      0.73      1000
    SURPRISE       0.76      0.79      0.78      1000

    accuracy                           0.74      6000
   macro avg       0.74      0.74      0.74      6000
weighted avg       0.74      0.74      0.74      6000

[[649  88  77  40  94  52]
 [ 80 683  56  50  62  69]
 [ 69  38 743  54  40  56]
 [ 37  31  37 831  32  32]
 [ 76  80  51  37 721  35]
 [ 41  45  50  37  36 791]]


In [43]:
#SVM
clf = LinearSVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

LinearSVC()

              precision    recall  f1-score   support

       ANGER       0.69      0.64      0.67      1000
        FEAR       0.72      0.69      0.70      1000
         JOY       0.73      0.75      0.74      1000
        LOVE       0.78      0.83      0.80      1000
     SADNESS       0.73      0.73      0.73      1000
    SURPRISE       0.76      0.79      0.78      1000

    accuracy                           0.74      6000
   macro avg       0.74      0.74      0.74      6000
weighted avg       0.74      0.74      0.74      6000

[[641  85  79  47  97  51]
 [ 74 685  60  54  61  66]
 [ 63  38 749  50  41  59]
 [ 36  31  34 830  34  35]
 [ 71  71  53  38 729  38]
 [ 38  41  52  45  31 793]]


In [44]:
data["emotion"].value_counts()

SADNESS     5000
SURPRISE    5000
ANGER       5000
LOVE        5000
JOY         5000
FEAR        5000
Name: emotion, dtype: int64

### Model Save

In [45]:
pickle.dump(clf, open('model.pkl', 'wb'))

<IPython.core.display.Javascript object>

### Model testing

In [49]:
x = 'i am so happy. thanks a lot'
def get_pred(x):
    x = str(x).lower()
    x =  cont_to_exp(x)
    x =  re.sub(r'[^\w ]+',"",x)
    x = remove_accented_chars(x)
    vec = get_vec(x).reshape(-1, vec_shape)
    
    emotion = clf.predict(vec)
    
    return emotion


In [50]:
get_pred(x)

array(['JOY'], dtype=object)