# Predict Emotion

The main objective of this notebook is to predict emotions from tweets

In [1]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

sys.path.append(Path(os.path.join(os.path.abspath(''), '../')).resolve().as_posix())

In [2]:
import pickle

# Load Tokenizer

Load `.pickle` file with the tokenizer

In [3]:
tokenizer_path = Path('../datasets/sentiment_analysis/tokenizer.pickle').resolve()
with tokenizer_path.open('rb') as file:
    tokenizer = pickle.load(file)

# Load Model

Load the trained emotion recognition model

In [4]:
from tensorflow.keras.layers import Input, Embedding, SpatialDropout1D, LSTM
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D
from tensorflow.keras.layers import Bidirectional, Conv1D, Dense, concatenate
from tensorflow.keras.models import Model

In [5]:
input_dim = min(tokenizer.num_words, len(tokenizer.word_index) + 1)
num_classes = 4
embedding_dim = 500
input_length = 100
lstm_units = 128
lstm_dropout = 0.1
recurrent_dropout = 0.1
spatial_dropout=0.2
filters=64
kernel_size=3

In [6]:
input_layer = Input(shape=(input_length,))
output_layer = Embedding(
  input_dim=input_dim,
  output_dim=embedding_dim,
  input_shape=(input_length,)
)(input_layer)

output_layer = SpatialDropout1D(spatial_dropout)(output_layer)

output_layer = Bidirectional(
LSTM(lstm_units, return_sequences=True,
     dropout=lstm_dropout, recurrent_dropout=recurrent_dropout)
)(output_layer)
output_layer = Conv1D(filters, kernel_size=kernel_size, padding='valid',
                    kernel_initializer='glorot_uniform')(output_layer)

avg_pool = GlobalAveragePooling1D()(output_layer)
max_pool = GlobalMaxPooling1D()(output_layer)
output_layer = concatenate([avg_pool, max_pool])

output_layer = Dense(num_classes, activation='softmax')(output_layer)

model = Model(input_layer, output_layer)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [7]:
model_weights_path = Path('../models/emotion_recognition/model_weights.h5').resolve()
model.load_weights(model_weights_path.as_posix())

# Load data

Load the data that will have the labels predicted by the model

**data_path**: Path to the `.csv` file that will be used

In [8]:
import pandas as pd

In [9]:
data_path = Path('../datasets/predict/coronavirus.csv').resolve()
data = pd.read_csv(data_path, encoding="latin-1",)
data.head()

Unnamed: 0,id,date,user,text
0,1242920850078216198,2020-03-25 21:06:37,xJUSTBELIEVE,In my opinion whatever problems we face togeth...
1,1242920849948192769,2020-03-25 21:06:37,GofasNicholas,Greta Thunberg has the coronavirus https://t.c...
2,1242920849168089092,2020-03-25 21:06:37,IHeartChadMM87,I still donât understand why DP keep walk wi...
3,1242920849130360833,2020-03-25 21:06:37,NeotropeNews,Sha Bandzz Donates Proceeds From âWatch Me W...
4,1242920849088315403,2020-03-25 21:06:37,deeohtee,If you did not change your Twitter username to...


# Load Encoder

Load `.pickle` file with the encoder

In [10]:
encoder_path = Path('../models/emotion_recognition/encoder.pickle').resolve()
with encoder_path.open('rb') as file:
    encoder = pickle.load(file)    

# Preprocess data

Preprocess the data that will be used

In [11]:
from nlp import preprocess
from tensorflow.keras.preprocessing.sequence import pad_sequences

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Araceli\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [12]:
data['text'] = preprocess(data.text)
sequences = [text.split() for text in data.text]
list_tokenized = tokenizer.texts_to_sequences(sequences)
x_data = pad_sequences(list_tokenized, maxlen=100)

Time to clean up: 9.92 sec


# Results

Predict the labels and generate a confusion matrix

In [13]:
import numpy as np

In [14]:
x_data.shape

(17997, 100)

In [15]:
y_pred = model.predict(x_data)

In [16]:
encoder.classes_

array(['anger', 'fear', 'joy', 'sadness'], dtype='<U7')

In [17]:
for index, value in enumerate(np.sum(y_pred, axis=0) / len(y_pred)):
    print(encoder.classes_[index] + ": " + str(value))

anger: 0.3927647
fear: 0.40790388
joy: 0.11488936
sadness: 0.08444089


In [18]:
y_pred_argmax = y_pred.argmax(axis=1)
data_len = len(y_pred_argmax)
for index, value in enumerate(np.unique(y_pred_argmax)):
    print(encoder.classes_[index] + ": " + str(len(y_pred_argmax[y_pred_argmax == value]) / data_len))

anger: 0.44462966049897207
fear: 0.47041173528921487
joy: 0.062121464688559204
sadness: 0.022837139523253877


In [22]:
data.text.iloc[0]

'opinion whatever problems face together coming months coronavirus knew last election important reasons brexit little know amp would faced current situation got right man job'

In [25]:
data.head()

Unnamed: 0,id,date,user,text
0,1242920850078216198,2020-03-25 21:06:37,xJUSTBELIEVE,opinion whatever problems face together coming...
1,1242920849948192769,2020-03-25 21:06:37,GofasNicholas,greta thunberg coronavirus
2,1242920849168089092,2020-03-25 21:06:37,IHeartChadMM87,still understand dp keep walk dog outside ever...
3,1242920849130360833,2020-03-25 21:06:37,NeotropeNews,sha bandzz donates proceeds watch work coronav...
4,1242920849088315403,2020-03-25 21:06:37,deeohtee,not change twitter username something coronavi...
