## EMotion Detection from Text Data

We have taken the dataset from the Kaggle https://www.kaggle.com/c/sa-emotions

### Load the Libraries

In [9]:
import re
import nltk
import sklearn
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, load_model
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import Embedding,LSTM, Dense, SpatialDropout1D
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

In [10]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 6044468336669264752]

### Read the dataset

In [11]:
#data = pd.read_csv('data/train_data.csv')
data_raw = pd.read_csv('data/isear.csv', error_bad_lines=False,sep="|")
data=pd.DataFrame({'content':data_raw['SIT'],'sentiment':data_raw['Field1']})

In [39]:
class EmotionClassifier:
    def __init__(self):
        self.model = None
        self.label_encoder = None

        # most used n words
        self.max_num_words = None
        # max length of each text
        self.max_text_length = None
        # feature size of each vector
        self.feature_vector_size = None

        self.emotions_labels_map = {'anger': 0, 'disgust': 1, 'fear': 2, 'guilt': 3, 'joy': 4, 'sadness': 5, 'shame': 6}
        self.labels_emotions_map = {0: 'anger', 1: 'disgust', 2: 'fear', 3: 'guilt', 4: 'joy', 5: 'sadness', 6: 'shame'}

    def clean_text(text):
        """Clean_text"""
        text = re.sub('[^a-zA-z0-9\s]', '', text)
        stop_words_list = set(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        words = nltk.word_tokenize(text)
        words = [re.sub("\\\\", '', word.lower()) for word in words if
                 word.lower() not in stop_words_list and word.isalpha()]  # filter(lambda word: word not in stop_words_list, text.split())
        words = [lemmatizer.lemmatize(word) for word in words]

        text = " ".join(words)
        return text

    def fit_label_encoder(self, labels):
        """Fit the label enocder to one-hot encode the labels"""
        self.label_encoder = OneHotEncoder(categories='auto')
        self.label_encoder.fit(labels)

    def encode_labels(self, labels):
        """Encode the labels"""
        return self.label_encoder.transform(labels).toarray()

    def create_tokenizer(self, texts, max_num_words, max_text_length=500):
        """Fit a text tokenizer"""
        self.max_text_length = max_text_length
        self.max_num_words = max_num_words
        self.tokenizer = Tokenizer(num_words=max_num_words, lower=True, split=' ')
        self.tokenizer.fit_on_texts(np.array(texts, dtype=object))

    def map_features(self, texts):
        """Map text to feature vetcors"""
        feature_vectors = self.tokenizer.texts_to_sequences(np.array(texts, dtype=object))
        feature_vectors = pad_sequences(feature_vectors, maxlen=self.max_text_length)
        self.feature_vector_size = feature_vectors.shape[1];
        return feature_vectors

    def create_model(self, embed_dim, lstm_units):
        """Create a LSTM Neural Netwrok with Convolution layer"""
        self.model = Sequential()
        self.model.add(Embedding(self.max_num_words, embed_dim, input_length=self.feature_vector_size))
        self.model.add(Dropout(0.5))
        self.model.add(Conv1D(filters=32, kernel_size=5, padding='same', activation='relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Conv1D(filters=32, kernel_size=5, padding='same', activation='relu'))
        self.model.add(Dropout(0.5))
        self.model.add(MaxPooling1D(pool_size=2))
        self.model.add(SpatialDropout1D(0.5))
        self.model.add(LSTM(lstm_units, dropout=0.5, recurrent_dropout=0.5))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(7, activation='softmax'))

    def compile_model(self, loss_function, optimizer, metrics):
        """Compile the model"""
        self.model.compile(loss=loss_function, optimizer=optimizer, metrics=[metrics])

    def train(self, X_train, Y_train, X_valid, Y_valid, epochs=10, batch_size=128, verbose=0):
        """Train the model"""
        self.model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), batch_size=batch_size,
                       epochs=epochs, verbose=verbose)

    def classify(self, text):
        """Classify Emotions"""
        text = self.clean_text(text)
        feature_vectors = self.map_features(text)
        predicted_label = np.argmax(self.model.predict(feature_vectors), axis=1)
        return self.labels_emotions_map[predicted_label]

    def load_model(self, model_path):
        """Load pre-trained model"""
        self.model = load_model(model_path)

    def save_model(self, model_path):
        self.model.save(model_path)


In [7]:
max_num_words=4000
max_text_length=500
embed_dim =128
lstm_units =128
emotionClassifier=EmotionClassifier()
data['content']=data['content'].apply(emotionClassifier.clean_text)
data['sentiment_label']=[emotionClassifier.emotions_labels_map[sentiment] for sentiment in data['sentiment']]
emotionClassifier.create_tokenizer(data['content'],max_num_words,max_text_length)
feature_vectors = emotionClassifier.map_features(data['content'])
labels = np.array(data['sentiment_label']).reshape(-1,1)
emotionClassifier.fit_label_encoder(labels)
labels = emotionClassifier.encode_labels(labels)
emotionClassifier.create_model(embed_dim, lstm_units)
emotionClassifier.compile_model(loss_function='categorical_crossentropy', optimizer='rmsprop', metrics='accuracy')

In [13]:
emotionClassifier.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 500, 128)          512000    
_________________________________________________________________
dropout_9 (Dropout)          (None, 500, 128)          0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 500, 32)           20512     
_________________________________________________________________
dropout_10 (Dropout)         (None, 500, 32)           0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 500, 32)           5152      
_________________________________________________________________
dropout_11 (Dropout)         (None, 500, 32)           0         
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 250, 32)           0         
__________

In [14]:
X_train, X_valid, Y_train, Y_valid = train_test_split(feature_vectors,labels, test_size = 0.2, random_state = 42)
emotionClassifier.train(X_train, Y_train, X_valid, Y_valid, batch_size=32, epochs=20, verbose=1)

Train on 6132 samples, validate on 1534 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Evaluation

**Training**

In [16]:
pred = np.argmax(emotionClassifier.model.predict(X_train),axis=1)
Y=np.argmax(Y_train,axis=1)
confusionMatrix=pd.DataFrame(confusion_matrix(pred, Y))
confusionMatrix.columns = emotionClassifier.emotions_labels_map.keys()
confusionMatrix.index = emotionClassifier.emotions_labels_map.keys()
confusionMatrix

Unnamed: 0,anger,disgust,fear,guilt,joy,sadness,shame
anger,608,41,7,62,1,11,44
disgust,75,703,34,22,10,16,31
fear,3,24,728,7,9,17,9
guilt,43,5,0,457,0,4,46
joy,12,30,74,32,817,235,46
sadness,10,3,10,18,9,486,28
shame,122,66,35,290,31,95,666


**Validation**

In [34]:
data['content'][0],data['sentiment'][0]

('During the period of falling in love, each time that we met and á especially when we had not met for a long time.',
 'joy')

In [41]:
emotionClassifier.classify(data['content'][0])

AttributeError: 'EmotionClassifier' object has no attribute 'clean'

In [17]:
pred = np.argmax(emotionClassifier.model.predict(X_valid),axis=1)
Y=np.argmax(Y_valid,axis=1)
confusionMatrix=pd.DataFrame(confusion_matrix(pred, Y))
confusionMatrix.columns = emotionClassifier.emotions_labels_map.keys()
confusionMatrix.index = emotionClassifier.emotions_labels_map.keys()
confusionMatrix

Unnamed: 0,anger,disgust,fear,guilt,joy,sadness,shame
anger,103,16,2,28,0,6,25
disgust,31,133,20,13,6,10,21
fear,9,17,134,11,10,5,15
guilt,24,5,2,56,0,1,20
joy,15,18,20,16,166,71,35
sadness,7,1,4,10,6,108,11
shame,34,34,25,71,29,31,99


In [None]:
When my grandfather died

In [23]:
import pickle
emotionClassifier.save_model('model/lstm_model.h5')
emotionClassifier.model = None
output_file = 'model/emotion_classifer.pkl'
with open(output_file, 'wb') as output:
    pickle.dump(emotionClassifier, output, pickle.HIGHEST_PROTOCOL)

PicklingError: Can't pickle <class '__main__.EmotionClassifier'>: it's not the same object as __main__.EmotionClassifier

In [47]:
with open('model/emotion_classifer.pkl', 'rb') as input:
    model = pickle.load(input)
    model.load_model('model/lstm_model.h5')

    model.tokenizer.oov_token = None

In [48]:
model.classify("During the period of falling in love, each time that we met and á especially when we had not met for a long time")

'joy'

In [26]:
pred = np.argmax(emotionClassifier.model.predict(X_valid),axis=1)
Y=np.argmax(Y_valid,axis=1)
confusionMatrix=pd.DataFrame(confusion_matrix(pred, Y))
confusionMatrix.columns = emotionClassifier.emotions_labels_map.keys()
confusionMatrix.index = emotionClassifier.emotions_labels_map.keys()
confusionMatrix

Unnamed: 0,anger,disgust,fear,guilt,joy,sadness,shame
anger,103,16,2,28,0,6,25
disgust,31,133,20,13,6,10,21
fear,9,17,134,11,10,5,15
guilt,24,5,2,56,0,1,20
joy,15,18,20,16,166,71,35
sadness,7,1,4,10,6,108,11
shame,34,34,25,71,29,31,99
