In [1]:
# Switch the working directory to the project folder on Google Drive so that the
# relative file names used by later cells (train.txt, test.txt, ...) resolve.
# NOTE(review): assumes Drive is already mounted at /content/drive; no mount call is visible here.
%cd /content/drive/MyDrive/Senior Project/

/content/drive/MyDrive/Senior Project


In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files

import numpy as np
import pandas as pd

import tensorflow as tf

from wordcloud import WordCloud
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, LSTM, Embedding, Bidirectional,Dropout

import re 
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer



In [3]:
def _read_split(path):
    """Read one ';'-separated, header-less split file into a Text/Emotion frame."""
    return pd.read_csv(
        path,
        sep=';',
        header=None,
        names=["Text", "Emotion"],
        encoding='utf-8',
    )


# The three splits share the same on-disk layout, so they go through one reader.
df_train = _read_split("train.txt")
df_test = _read_split("test.txt")
df_validation = _read_split("validation.txt")

In [4]:
# Peek at the first rows of the training split.
df_train.head(n=5)

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [5]:
# Report (rows, columns) for each split.
for split_label, split_frame in (
    ("TRAIN DATA", df_train),
    ("TEST DATA", df_test),
    ("VALIDATION DATA", df_validation),
):
    print(split_label, split_frame.shape)

TRAIN DATA (16000, 2)
TEST DATA (2000, 2)
VALIDATION DATA (2000, 2)


In [6]:
# Learn the emotion -> integer mapping from the training split ONLY, then reuse
# that exact mapping for the other splits. Calling fit_transform on every split
# (as before) re-fits the encoder each time: a split with a missing class would
# silently get shifted label ids, and `lb` would end up describing the
# validation split instead of the data the model was trained on.
lb = LabelEncoder()
df_train['Emotion'] = lb.fit_transform(df_train['Emotion'])
# transform() raises ValueError on a label never seen in training, which is the
# desired loud failure rather than a silently inconsistent encoding.
df_test['Emotion'] = lb.transform(df_test['Emotion'])
df_validation['Emotion'] = lb.transform(df_validation['Emotion'])

In [7]:
# Make sure the NLTK stop-word corpus is available locally.
nltk.download('stopwords')
# NOTE: this rebinds the name `stopwords` (imported earlier from nltk.corpus)
# to a plain set of English stop words; later cells rely on the set.
stopwords = {*nltk.corpus.stopwords.words('english')}

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [8]:
import json

# Fit a Keras Tokenizer on the raw training sentences and write its
# word -> index table to word_dict.json.
# NOTE(review): the model inputs built in text_cleaning come from `one_hot`
# hashing of stemmed text, not from this tokenizer's word_index. Confirm that
# whatever consumes word_dict.json expects this vocabulary.
texts = list(df_train["Text"])
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
with open('word_dict.json', 'w') as file:
    json.dump(tokenizer.word_index, file)

In [9]:
vocabSize = 11000
from tensorflow.keras.preprocessing.text import one_hot
def text_cleaning(df, column):
    """Clean, stem, hash-encode and left-pad every text in ``df[column]``.

    Each text has non-letters replaced by spaces, is lower-cased and split on
    whitespace; stop words are dropped and the remaining words are stemmed.
    The cleaned text is then encoded with ``one_hot`` into ``vocabSize``
    buckets and padded at the front to the module-level ``max_len``.

    Prints the shape of the padded array and returns it.
    """
    stemmer = PorterStemmer()

    def _normalise(raw):
        # Letters only -> lower-case -> whitespace tokens.
        tokens = re.sub("[^a-zA-Z]", " ", raw).lower().split()
        return " ".join(stemmer.stem(tok) for tok in tokens if tok not in stopwords)

    corpus = list(map(_normalise, df[column]))
    # NOTE(review): one_hot uses Python's built-in hash by default, which is
    # presumably not stable across interpreter sessions. Confirm before
    # reusing a saved model in a fresh runtime.
    encoded = [one_hot(input_text=doc, n=vocabSize) for doc in corpus]
    padded = pad_sequences(sequences=encoded, maxlen=max_len, padding='pre')
    print(padded.shape)
    return padded

In [10]:
# Padding length = longest training text.
# NOTE(review): len() is applied to the raw strings, so this is a character
# count, not a token count; the padded sequences are therefore much longer
# than any tokenised sentence can be.
df_train['length'] = df_train['Text'].map(len)
max_len = df_train['length'].max()
print(max_len)

# Encode each split with the same cleaning / padding pipeline.
x_train = text_cleaning(df=df_train, column="Text")
x_test = text_cleaning(df=df_test, column="Text")
x_val = text_cleaning(df=df_validation, column="Text")

300
(16000, 300)
(2000, 300)
(2000, 300)


In [11]:
# Pull the encoded emotion column out of each split as the target vector.
y_train, y_test, y_val = (
    split["Emotion"] for split in (df_train, df_test, df_validation)
)

In [12]:
# One-hot encode the integer labels. The width is fixed from the training
# labels and passed explicitly: when num_classes is omitted, to_categorical
# infers it per call as max(label) + 1, so a split that happens to lack the
# highest class id would come out narrower than the model's output layer.
num_classes = int(y_train.max()) + 1
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)

In [None]:
# Input length and output width are derived from the prepared data instead of
# being repeated as literals (previously 300 and 6), so the network cannot
# drift out of sync with the padding length or the one-hot label width.
seq_len = int(max_len)            # padding length used by text_cleaning
n_outputs = y_train.shape[1]      # y_train is one-hot at this point

# Embedding -> LSTM -> small dense head, with dropout between stages.
model = Sequential()
model.add(Embedding(input_dim=vocabSize, output_dim=150, input_length=seq_len))
model.add(Dropout(0.2))
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(Dense(64, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(n_outputs, activation='softmax'))

In [None]:
# Multi-class setup: categorical cross-entropy against the one-hot targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 300, 150)          1650000   
                                                                 
 dropout (Dropout)           (None, 300, 150)          0         
                                                                 
 lstm (LSTM)                 (None, 128)               142848    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 64)                8256      
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 6)                 3

In [None]:
# Stop once validation loss has not improved for 2 epochs and roll back to the
# best weights seen.
callback = EarlyStopping(
    monitor="val_loss",
    patience=2,
    restore_best_weights=True,
)

In [None]:
# Train for up to 10 epochs, validating on the held-out validation split; the
# early-stopping callback may end the run sooner.
hist = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=64,
    callbacks=[callback],
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [None]:
# [loss, accuracy] on the validation split.
model.evaluate(x=x_val, y=y_val, verbose=1)




[0.3399337828159332, 0.8899999856948853]

In [None]:
# [loss, accuracy] on the test split.
model.evaluate(x=x_test, y=y_test, verbose=1)




[0.34180304408073425, 0.8840000033378601]

In [None]:

# Persist the trained network as HDF5 and hand the file to the browser.
model_name = 'model_emotion_recognition'
model_name_ = f'{model_name}.h5'

model.save(model_name_)
files.download(model_name_)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from tensorflow import keras

In [None]:
# Reload the HDF5 file written by the save cell.
# NOTE(review): `reconstructed_model` is not referenced by any later cell shown
# here (the TFLite export uses `model`), so this looks like a sanity check that
# the file loads. Confirm or remove.
reconstructed_model = keras.models.load_model("model_emotion_recognition.h5")

In [14]:

# Convert the in-memory Keras model to a TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Quantisation options are currently disabled:
# converter.optimizations = [ tf.lite.Optimize.DEFAULT ]
# converter.target_spec.supported_types = [ tf.float16 ]
# NOTE(review): no optimisation is requested, so despite the `_q` suffix the
# exported file is presumably not quantised. Confirm.
# Allow falling back to full TensorFlow ops for anything the builtin op set cannot express.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter._experimental_lower_tensor_list_ops = False
buffer = converter.convert()

# Build the output path once and write through a context manager so the file
# handle is closed (and flushed) before the download is triggered.
tflite_path = '{}_q.tflite'.format(model_name)
with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(buffer)
files.download(tflite_path)


In [15]:
# Leftover from an earlier export path: every line except the flag assignment
# is commented out.
# NOTE(review): the flag is set after `buffer` was already produced by the
# export cell and convert() is not called again here, so this assignment does
# not affect any file written by this notebook. Confirm whether it can go.
# converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file('models/model.h5')
converter.post_training_quantize = True
# tflite_buffer = converter.convert()
# open( 'android/model.tflite' , 'wb' ).write( tflite_buffer )

In [None]:
# Per-epoch training accuracy recorded by model.fit.
# NOTE(review): not used by any later cell shown here.
accuracy = hist.history['accuracy']


In [None]:
def sentence_cleaning(sentence, verbose=False):
    """Turn one raw sentence into a padded model input.

    Applies the same steps as the training-time cleaning: non-letters are
    replaced by spaces, the text is lower-cased and split on whitespace,
    stop words are removed, the remaining words are stemmed, and the result is
    encoded with ``one_hot`` into ``vocabSize`` buckets and front-padded to the
    module-level ``max_len``.

    Args:
        sentence: The raw sentence to encode.
        verbose: When True, print the cleaned text, the encoded ids and the
            padded array (debugging aid). Defaults to False.

    Returns:
        The padded array produced by ``pad_sequences`` for this one sentence.
    """
    stemmer = PorterStemmer()
    # Fixed: this line previously read the undefined name `sentenlece`, so
    # every call failed with NameError.
    text = re.sub("[^a-zA-Z]", " ", sentence)
    tokens = text.lower().split()
    cleaned = " ".join(stemmer.stem(word) for word in tokens if word not in stopwords)

    # one_hot / pad_sequences work on batches, so wrap the single sentence.
    corpus = [cleaned]
    one_hot_word = [one_hot(input_text=doc, n=vocabSize) for doc in corpus]
    pad = pad_sequences(sequences=one_hot_word, maxlen=max_len, padding='pre')

    if verbose:
        print(corpus)
        print(one_hot_word)
        print(pad)
    return pad

In [16]:
# model.predict(sentence_cleaning("He is really sweet and caring"))

In [None]:
# Smoke-test the model on a few hand-written sentences (including a typo and
# a nonsense string) and show the predicted label with its class probabilities.
sentences = [
    "He is really sweet and carin",
    "This is outrageous, how can you talk like that?",
    "asddddddddddddddddddddddddddddddd",
    "He is really sweet and caring",
]
for sentence in sentences:
    print(sentence)
    # Keep the encoded input under its own name instead of rebinding `sentence`.
    model_input = sentence_cleaning(sentence)
    # Run inference once and derive both the label and the probabilities from
    # the same output (previously predict() was called twice per sentence).
    proba = model.predict(model_input)
    result = lb.inverse_transform(np.argmax(proba, axis=-1))[0]
    print(f"{result} : {proba}\n\n")

He is really sweet and carin
love : [[0.05524119 0.01878206 0.4017081  0.47265777 0.0304121  0.02119883]]


This is outrageous, how can you talk like that?
anger : [[0.77247286 0.14743698 0.00844837 0.00147899 0.06255452 0.00760833]]


asddddddddddddddddddddddddddddddd
fear : [[0.31491187 0.51368916 0.01619879 0.00112034 0.14584391 0.00823602]]


He is really sweet and caring
love : [[0.01005264 0.00111309 0.08044285 0.89178646 0.00113596 0.01546905]]


