In [1]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras

import re 
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

import numpy as np
import pandas as pd

In [2]:
# The three splits share one on-disk layout: no header row, ';' separating the
# comment text from its emotion label, UTF-8 encoded.
_csv_layout = {
    "header": None,
    "sep": ";",
    "names": ["Comment", "Emotion"],
    "encoding": "utf-8",
}

test_data = pd.read_csv("data/test.txt", **_csv_layout)
train_data = pd.read_csv("data/train.txt", **_csv_layout)
validation_data = pd.read_csv("data/val.txt", **_csv_layout)

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.preprocessing import LabelEncoder

In [4]:
# Encode the string emotion labels as integers.
#
# The encoder is fitted ONCE, on the training labels, and that same mapping is
# then applied to the other splits. Calling fit_transform() on every frame
# would leave `lb` fitted on whichever split came last, and could assign
# different integers to the same emotion if a split happened to lack a class.
lb = LabelEncoder()
train_data['Emotion'] = lb.fit_transform(train_data['Emotion'])

# transform() raises ValueError for a label that was not seen during fit(),
# which surfaces data problems instead of silently remapping classes.
test_data['Emotion'] = lb.transform(test_data['Emotion'])
validation_data['Emotion'] = lb.transform(validation_data['Emotion'])

In [5]:
# Make sure the NLTK stop-word corpus is available locally.
nltk.download('stopwords')

# Build a set of English stop words for fast membership tests.
# NOTE: this rebinds the name `stopwords` (imported from nltk.corpus at the top
# of the notebook) to a plain set; the corpus reader is therefore reached
# through its fully qualified name here.
stopwords = {word for word in nltk.corpus.stopwords.words('english')}

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sauga_g\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
# Character count of every training comment, stored as an extra column.
train_data['length'] = list(map(len, train_data['Comment']))

# Passed as `n` to one_hot() when sentences are encoded for prediction.
vocab_size = 10000

# Padding length: the longest training comment, measured in characters.
# NOTE(review): presumably this must equal the input length the saved model
# was trained with — confirm against the training notebook.
max_len = train_data['length'].max()

In [7]:
# Load the saved Keras model from the relative path 'model.h5'; it is the
# module-level `model` used by getEmotion() and getDict() for prediction.
model = keras.models.load_model('model.h5')

In [8]:
def sentence_cleaning(sentence):
    """Turn a raw sentence into a padded integer sequence for the model.

    Steps, in order:
      1. replace every character that is not an ASCII letter with a space;
      2. lowercase and split on whitespace;
      3. drop tokens found in the module-level ``stopwords`` collection and
         Porter-stem the remaining ones;
      4. hash the cleaned words to integers with ``one_hot`` using
         ``vocab_size`` as ``n``;
      5. pad at the front to ``max_len`` with ``pad_sequences``.

    Parameters
    ----------
    sentence : str
        The raw text to preprocess.

    Returns
    -------
    The value returned by ``pad_sequences`` for a single encoded sentence.

    NOTE(review): Keras ``one_hot`` defaults to Python's built-in ``hash``,
    which is documented as not stable across runs. Confirm that the indices
    produced here match the ones the loaded model was trained with (e.g. a
    fixed PYTHONHASHSEED or a stable hash function at training time).
    """
    # Keep letters only, then normalise case and tokenize.
    tokens = re.sub("[^a-zA-Z]", " ", sentence).lower().split()

    porter = PorterStemmer()
    stems = []
    for token in tokens:
        if token in stopwords:
            continue
        stems.append(porter.stem(token))
    cleaned = " ".join(stems)

    # pad_sequences expects a list of sequences, hence the one-element list.
    encoded = [one_hot(input_text=cleaned, n=vocab_size)]
    return pad_sequences(sequences=encoded, maxlen=max_len, padding='pre')

In [9]:
def getEmotion(sentence):
    """Return the highest-scoring emotion label for ``sentence``.

    The text is preprocessed with ``sentence_cleaning``, scored by the
    module-level ``model``, and the arg-max class index is mapped back to its
    original label through the module-level encoder ``lb``.
    """
    padded = sentence_cleaning(sentence)
    scores = model.predict(padded)
    best_class = np.argmax(scores, axis=-1)
    # A single sentence was scored, so take the only decoded label.
    return lb.inverse_transform(best_class)[0]

In [10]:
def getDict(sentence):
    """Return a dict mapping each emotion label to the model's score for it.

    The text is preprocessed with ``sentence_cleaning`` and scored by the
    module-level ``model``; the scores of the single input sentence are then
    paired, position by position, with the labels known to the module-level
    encoder ``lb``.

    Parameters
    ----------
    sentence : str
        The raw text to score.

    Returns
    -------
    dict
        ``{label: score}`` with one entry per label/score pair.
    """
    padded = sentence_cleaning(sentence)

    # lb.classes_[i] is the label encoded as integer i, so it equals
    # lb.inverse_transform(range(n_classes)) without hard-coding the number
    # of classes (previously fixed to six).
    labels = lb.classes_

    # One sentence in, so row 0 holds its per-class scores.
    scores = model.predict(padded)[0]
    return dict(zip(labels, scores))

In [15]:
# Example call: predict the emotion label for a single sentence; as the last
# expression in the cell, the returned label is displayed as the cell output.
getEmotion('she is really sweet and caring')

'love'