### Let's try some word embeddings

In [None]:
# NOTE(review): unpinned install — the recorded output below shows that
# tensorflow-1.13.1 was resolved when this ran; consider pinning a version.
!pip3 install tensorflow
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.layers import Dense , Input , LSTM , Embedding, Dropout , Activation, GRU, Flatten
from keras.layers import Bidirectional, GlobalMaxPool1D
from keras.models import Model, Sequential
from keras.layers import Convolution1D
from keras import initializers, regularizers, constraints, optimizers, layers

from keras.utils import plot_model

import pickle
import numpy as np
import csv
from sklearn.metrics import f1_score, confusion_matrix

Collecting tensorflow
  Downloading https://files.pythonhosted.org/packages/77/63/a9fa76de8dffe7455304c4ed635be4aa9c0bacef6e0633d87d5f54530c5c/tensorflow-1.13.1-cp36-cp36m-manylinux1_x86_64.whl (92.5MB)
[?25l[K    0% |                                | 10kB 3.5MB/s eta 0:00:27[K    0% |                                | 20kB 555kB/s eta 0:02:47[K    0% |                                | 30kB 809kB/s eta 0:01:55[K    0% |                                | 40kB 520kB/s eta 0:02:58[K    0% |                                | 51kB 618kB/s eta 0:02:30[K    0% |                                | 61kB 732kB/s eta 0:02:07[K    0% |                                | 71kB 810kB/s eta 0:01:55[K    0% |                                | 81kB 919kB/s eta 0:01:41[K    0% |                                | 92kB 750kB/s eta 0:02:04[K    0% |                                | 102kB 830kB/s eta 0:01:52[K    0% |                                | 112kB 847kB/s eta 0:01:50[K    0% |     

In [None]:
# Preprocessing / embedding configuration shared by the cells below.
MAX_NB_WORDS = 40000 # max no. of words for tokenizer
MAX_SEQUENCE_LENGTH = 30 # max length of text (words) including padding
VALIDATION_SPLIT = 0.2 # fraction of the shuffled samples held out from training
EMBEDDING_DIM = 200 # embedding dimensions for word vectors (word2vec/GloVe)
# Derive the file name from EMBEDDING_DIM so the vectors that get loaded always
# match the width of the embedding matrix built from the same constant.
GLOVE_DIR = "data/demo/glove.twitter.27B." + str(EMBEDDING_DIM) + "d.txt"

In [None]:
# Load the dataset: column 0 of every CSV row is the text, column 1 its label.
texts, labels = [], []
print("[i] Reading from csv file...", end="")
# newline='' lets the csv module do its own line-ending handling, so quoted
# fields that contain embedded newlines are parsed correctly.
with open('data/demo/data.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        if not row:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing with IndexError on row[0].
            continue
        texts.append(row[0])
        labels.append(row[1])
print("Done!")

In [None]:
# Number of text samples read from the CSV file.
len(texts)

The dataset is labeled with 5 emotions: "Neutral", "Happy", "Sad", "Hate", "Anger"

In [None]:
# Preview only the first few samples; echoing the whole list floods the
# notebook output and bloats the saved file.
texts[:5]

In [None]:
# Fit the word tokenizer on the whole corpus (num_words is capped by
# MAX_NB_WORDS).
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)

# Persist the fitted tokenizer so the same vocabulary can be reloaded later.
with open('tokenizerv1.pickle', 'wb') as pickle_file:
    pickle_file.write(pickle.dumps(tokenizer, protocol=pickle.HIGHEST_PROTOCOL))
print("[i] Saved word tokenizer to file: tokenizerv1.pickle")

In [None]:
# Encode every text as a list of integer word ids.
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print('[i] Found %s unique tokens.' % (len(word_index),))

# Two-step padding: first pad at the front up to MAX_SEQUENCE_LENGTH - 5, then
# pad at the back up to MAX_SEQUENCE_LENGTH, which appends the remaining
# 5 positions at the end of every row.
data_int = pad_sequences(
    sequences,
    maxlen=MAX_SEQUENCE_LENGTH - 5,
    padding='pre',
)
data = pad_sequences(
    data_int,
    maxlen=MAX_SEQUENCE_LENGTH,
    padding='post',
)

In [None]:
# to_categorical is exposed by the public keras.utils package (the same package
# plot_model is imported from in the first cell); keras.utils.np_utils is an
# internal module path that newer Keras releases no longer provide.
from keras.utils import to_categorical
# The CSV reader yields labels as strings, so convert them to integers
# explicitly before one-hot encoding.
# NOTE: this cell overwrites `labels`; re-running it without re-reading the CSV
# would encode the already one-hot array a second time.
labels = to_categorical(np.asarray(labels, dtype='int32')) # convert to one-hot encoding vectors
print('[+] Shape of data tensor:', data.shape)
print('[+] Shape of label tensor:', labels.shape)

In [None]:
# Shuffle samples and labels with one shared permutation so rows stay aligned.
# A dedicated, seeded generator makes the resulting train/hold-out split
# reproducible across kernel restarts without touching NumPy's global
# random state.
SHUFFLE_SEED = 42
indices = np.arange(data.shape[0])
np.random.RandomState(SHUFFLE_SEED).shuffle(indices)
data = data[indices]
labels = labels[indices]
# Number of rows to hold out, as a fraction of the dataset size.
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

In [None]:
# Hold out the last nb_validation_samples rows of the shuffled data.
# Slice with an explicit boundary index: with negative slicing, a hold-out size
# of 0 would turn `[:-0]` into an empty training set and `[-0:]` into a
# hold-out set containing every sample.
split_at = data.shape[0] - nb_validation_samples
x_train = data[:split_at]
y_train = labels[:split_at]
x_val = data[split_at:]
y_val = labels[split_at:]

In [None]:
# Parse the pre-trained vectors: each line is a word followed by its
# EMBEDDING_DIM coefficients, separated by whitespace.
embeddings_index = {}
# `with` guarantees the file is closed even if parsing raises.
with open(GLOVE_DIR, encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if len(values) != EMBEDDING_DIM + 1:
            # Blank or malformed line (wrong number of fields): skip it rather
            # than storing a vector that cannot fit a row of the matrix.
            continue
        word = values[0]
        embeddings_index[word] = np.asarray(values[1:], dtype='float32')

# One row per tokenizer index, plus one extra row (presumably because tokenizer
# indices start at 1 — TODO confirm). Rows start out random in [0, 1).
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # Words missing from the embedding index keep their random initial row
        # (they are NOT zeroed).
        embedding_matrix[i] = embedding_vector

In [None]:
# Model input: one fixed-length row of integer word ids per sample.
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

# static channel: initialised from embedding_matrix and excluded from training
embedding_layer_frozen = Embedding(
    input_dim=len(word_index) + 1,
    output_dim=EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=MAX_SEQUENCE_LENGTH,
    trainable=False,
)
embedded_sequences_frozen = embedding_layer_frozen(sequence_input)

In [None]:
# Training hyper-parameters.
batch_size = 100
epochs = 10

# Bidirectional LSTM over the frozen embeddings -> global max pooling ->
# small dense head with light dropout -> 5-way softmax output.
lstm_1 = Bidirectional(LSTM(32, return_sequences=True))(embedded_sequences_frozen)
pool_1 = GlobalMaxPool1D()(lstm_1)
dense_1 = Dense(20, activation="relu")(pool_1)
dropout_1 = Dropout(0.05)(dense_1)
preds = Dense(5, activation="softmax")(dropout_1)

advanced_model = Model(inputs=sequence_input, outputs=preds)
advanced_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# validation_split=0.2 is applied to x_train/y_train here, in addition to the
# x_val/y_val rows that were already held out.
advanced_model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

In [None]:
# Evaluate on the held-out rows: turn both the predicted probabilities and the
# one-hot targets into class indices.
y_pred_adv = advanced_model.predict(x_val)
pred_adv = np.argmax(y_pred_adv, axis=1)
real_adv = np.argmax(y_val, axis=1)
# scikit-learn metrics take (y_true, y_pred) in that order. The micro-averaged
# F1 value is the same either way, but the confusion matrix is transposed when
# the arguments are swapped (rows must be the true classes).
print('F1-score: {0}'.format(f1_score(real_adv, pred_adv, average='micro')))
print('Confusion matrix:')
confusion_matrix(real_adv, pred_adv)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
classes = ["neutral", "happy", "sad", "hate","anger"]
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps the precision and recall columns.
# `labels` pins the report to all class indices so target_names still lines up
# when a class happens to be absent from the held-out rows or the predictions.
print(classification_report(real_adv, pred_adv,
                            labels=list(range(len(classes))),
                            target_names=classes))

In [None]:
def predict_emotion(text):
    """Classify a single text and return the name of its predicted emotion.

    The text is encoded with the fitted ``tokenizer`` and padded with the same
    two-step scheme that is applied to the training data, then passed to
    ``advanced_model``. The raw model output is printed as a side effect.

    Args:
        text: The string to classify.

    Returns:
        "Neutral", "Happy", "Sad", "Hate" or "Anger" for predicted class
        indices 0-4. For any other index the raw argmax array is returned.
    """
    emotion_names = ("Neutral", "Happy", "Sad", "Hate", "Anger")

    sequences = tokenizer.texts_to_sequences([text])
    data_int = pad_sequences(sequences, padding='pre', maxlen=(MAX_SEQUENCE_LENGTH-5))
    data = pad_sequences(data_int, padding='post', maxlen=(MAX_SEQUENCE_LENGTH))
    prediction_ar = advanced_model.predict(data)
    pr = np.argmax(prediction_ar, axis=1)
    print(prediction_ar)

    # A single text gives a single row, so `pr` holds exactly one index; take
    # it as a plain int instead of comparing a NumPy array against scalars.
    class_index = int(pr[0])
    if 0 <= class_index < len(emotion_names):
        return emotion_names[class_index]
    return pr

# Smoke-test the classifier on a handful of hand-written sentences.
for sample in (
    "What a lovely day! What a lovely day! What a lovely day!",
    "I hate you",
    "A king in the castle",
    "i am so sorry for what happened!",
    "I'm angry of you!",
    "The program normally began to work only in the morning!!",
):
    print(predict_emotion(sample))

In [None]:
# Try a much longer, multi-line input. predict_emotion pads its input with
# maxlen=MAX_SEQUENCE_LENGTH - 5, so this text is far longer than that limit.
long_text = """When you were here before
Couldn't look you in the eye
You're just like an angel
Your skin makes me cry
You float like a feather
In a beautiful world
And I wish I was special
You're so fuckin' special
But I'm a creep, I'm a weirdo.
What the hell am I doing here?
I don't belong here.
I don't care if it hurts
I want to have control
I want a perfect body
I want a perfect soul
I want you to notice
When I'm not around
You're so fuckin' special
I wish I was special
But I'm a creep, I'm a weirdo.
What the hell am I doing here?
I don't belong here.
She's running out again,
She's running out
She's run run run run
Whatever makes you happy
Whatever you want
You're so fuckin' special
I wish I was special
But I'm a creep, I'm a weirdo,
What the hell am I doing here?
I don't belong here.
I don't belong here."""
print(predict_emotion(long_text))