In [1]:
import numpy as np
import pandas as pd
from numpy import array
from numpy import cumsum
from tqdm import tqdm

# keras
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional, Embedding, Input
from keras.layers import Flatten, Activation, RepeatVector, Permute, multiply, Lambda
from keras.optimizers import Adam
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras import backend

Using TensorFlow backend.


In [3]:
# Read the train and test splits (CSV files one directory above the notebook).
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../X_train.csv', '../X_test.csv')
)

In [None]:
# Fit the vocabulary on the training texts only, then encode both splits
# with that same vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(list(X_train['tokenized_text']))
list_tokenized_train, list_tokenized_test = (
    tokenizer.texts_to_sequences(split['tokenized_text'])
    for split in (X_train, X_test)
)

In [None]:
# Fixed length every encoded text is brought to; named once so the train and
# test calls cannot drift apart.
MAX_SEQUENCE_LENGTH = 150

# Shorter sequences are zero-padded at the end (padding='post'). Longer ones
# are cut by pad_sequences' default truncating='pre', i.e. from the front.
pad_train = pad_sequences(list_tokenized_train, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
pad_test = pad_sequences(list_tokenized_test, maxlen=MAX_SEQUENCE_LENGTH, padding='post')

In [None]:
# One-hot encode the labels. The category set is taken from the training
# split and reused for the test split so both matrices have the same columns
# in the same order. Calling get_dummies independently on each split would
# silently yield a different layout whenever the test split is missing a
# class or contains one the training split never saw.
label_categories = pd.Categorical(X_train['mental_state']).categories

y_target = pd.get_dummies(
    pd.Categorical(X_train['mental_state'], categories=label_categories)
).values
# A test label outside label_categories becomes an all-zero row.
y_target_test = pd.get_dummies(
    pd.Categorical(X_test['mental_state'], categories=label_categories)
).values

In [None]:
# Number of embedding rows: one per word known to the tokenizer, plus one
# because Keras Tokenizer indices start at 1 (index 0 is never assigned to a word).
vocab_size = 1 + len(tokenizer.word_index)

## GloVe

In [None]:
# Parse the GloVe text file into a {word: vector} lookup.
embedding_vector = {}
# The context manager closes the file once parsing is done (the bare open()
# never did). The encoding is explicit so decoding does not depend on the
# platform default.
with open('glove.42B.300d.txt', encoding='utf-8') as f:
    for line in tqdm(f):
        # Each line is "<word> <v1> <v2> ..."; strip the trailing newline
        # before splitting so the last component is a clean number.
        value = line.rstrip().split(' ')
        word = value[0]
        coef = np.array(value[1:], dtype='float32')
        embedding_vector[word] = coef

In [None]:
# Create the embedding matrix: row i holds the GloVe vector of the word whose
# tokenizer index is i (300 = dimensionality of the loaded GloVe vectors).
embedding_matrix = np.zeros((vocab_size, 300))
# Look up every word known to the tokenizer. The original iterated over
# `t.word_index`, but no `t` is defined; the fitted tokenizer is `tokenizer`.
for word, i in tqdm(tokenizer.word_index.items()):
    embedding_value = embedding_vector.get(word)
    # Some words may not exist in GloVe; their rows stay all-zero.
    if embedding_value is not None:
        embedding_matrix[i] = embedding_value

## BiLSTM with Attention

In [None]:
units = 64
# The Bidirectional wrapper concatenates the forward and backward LSTM
# outputs, so every time step carries 2 * units features.
feature_dim = 2 * units
# One output unit per one-hot label column.
n_classes = y_target.shape[1]

_input = Input(shape=(pad_train.shape[1],), dtype='int64')

# get the embedding layer: a frozen lookup table initialised with the GloVe
# matrix. (The original called an undefined `embeddings_layer` helper with a
# misspelled `embeddings_matrix`; the Keras Embedding layer imported at the
# top of the notebook does the same job.)
embedded = Embedding(input_dim=embedding_matrix.shape[0],
                     output_dim=embedding_matrix.shape[1],
                     weights=[embedding_matrix],
                     trainable=False)(_input)

# BiLSTM, as announced by the section header (the original used a plain LSTM).
activations = Bidirectional(LSTM(units, return_sequences=True))(embedded)

# compute importance for each step
attention = TimeDistributed(Dense(1, activation='tanh'))(activations)  # (batch, steps, 1)
attention = Flatten()(attention)                                       # (batch, steps)
attention = Activation('softmax')(attention)                           # weights sum to 1 over the steps
# Broadcast each step's weight across all features so it can be multiplied
# element-wise with the recurrent activations.
attention = RepeatVector(feature_dim)(attention)                       # (batch, feature_dim, steps)
attention = Permute([2, 1])(attention)                                 # (batch, steps, feature_dim)

# apply the attention
sent_representation = multiply([activations, attention])
# Weighted sum over the time axis (axis=1). The original summed over axis=0,
# which is the batch axis and would mix different samples together.
# The result is already (batch, feature_dim), so no Flatten is needed.
sent_representation = Lambda(lambda xin: backend.sum(xin, axis=1),
                             output_shape=(feature_dim,))(sent_representation)

probabilities = Dense(n_classes, activation='softmax')(sent_representation)

model = Model(inputs=_input, outputs=probabilities)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None".
model.summary()