### LSTM with Using Different Layers

In [1]:
import glove
import keras
import labeled_functions
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import LSTM, Dense, Input, Embedding, Conv1D
from keras.layers import MaxPooling1D, GlobalMaxPooling1D, Flatten
from keras.layers.recurrent import LSTM # Don't know the difference
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import text_to_word_sequence, one_hot, Tokenizer
from sklearn.metrics import precision_recall_curve, precision_score
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


#### Loading, Splitting, Preprocessing Dataset

In [2]:
X_corpus, y = labeled_functions.load_with_path(path=r"/Users/ekremguzelyel/Desktop/Assignments/Research/MLLab-IIT/edu/active_learning")

X_sequence = []
for i in X_corpus:
    X_sequence.append(text_to_word_sequence(i))

max_length=0
for i in X_sequence:
    max_length = [len(i) if len(i) > max_length else max_length][0]
print("Max Length:", max_length)

Labeled Data loaded.
Max Length: 123


_The reason I use sequence and corpus seperately is because when I use sequence my goal is to find the __maximum length__ of all sequences. If I use the same sequences for one-hot it gives me an error. Instead, I use __original corpus__ for direct translation of text to one-hot encoded sequences._

In [3]:
# One-hot Encoding and Padding Sequences
X_corpus_one_hot = []
for i in X_corpus:
    X_corpus_one_hot.append(one_hot(i, round(max_length*1.2)))
print("One-hot encoded.")

padded_seq_corpus = pad_sequences(X_corpus_one_hot)
print("Sequences padded.")

X_train_padded_seq, X_test_padded_seq, y_train, y_test = train_test_split(padded_seq_corpus, y,
                                                                          test_size=1./3, random_state=42)
print("Train-Test padded sequences split.")

One-hot encoded.
Sequences padded.
Train-Test padded sequences split.


#### Embedding Layer Implementation

In [4]:
t = Tokenizer()
t.fit_on_texts(X_corpus)
vocab_size = len(t.word_index) + 1
encoded_docs = t.texts_to_sequences(X_corpus)
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='pre')

In [5]:
# load the whole embedding into memory
embeddings_index = dict()
f = open('../../../glove.6B/glove.6B.100d.txt')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 400000 word vectors.


In [6]:
# create a weight matrix for words in training docs
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

#### Training LSTM with CNN

In [7]:
# Different combinations tested.
input_layer = Input(shape=(X_train_padded_seq.shape[1],))

e = Embedding(vocab_size,
              100,
              weights=[embedding_matrix],
              input_length=X_train_padded_seq.shape[1],
              trainable=False)(input_layer)
hidden1 = LSTM(32, return_sequences=True)(e)

cnn1 = Conv1D(128, 5, activation='tanh')(hidden1)
pool1 = MaxPooling1D(5)(cnn1)
cnn2 = Conv1D(128, 5, activation='relu')(pool1)
pool2 = GlobalMaxPooling1D()(cnn2)

# hidden2 = Dense(10, activation='relu')(pool2)
output_layer = Dense(1, activation='sigmoid')(pool2)
model = Model(inputs=input_layer, outputs=output_layer)


model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
print("Model Compiled")

Model Compiled


In [8]:
hist = model.fit(X_train_padded_seq, y_train, epochs=20, batch_size=32, verbose=0)
print("Model fitted.")
preds = model.predict(X_test_padded_seq) # It gives probabilities

Model fitted.


##### Results for LSTM + CNN

In [9]:
# Convert predictions to binary.
threshold, pred = 0.5, []
[pred.append(1 if preds[i]>threshold else 0) for i,j in enumerate(preds)]

score_test = hist.model.evaluate(X_test_padded_seq, y_test)
score_train = hist.model.evaluate(X_train_padded_seq, y_train)
score_test, score_train

precision, recall, f1, support = precision_recall_fscore_support(y_test, pred)

print("\nTestAcc", "TrainAcc", "Prec", "Rec", "F1","Sup\n", score_test[1], score_train[1], precision, recall, f1, support, sep="  ")


TestAcc  TrainAcc  Prec  Rec  F1  Sup
  0.550947867157335  0.9039145911008579  [0.58156028 0.52019002]  [0.54910714 0.5530303 ]  [0.56486797 0.53610771]  [448 396]


#### Training Stacked LSTM Layers

In [10]:
# Different combinations tested.
input_layer = Input(shape=(X_train_padded_seq.shape[1],))

e = Embedding(vocab_size,
              100,
              weights=[embedding_matrix],
              input_length=X_train_padded_seq.shape[1],
              trainable=False)(input_layer)
hidden1 = LSTM(32, return_sequences=True)(e)
hidden2 = LSTM(32, return_sequences=False)(hidden1)
hidden3 = Dense(10, activation='relu')(hidden2)

output_layer = Dense(1, activation='sigmoid')(hidden3)
model = Model(inputs=input_layer, outputs=output_layer)

model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
print("Model Compiled")
model.summary()

Model Compiled
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 123)               0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 123, 100)          416400    
_________________________________________________________________
lstm_2 (LSTM)                (None, 123, 32)           17024     
_________________________________________________________________
lstm_3 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_2 (Dense)              (None, 10)                330       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 11        
Total params: 442,085
Trainable params: 25,685
Non-trainable params: 416,400
__________________________________________________

In [11]:
hist_stacked = model.fit(X_train_padded_seq, y_train, epochs=20, batch_size=32, verbose=1, validation_split=(1./3), shuffle=False)
print("Model fitted.")
# hist_on_test= model.fit(X_evalt , )
preds_stacked = model.predict(X_test_padded_seq) # It gives probabilities

Train on 1124 samples, validate on 562 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model fitted.


##### Results for Stacked LSTM

In [None]:
X_train_padded_seq.shape

In [None]:
# Convert predictions to binary.
threshold, pred = 0.5, []
[pred.append(1 if preds[i]>0.5 else 0) for i,j in enumerate(preds)]

score_test = hist_stacked.model.evaluate(X_test_padded_seq, y_test)
score_train = hist_stacked.model.evaluate(X_train_padded_seq, y_train)
score_test, score_train

precision, recall, f1, support = precision_recall_fscore_support(y_test, pred)

print("\nTestAcc", "TrainAcc", "Prec", "Rec", "F1","Sup\n", score_test[1], score_train[1], precision[1], recall[1], f1[1], support[1], sep="  ")

#### Training LSTM with CNN (CNN First)
CNN layers give output to LSTM. In other words, LSTM's input is CNNs.

In [21]:
# Different combinations tested.
input_layer = Input(shape=(X_train_padded_seq.shape[1],))

e = Embedding(vocab_size,
              100,
              weights=[embedding_matrix],
              input_length=X_train_padded_seq.shape[1],
              trainable=False)(input_layer)
cnn1 = Conv1D(128, 5, activation='tanh')(e)
pool1 = MaxPooling1D(5)(cnn1)
# cnn2 = Conv1D(128, 5, activation='relu')(pool1)
# pool2 = GlobalMaxPooling1D()(cnn2)
# hidden0 = Dense(2, activation='relu')(pool2)


hidden1 = LSTM(32, return_sequences=False)(pool1)


hidden2 = Dense(10, activation='relu')(hidden1)
output_layer = Dense(1, activation='sigmoid')(hidden2)
model = Model(inputs=input_layer, outputs=output_layer)


model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
print("Model Compiled")

Model Compiled


In [22]:
hist = model.fit(X_train_padded_seq, y_train, epochs=20, batch_size=32, verbose=1)
print("Model fitted.")
preds = model.predict(X_test_padded_seq) # It gives probabilities

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model fitted.


##### Results for CNN First LSTM

In [23]:
# Convert predictions to binary.
threshold, pred = 0.5, []
[pred.append(1 if preds[i]>threshold else 0) for i,j in enumerate(preds)]

score_test = hist.model.evaluate(X_test_padded_seq, y_test)
score_train = hist.model.evaluate(X_train_padded_seq, y_train)
score_test, score_train

precision, recall, f1, support = precision_recall_fscore_support(y_test, pred)

print("\nTestAcc", "TrainAcc", "Prec", "Rec", "F1","Sup\n", score_test[1], score_train[1], precision, recall, f1, support, sep="  ")


TestAcc  TrainAcc  Prec  Rec  F1  Sup
  0.5399543381717107  0.9062857146263122  [0.54032258 0.53947368]  [0.60496614 0.47344111]  [0.57082002 0.50430504]  [443 433]
