In [1]:
from __future__ import print_function

import os
import sys
import time

import numpy as np
import tensorflow as tf

import keras.backend as K
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.layers import Input, Dense, Flatten, Lambda
from keras.layers import SimpleRNN, GRU, LSTM, Bidirectional
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

Using TensorFlow backend.


## Load the dataset

In [3]:
# Open the pre-processed archive; the individual entries are read from `f`
# by key in the following cell.
f = np.load('data_and_embedding.npz')

In [4]:
# Arrays used for the train/validation split and for the Embedding weights.
data = f['data']
labels = f['labels']
embedding_matrix = f['embedding_matrix']

# Size parameters, coerced to plain Python ints before being passed to Keras.
max_sequence_length = int(f['max_sequence_length'])
embedding_dim = int(f['embedding_dim'])
num_words = int(f['num_words'])

In [48]:
# Number of passes over the training set, shared by every fit() call below.
# NOTE(review): several saved training logs in this notebook read
# "Epoch n/10", so they were produced with a different value than this one.
epochs = 60

# Fraction of the shuffled samples held out for validation.
validation_split = 0.2

In [6]:
# Shuffle samples and labels with one shared permutation so that every row
# keeps its label. A dedicated, seeded generator makes the split repeatable
# across kernel restarts without touching NumPy's global random state.
split_rng = np.random.RandomState(0)
indices = np.arange(data.shape[0])
split_rng.shuffle(indices)
data = data[indices]
labels = labels[indices]

# Use an explicit boundary index instead of negative slicing: with
# num_validation_samples == 0, data[:-0] is empty and data[-0:] is the whole
# array, which would leave nothing to train on.
num_validation_samples = int(validation_split * data.shape[0])
num_train_samples = data.shape[0] - num_validation_samples

x_train = data[:num_train_samples]
y_train = labels[:num_train_samples]
x_val = data[num_train_samples:]
y_val = labels[num_train_samples:]

## Simple RNN

### Build the model 

In [15]:
# Frozen embedding lookup for the SimpleRNN model, initialised from the
# pre-computed embedding matrix (trainable=False keeps the vectors fixed).
embedding_layer = Embedding(
    input_dim=num_words,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=max_sequence_length,
    trainable=False,
)

In [16]:
def embedding_mean(x):
    """Return the mean of tensor ``x`` taken along axis 1.

    In the visible cells this helper is only referenced from commented-out
    ``Lambda(embedding_mean)`` lines, i.e. as a disabled pooling alternative
    to the recurrent layer.
    """
    averaged = tf.reduce_mean(x, axis=1)
    return averaged

In [17]:
# Classifier on the SimpleRNN output:
# token ids -> frozen embeddings -> SimpleRNN(50) -> 6-way softmax.
sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

x = SimpleRNN(units=50, dropout=0.2, recurrent_dropout=0.2)(embedded_sequences)
print(x.shape)  # shape of the recurrent layer's output

# Disabled alternative, kept for reference: pool the embeddings instead with
# Lambda(embedding_mean)(embedded_sequences).
preds = Dense(units=6, activation='softmax')(x)

model_rnn_final_state = Model(inputs=sequence_input, outputs=preds)

(?, 50)


In [18]:
# Print the layer-by-layer output shapes and parameter counts of the SimpleRNN model.
model_rnn_final_state.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, array(1000))       0         
_________________________________________________________________
embedding_2 (Embedding)      (None, array(1000), array 2000000   
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 50)                7550      
_________________________________________________________________
dense_2 (Dense)              (None, 6)                 306       
Total params: 2,007,856.0
Trainable params: 7,856.0
Non-trainable params: 2,000,000.0
_________________________________________________________________


### Train the model

In [19]:
# Training configuration: categorical cross-entropy loss, Adam optimizer,
# accuracy reported as the 'acc' metric.
model_rnn_final_state.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [20]:
# Train the SimpleRNN model and report the wall-clock duration of fit().
# `time` is imported once with the other modules at the top of the notebook
# instead of being re-imported inside each training cell.
start_time = time.time()

model_rnn_final_state.fit(x_train, y_train,
                          batch_size=128,
                          epochs=epochs,
                          validation_data=(x_val, y_val))

print("Training time: ", time.time() - start_time)

Train on 311902 samples, validate on 77975 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training time:  7807.194259881973


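**Note:** the simple RNN is probably affected by the vanishing/exploding gradient problem, since gradients have to be back-propagated through all 1000 time steps of each sequence.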

### Save the model

In [21]:
# Make sure the output directory exists before saving: if 'models/' is
# missing the save fails, and the trained SimpleRNN would only survive in
# kernel memory.
if not os.path.isdir('models'):
    os.makedirs('models')
model_rnn_final_state.save('models/RNN.h5')

## LSTM

### Build the model 

In [22]:
# Fresh frozen embedding lookup for the LSTM model, initialised from the
# pre-computed embedding matrix (trainable=False keeps the vectors fixed).
embedding_layer = Embedding(
    input_dim=num_words,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=max_sequence_length,
    trainable=False,
)

In [23]:
def embedding_mean(x):
    """Return the mean of tensor ``x`` taken along axis 1.

    NOTE(review): this notebook defines the same helper in every model
    section; each later definition silently replaces the earlier one.
    """
    averaged = tf.reduce_mean(x, axis=1)
    return averaged

In [24]:
# Classifier on the LSTM output:
# token ids -> frozen embeddings -> LSTM(50) -> 6-way softmax.
# The tensor shape is printed after every stage.
sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
print(sequence_input.shape)

embedded_sequences = embedding_layer(sequence_input)
print(embedded_sequences.shape)

x = LSTM(units=50, dropout=0.2, recurrent_dropout=0.2)(embedded_sequences)
print(x.shape)

preds = Dense(units=6, activation='softmax')(x)
print(preds.shape)

model_lstm_final_state = Model(inputs=sequence_input, outputs=preds)

(?, 1000)
(?, 1000, 100)
(?, 50)
(?, 6)


In [25]:
# Print the layer-by-layer output shapes and parameter counts of the LSTM model.
model_lstm_final_state.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, array(1000))       0         
_________________________________________________________________
embedding_3 (Embedding)      (None, array(1000), array 2000000   
_________________________________________________________________
lstm_2 (LSTM)                (None, 50)                30200     
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 306       
Total params: 2,030,506.0
Trainable params: 30,506.0
Non-trainable params: 2,000,000.0
_________________________________________________________________


### Train the model

In [26]:
# Training configuration: categorical cross-entropy loss, Adam optimizer,
# accuracy reported as the 'acc' metric.
model_lstm_final_state.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the LSTM model and report the wall-clock duration of fit().
# `time` is imported once with the other modules at the top of the notebook
# instead of being re-imported inside each training cell.
start_time = time.time()

model_lstm_final_state.fit(x_train, y_train,
                           batch_size=128,
                           epochs=epochs,
                           validation_data=(x_val, y_val))

print("Training time: ", time.time() - start_time)

Train on 311902 samples, validate on 77975 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60

### Save the model

In [28]:
# Make sure the output directory exists before saving: if 'models/' is
# missing the save fails, and the trained LSTM would only survive in
# kernel memory.
if not os.path.isdir('models'):
    os.makedirs('models')
model_lstm_final_state.save('models/LSTM.h5')

## GRU

### Build the model 

In [29]:
# Fresh frozen embedding lookup for the GRU model, initialised from the
# pre-computed embedding matrix (trainable=False keeps the vectors fixed).
embedding_layer = Embedding(
    input_dim=num_words,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=max_sequence_length,
    trainable=False,
)

In [30]:
def embedding_mean(x):
    """Return the mean of tensor ``x`` taken along axis 1.

    NOTE(review): this notebook defines the same helper in every model
    section; each later definition silently replaces the earlier one.
    """
    averaged = tf.reduce_mean(x, axis=1)
    return averaged

In [31]:
# Classifier on the GRU output:
# token ids -> frozen embeddings -> GRU(50) -> 6-way softmax.
sequence_input = Input(shape=(max_sequence_length,), dtype='int32')

embedded_sequences = embedding_layer(sequence_input)
print(embedded_sequences.shape)

x = GRU(units=50, dropout=0.2, recurrent_dropout=0.2)(embedded_sequences)
print(x.shape)

# Disabled alternative, kept for reference: pool the embeddings instead with
# Lambda(embedding_mean)(embedded_sequences).
preds = Dense(units=6, activation='softmax')(x)

model_gru_final_state = Model(inputs=sequence_input, outputs=preds)

(?, 1000, 100)
(?, 50)


In [32]:
# Print the layer-by-layer output shapes and parameter counts of the GRU model.
model_gru_final_state.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, array(1000))       0         
_________________________________________________________________
embedding_4 (Embedding)      (None, array(1000), array 2000000   
_________________________________________________________________
gru_1 (GRU)                  (None, 50)                22650     
_________________________________________________________________
dense_4 (Dense)              (None, 6)                 306       
Total params: 2,022,956.0
Trainable params: 22,956.0
Non-trainable params: 2,000,000.0
_________________________________________________________________


### Train the model

In [33]:
# Training configuration: categorical cross-entropy loss, Adam optimizer,
# accuracy reported as the 'acc' metric.
model_gru_final_state.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [34]:
# Train the GRU model and report the wall-clock duration of fit().
# `time` is imported once with the other modules at the top of the notebook
# instead of being re-imported inside each training cell.
start_time = time.time()

model_gru_final_state.fit(x_train, y_train,
                          batch_size=128,
                          epochs=epochs,
                          validation_data=(x_val, y_val))

print("Training time: ", time.time() - start_time)

Train on 311902 samples, validate on 77975 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training time:  21727.566279172897


### Save the model

In [35]:
# Make sure the output directory exists before saving: if 'models/' is
# missing the save fails, and the trained GRU would only survive in
# kernel memory.
if not os.path.isdir('models'):
    os.makedirs('models')
model_gru_final_state.save('models/GRU.h5')

## Bidirectional LSTM

### Build the model 

In [36]:
# Fresh frozen embedding lookup for the bidirectional LSTM model, initialised
# from the pre-computed embedding matrix (trainable=False keeps the vectors fixed).
embedding_layer = Embedding(
    input_dim=num_words,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=max_sequence_length,
    trainable=False,
)

In [37]:
def embedding_mean(x):
    """Return the mean of tensor ``x`` taken along axis 1.

    NOTE(review): this notebook defines the same helper in every model
    section; each later definition silently replaces the earlier one.
    """
    averaged = tf.reduce_mean(x, axis=1)
    return averaged

In [43]:
# Classifier on the bidirectional LSTM output:
# token ids -> frozen embeddings -> Bidirectional(LSTM(50)) -> 6-way softmax.
sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# No dropout is applied in this model. The dropout-enabled variant is kept
# here for reference (disabled):
#   Bidirectional(LSTM(50, dropout=0.2, recurrent_dropout=0.2))
x = Bidirectional(LSTM(units=50))(embedded_sequences)
print(x.shape)

# Disabled alternative, kept for reference: pool the embeddings instead with
# Lambda(embedding_mean)(embedded_sequences).
preds = Dense(units=6, activation='softmax')(x)

model_bidirlstm_final_state = Model(inputs=sequence_input, outputs=preds)

(?, 100)


In [44]:
# Print the layer-by-layer output shapes and parameter counts of the bidirectional LSTM model.
model_bidirlstm_final_state.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, array(1000))       0         
_________________________________________________________________
embedding_5 (Embedding)      (None, array(1000), array 2000000   
_________________________________________________________________
bidirectional_2 (Bidirection (None, 100)               60400     
_________________________________________________________________
dense_6 (Dense)              (None, 6)                 606       
Total params: 2,061,006.0
Trainable params: 61,006.0
Non-trainable params: 2,000,000.0
_________________________________________________________________


### Train the model

In [45]:
# Training configuration: categorical cross-entropy loss, Adam optimizer,
# accuracy reported as the 'acc' metric.
model_bidirlstm_final_state.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [46]:
# Train the bidirectional LSTM model and report the wall-clock duration of
# fit(). `time` is imported once with the other modules at the top of the
# notebook instead of being re-imported inside each training cell.
start_time = time.time()

model_bidirlstm_final_state.fit(x_train, y_train,
                                batch_size=128,
                                epochs=epochs,
                                validation_data=(x_val, y_val))

print("Training time: ", time.time() - start_time)

Train on 311902 samples, validate on 77975 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training time:  54313.65548968315


### Save the model

In [47]:
# Make sure the output directory exists before saving: if 'models/' is
# missing the save fails, and the trained bidirectional LSTM would only
# survive in kernel memory.
if not os.path.isdir('models'):
    os.makedirs('models')
model_bidirlstm_final_state.save('models/BidirectionalLSTM.h5')