# Building an LSTM model for text data and getting the results

### Start by importing the SimpleRNN layer

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, SimpleRNN
from tensorflow.keras.models import Sequential

print('tf version', tf.__version__)

# A word embedding is a dense vector representation of a word that captures
# its meaning relative to other words. Embeddings can be learned from text
# once and reused across projects, or learned jointly with the rest of a
# network while it is fitted on text data.
#
# Embedding(10000, 32) holds a 32-dimensional vector for each of 10000 token
# indices; SimpleRNN(32) then reduces the embedded sequence to one 32-dim
# output per sample.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32),
])

tf version 2.0.0-beta0


### Let's see how the model looks

It has over 322,000 parameters

In [2]:
# Print the layer-by-layer table of output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 32)          320000    
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 32)                2080      
Total params: 322,080
Trainable params: 322,080
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Same two-layer network as before, except that the recurrent layer is asked
# (return_sequences=True) to return the full output sequence rather than
# only the last output.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32, return_sequences=True),
])

model.summary()

It is sometimes useful to stack several recurrent layers one after the other in order to increase the representational power of a network. 

In [3]:
# Stack four recurrent layers on top of the embedding.
#
# return_sequences: Boolean. Whether to return the last output in the output
# sequence, or the full sequence. The first three recurrent layers return the
# full sequence so the next recurrent layer receives one vector per timestep;
# the final layer keeps the default and returns only its last output.
model = Sequential(
    [Embedding(10000, 32)]
    + [SimpleRNN(32, return_sequences=True) for _ in range(3)]
    + [SimpleRNN(32)]
)

In [4]:
# Print the layer-by-layer table of output shapes and parameter counts
# for the stacked recurrent model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          320000    
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, None, 32)          2080      
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, None, 32)          2080      
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, None, 32)          2080      
_________________________________________________________________
simple_rnn_4 (SimpleRNN)     (None, 32)                2080      
Total params: 328,320
Trainable params: 328,320
Non-trainable params: 0
_________________________________________________________________


Now let's try to use such a model on the IMDB movie review classification problem. First, let's preprocess the data:

In [5]:
# Use the Keras bundled with TensorFlow, consistent with the model-building
# cells in this notebook, instead of mixing in the standalone `keras` package.
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

max_features = 10000  # number of words to consider as features
maxlen = 500  # cut texts after 500 words
# NOTE: the model.fit calls in this notebook pass batch_size=128 literally
# and do not read this variable.
batch_size = 32

print('Loading data...')
# Reviews arrive as sequences of word indices; num_words limits the
# vocabulary to max_features.
(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

print('Pad sequences (samples x time)')
# Bring every review to exactly maxlen entries so each split becomes a
# single 2-D array of shape (samples, maxlen).
input_train = sequence.pad_sequences(input_train, maxlen=maxlen)
input_test = sequence.pad_sequences(input_test, maxlen=maxlen)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

Using TensorFlow backend.


Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
input_train shape: (25000, 500)
input_test shape: (25000, 500)


Let's train a simple recurrent network using an `Embedding` layer and a `SimpleRNN` layer:

In [6]:
from tensorflow.keras.layers import Dense

# Binary classifier: embedding -> single SimpleRNN layer -> one sigmoid unit.
model = Sequential([
    Embedding(max_features, 32),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

# The metric is registered under the name 'acc', which is the key later used
# to read it back from the training history.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Hold out 20% of the training data for validation.
history = model.fit(
    input_train,
    y_train,
    epochs=1,
    batch_size=128,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples


Let's display the training and validation loss and accuracy:

In [7]:
import matplotlib.pyplot as plt

# history.history maps each metric name to a list with one value per epoch.
# The 'acc' / 'val_acc' keys follow the metric name given to model.compile.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']


print('Training set accuracy is: ', acc)
print('Validation set accuracy is: ', val_acc)
print('Training set Loss is: ', loss)
# Fixed label: this line reports the validation loss, not the accuracy.
print('Validation set Loss is: ', val_loss)

# Of course, you can train for more epochs
# to improve the accuracy

Training set accuracy is:  [0.55735]
Validation set accuracy is:  [0.6458]
Training set Loss is:  [0.6792083116531372]
Validation set accuracy is:  [0.6350920477867127]


## 2) Same Example with LSTM - Long Short-term Memory Layer

In [8]:
from tensorflow.keras.layers import LSTM

# Same classifier as the SimpleRNN version, with the recurrent layer swapped
# for an LSTM: embedding -> LSTM -> one sigmoid unit.
model = Sequential([
    Embedding(max_features, 32),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])

# The metric is registered under the name 'acc', which is the key later used
# to read it back from the training history.
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# Hold out 20% of the training data for validation.
history = model.fit(
    input_train,
    y_train,
    epochs=1,
    batch_size=128,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples


In [9]:
import matplotlib.pyplot as plt

# history.history maps each metric name to a list with one value per epoch.
# The 'acc' / 'val_acc' keys follow the metric name given to model.compile.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']


print('Training set accuracy is: ', acc)
print('Validation set accuracy is: ', val_acc)
print('Training set Loss is: ', loss)
# Fixed label: this line reports the validation loss, not the accuracy.
print('Validation set Loss is: ', val_loss)

# Of course, you can train for more epochs
# to improve the accuracy

Training set accuracy is:  [0.76255]
Validation set accuracy is:  [0.8336]
Training set Loss is:  [0.5059212841033935]
Validation set accuracy is:  [0.3961514075756073]
