# LSTM for Sentiment Analysis using Keras

Import the IMDB movie-review dataset from the Keras library.

In [1]:
from keras.datasets import imdb

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Set the vocabulary size and load the training and testing data (the data comes from the Keras built-in datasets).

In [2]:
from keras.preprocessing.text import one_hot
from numpy import array
# Vocabulary cap handed to imdb.load_data via num_words (per the Keras docs,
# only the most frequent words are kept).
vocab_size = 5000

# Fetch both splits first, then unpack each into its (samples, labels) pair.
train_split, test_split = imdb.load_data(num_words=vocab_size)
x_train, y_train = train_split
x_test, y_test = test_split

# Report how many reviews landed in each split.
summary_template = 'Loaded dataset with {} training samples, {} test samples'
print(summary_template.format(len(x_train), len(x_test)))

Loaded dataset with 25000 training samples, 25000 test samples


# Padding the Document

Ensure all input documents have the same length: the maximum length is 500 words, so longer reviews are truncated and shorter ones are padded with 0.

In [3]:
from keras.preprocessing import sequence
# Fixed sequence length that every review is brought to before entering the model.
max_words = 500

# Apply the same padding call to both splits so train and test share one shape.
x_train, x_test = [
    sequence.pad_sequences(split, maxlen=max_words)
    for split in (x_train, x_test)
]

# Design the RNN

In [4]:
from keras import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout

# Dimensionality of the learned word vectors produced by the Embedding layer.
embedding_size = 32

# Network: token ids -> 32-d embeddings -> 100-unit LSTM -> single sigmoid
# output for the binary sentiment label.
model = Sequential()
model.add(Embedding(vocab_size, embedding_size, input_length=max_words))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" after the table).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None


# Compiling The Model

Our model needs to be compiled by specifying the loss function (used to calculate the error) and the optimizer.

In [5]:
# Configure training: binary cross-entropy loss to match the single sigmoid
# output, the Adam optimizer, and accuracy tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Training Model

In [6]:
# Mini-batch size and number of passes over the training data.
batch = 64
epoch = 3

# Hold out the first `batch` samples for validation; train on the remainder.
# NOTE(review): this ties the validation-set size to the batch size (only 64
# samples), so validation metrics will be noisy — confirm this is intended.
x_valid, x_train2 = x_train[:batch], x_train[batch:]
y_valid, y_train2 = y_train[:batch], y_train[batch:]

# Kept as the cell's last expression so the returned History object is displayed.
model.fit(
    x_train2,
    y_train2,
    batch_size=batch,
    epochs=epoch,
    validation_data=(x_valid, y_valid),
)

Train on 24936 samples, validate on 64 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x8b80779400>

# Testing the Model

In [7]:
# Evaluate silently on the held-out test split. With metrics=['accuracy'] set
# at compile time, index 1 of the result is read as the accuracy
# (presumably index 0 is the loss — verify against the Keras docs).
scores = model.evaluate(x_test, y_test, verbose=0)
test_accuracy = scores[1]
print("Test Accuracy : {}".format(test_accuracy))

Test Accuracy : 0.86712
