For this Experiment We Will Used the IMDB movie review dataset which is given in the keras

In [1]:
from keras.datasets import imdb

In [2]:
vocabulary_size = 1000

(Xtrain, ytrain), (Xtest, ytest) = imdb.load_data(num_words = vocabulary_size)
print('Loaded dataset with {} training samples, {} test samples'.format(len(Xtrain), len(Xtest)))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Loaded dataset with 25000 training samples, 25000 test samples


In [3]:
word2id = imdb.get_word_index()
id2word = {i: word for word, i in word2id.items()}
print('****** review with words ******')
print([id2word.get(i, ' ') for i in Xtrain[6]])
print('****** label ******')
print(ytrain[6])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
****** review with words ******
['the', 'and', 'full', 'and', 'to', 'and', 'boring', 'this', 'as', 'and', 'and', 'br', 'and', 'and', 'and', 'need', 'has', 'of', 'and', 'b', 'message', 'to', 'may', 'of', 'and', 'this', 'and', 'and', 'and', 'and', 'and', 'to', 'and', 'he', 'is', 'and', 'and', 'movie', 'women', 'like', "isn't", 'and', "i'm", 'and', 'to', 'and', 'in', 'and', 'for', 'from', 'did', 'having', 'because', 'very', 'quality', 'it', 'is', 'and', 'and', 'really', 'book', 'is', 'both', 'too', 'worked', 'and', 'of', 'and', 'br', 'of', 'and', 'and', 'figure', 'really', 'there', 'will', 'and', 'things', 'is', 'far', 'this', 'make', 'and', 'and', 'was', "couldn't", 'of', 'few', 'br', 'of', 'you', 'to', "don't", 'female', 'than', 'place', 'she', 'to', 'was', 'between', 'that', 'nothing', 'and', 'movies', 'get', 'are', 'and', 'br', 'yes', 'female', 'just', 'its', 'because', 'many', 'br',

In [4]:
print('Maximum review length: {}'.format(
len(max((Xtrain + Xtest), key=len))))

Maximum review length: 2697


In [5]:
print('Minimum review length: {}'.format(
len(min((Xtest + Xtest), key=len))))

Minimum review length: 14


In [6]:
from tensorflow.keras.preprocessing import sequence

max_words = 2697
Xtrain = sequence.pad_sequences(Xtrain, maxlen=max_words)
Xtest = sequence.pad_sequences(Xtest, maxlen=max_words)

In [7]:
from keras import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout

embedding_size=32
model=Sequential()

model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))

print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 2697, 32)          32000     
                                                                 
 lstm (LSTM)                 (None, 2697, 128)         82432     
                                                                 
 dropout (Dropout)           (None, 2697, 128)         0         
                                                                 
 lstm_1 (LSTM)               (None, 2697, 64)          49408     
                                                                 
 dropout_1 (Dropout)         (None, 2697, 64)          0         
                                                                 
 lstm_2 (LSTM)               (None, 32)                12416     
                                                                 
 dense (Dense)               (None, 1)                 3

In [8]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [9]:
batch_size = 64
num_epochs = 10

In [10]:
from keras.callbacks import EarlyStopping
es = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)

In [11]:
Xval, yval = Xtrain[:batch_size], ytrain[:batch_size]
X_train2, y_train2 = Xtrain[batch_size:], ytrain[batch_size:]

model.fit(X_train2, y_train2, validation_data=(Xval, yval), batch_size=batch_size, epochs=num_epochs)

Epoch 1/10


In [13]:
scores = model.evaluate(Xtest, ytest, verbose=0)
print('Test accuracy:', scores[1])

Test accuracy: 0.861519992351532


In [24]:
y_pred = model.predict(Xtest)



In [25]:
y_pred

array([[0.30001882],
       [0.97547656],
       [0.6257033 ],
       ...,
       [0.14876647],
       [0.05197641],
       [0.7804181 ]], dtype=float32)