In [95]:
import numpy as np
from keras.datasets import imdb
from matplotlib import pyplot as plot
import keras.preprocessing.text
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
import keras.preprocessing.text
from keras import backend as K

In [29]:
# Vocabulary cap passed to the loader as num_words (per the Keras docs, only the
# most frequent `top_words` words are kept; rarer ones become the OOV id).
top_words = 5000
# Fixed sequence length used when padding/truncating reviews further down.
max_review_length = 500

# load_data returns a (train, test) pair, each an (inputs, labels) pair.
train_split, test_split = imdb.load_data(num_words=top_words)
X_train, y_train = train_split
X_test, y_test = test_split

In [30]:
# List the distinct label values in the test split to see how many classes there are.
# Left as a bare expression so the notebook displays the resulting array.
np.unique(y_test)

array([0, 1])

In [31]:
# Display the shape of the training inputs (bare expression -> shown as cell output).
X_train.shape

(25000,)

In [32]:
# Display the shape of the training labels; it should agree with X_train on the sample count.
y_train.shape

(25000,)

In [68]:
# Force every review to exactly max_review_length ids: longer reviews are
# truncated and shorter ones zero-padded (pad_sequences defaults).
# The generator is fully consumed by the unpacking before either name is rebound.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_review_length)
    for split in (X_train, X_test)
)

# LSTM Model

In [35]:
embedding_vector_length = 32
model = Sequential()
# Map each of the top_words word ids to a trainable 32-dimensional vector
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
# LSTM layer with 100 units
model.add(LSTM(100))
# Binary classification: a single sigmoid unit outputs a score between 0 and 1
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()
# NOTE: the test split doubles as validation data here, so the reported
# validation accuracy is not an independent hold-out estimate.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None
Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x181f50f3d0>

# Model evaluation

In [40]:
# Print the test inputs for a quick visual check (numpy abbreviates large arrays).
print(X_test)

[[   0    0    0 ...,   14    6  717]
 [   0    0    0 ...,  125    4 3077]
 [  33    6   58 ...,    9   57  975]
 ..., 
 [   0    0    0 ...,   21  846    2]
 [   0    0    0 ..., 2302    7  470]
 [   0    0    0 ...,   34 2005 2643]]


In [41]:
# With metrics=['accuracy'] at compile time, evaluate() returns [loss, accuracy].
scores = model.evaluate(X_test, y_test, verbose=1)
accuracy_pct = scores[1] * 100
print('Accuracy: {}'.format(accuracy_pct))

Accuracy: 87.588


In [66]:
text = '''
I've enjoyed previous Thor movies and after seeing the rating here i expected this to be a decent movie, it wasn't.

I guess this is the trend to make money on movies now days, just have big stars, bad jokes and lot of pointless action and effects. It's just so sad if you think about the potential of how good these movies could be.

Maybe this was the last Marvel movie I bother to watch.
'''
# Encode the review with the same vocabulary the model was trained on.
# keras.preprocessing.text.one_hot() hashes words to arbitrary ids, which have
# nothing to do with the IMDB frequency-rank ids the Embedding layer learned.
word_index = imdb.get_word_index()
words = keras.preprocessing.text.text_to_word_sequence(text, lower=True, split=' ')
# Mirror imdb.load_data() defaults: 0 = padding, 1 = start-of-sequence,
# 2 = out-of-vocabulary, real word ranks shifted by 3; ids >= top_words are OOV.
x = [1] + [
    word_index[w] + 3 if w in word_index and word_index[w] + 3 < top_words else 2
    for w in words
]
x = [x]
x = sequence.pad_sequences(x, maxlen=max_review_length)
predictions = model.predict_classes(x)

In [67]:
# predictions holds one row per input with a single class column; take the only entry.
sentiment = predictions[0][0]
print(predictions)
if sentiment == 1:
    print('Someone likes the movie: ', text)
else:
    # Message completed and the review echoed, matching the positive branch
    print('Someone DOESNT like the movie ', text)

[[1]]
('Someone likes the movie: ', "\nI've enjoyed previous Thor movies and after seeing the rating here i expected this to be a decent movie, it wasn't.\n\nI guess this is the trend to make money on movies now days, just have big stars, bad jokes and lot of pointless action and effects. It's just so sad if you think about the potential of how good these movies could be.\n\nMaybe this was the last Marvel movie I bother to watch.\n")


# Prevent overfitting

In [None]:
# Rebuild the classifier with a Dropout(0.2) layer before and after the LSTM.
model = Sequential([
    Embedding(top_words, embedding_vector_length, input_length=max_review_length),
    Dropout(0.2),
    LSTM(100),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
dropout_1 (Dropout)          (None, 500, 32)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the current `model` for 3 epochs in batches of 64; the test split is
# passed as validation data, so validation metrics are computed on the test set.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64)

Train on 25000 samples, validate on 25000 samples
Epoch 1/3
  256/25000 [..............................] - ETA: 6:13 - loss: 0.6935 - acc: 0.4688

# Evaluate the new LSTM model with dropouts

In [None]:
# With metrics=['accuracy'] at compile time, evaluate() returns [loss, accuracy].
scores = model.evaluate(X_test, y_test, verbose=1)
accuracy_pct = scores[1] * 100
print('Accuracy: {}'.format(accuracy_pct))

In [None]:
# Longer negative review, kept under its own name instead of being assigned to
# `x` and immediately overwritten; set x = thor_review to score it instead.
thor_review = '''
I've enjoyed previous Thor movies and after seeing the rating here i expected this to be a decent movie, it wasn't.

I guess this is the trend to make money on movies now days, just have big stars, bad jokes and lot of pointless action and effects. It's just so sad if you think about the potential of how good these movies could be.

Maybe this was the last Marvel movie I bother to watch.
'''
x = 'I love this movie!'
# Encode the review with the same vocabulary the model was trained on.
# keras.preprocessing.text.one_hot() hashes words to arbitrary ids, which have
# nothing to do with the IMDB frequency-rank ids the Embedding layer learned.
word_index = imdb.get_word_index()
words = keras.preprocessing.text.text_to_word_sequence(x, lower=True, split=' ')
# Mirror imdb.load_data() defaults: 0 = padding, 1 = start-of-sequence,
# 2 = out-of-vocabulary, real word ranks shifted by 3; ids >= top_words are OOV.
text = [1] + [
    word_index[w] + 3 if w in word_index and word_index[w] + 3 < top_words else 2
    for w in words
]
text = [text]
text = sequence.pad_sequences(text, maxlen=max_review_length)
predictions = model.predict_classes(text)

In [94]:
# Pull the single predicted class out of the (1, 1) prediction array.
sentiment = predictions[0][0]
print(predictions)
# Pick the label first, then emit it together with the review in one print call.
message = 'Someone likes the movie: ' if sentiment == 1 else 'Someone DOESNT like the movie '
print(message, x)

[[0]]
('Someone DOESNT like the movie ', "\nI've enjoyed previous Thor movies and after seeing the rating here i expected this to be a decent movie, it wasn't.\n\nI guess this is the trend to make money on movies now days, just have big stars, bad jokes and lot of pointless action and effects. It's just so sad if you think about the potential of how good these movies could be.\n\nMaybe this was the last Marvel movie I bother to watch.\n")
