In [None]:
# Make print() a function so the cells behave the same on Python 2 and 3.
from __future__ import print_function
import os, sys
# Add the parent directory to the import path; presumably that is where the
# KerasTools package imported below lives (confirm against the repo layout).
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Set before keras is imported so the backend sees them when it initialises.
# Devices are enumerated in PCI bus order, and "-1" matches no device, which
# hides every GPU and is intended to force the notebook to run on the CPU.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    
import keras
import KerasTools as KT
import numpy as np

In [None]:
# Vocabulary size and sequence length used by the data loading and model cells.
max_features = 1000  # Top most frequent words to consider
maxlen = 200         # Cut texts after this number of words

print('Load data...')
# Load the train/test splits, limiting the vocabulary to `max_features` ids.
imdb_splits = KT.datasets.imdb.load_data(num_words=max_features)
(train_data, train_labels), (test_data, test_labels) = imdb_splits

print('Pad sequences (samples x time)')
# Bring every review to exactly `maxlen` tokens so the batches are rectangular.
x_train, x_test = [
    keras.preprocessing.sequence.pad_sequences(sequences, maxlen=maxlen)
    for sequences in (train_data, test_data)
]
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

In [None]:
print('Build model...')
# Tiny recurrent sentiment classifier, declared as one layer list:
# token ids -> 3-d embeddings (id 0 is masked) -> SimpleRNN -> dense head
# ending in a single sigmoid unit.
model = keras.models.Sequential([
    keras.layers.Embedding(max_features, 3, input_shape=(None, ), mask_zero=True),
    keras.layers.SimpleRNN(4),
    keras.layers.Dense(4, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
print('Train...')
# Exploratory run: 10% of the training samples are held out for validation.
# The returned History object keeps the per-epoch metrics.
history = model.fit(
    x_train,
    train_labels,
    epochs=30,
    batch_size=256,
    validation_split=0.1,
)

In [None]:
# Hand the per-epoch metrics recorded by fit() to the project's plotting helper.
KT.plot_history(history.history)

In [None]:
print('Build and train final model...')
# Rebuild the same architecture from scratch so the final model starts from
# fresh weights, then train it on the full training set (no validation split).
model = keras.models.Sequential()
for layer in (
    keras.layers.Embedding(max_features, 3, input_shape=(None, ), mask_zero=True),
    keras.layers.SimpleRNN(4),
    keras.layers.Dense(4, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
):
    model.add(layer)

model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(x_train, train_labels, epochs=10, batch_size=256)

# Score the final model on the held-out test set: (loss, accuracy).
test_loss, test_acc = model.evaluate(x_test, test_labels)

In [None]:
# Attach the final model's test metrics and its epoch count to the history
# dict from the validation run, then plot everything together.
history.history.update({
    'test_loss': test_loss,
    'test_acc': test_acc,
    'epochs': 10,
})
KT.plot_history(history.history)

In [None]:
# Persist the final trained model to an HDF5 file in the working directory.
model.save("IMDB.h5")

In [None]:
# Free-text sample review; the following cells tokenise it and feed it to the model.
review = """
As far as the show goes it's fantastic 
but what really stood out in my opinion was the performance of Joaquín Cosío as 'Don Neto' 
I believe this is one of the best supporting performances in any Netflix series to date! 
The arc that he goes through and the performance that he turns in cannot be ignored. 
He is a very talented actor and displays this in every scene he's in! 
He is the most believeable character in the show shining brightly over an already brilliant cast of actors.
"""

In [None]:
# Encode the free-text review with the same id scheme as the training data:
#   0 = padding, 1 = start-of-sequence, 2 = unknown / out-of-vocabulary,
#   every real word = its frequency rank from word_index shifted up by 3.
word_index = KT.datasets.imdb.get_word_index()
text_split = keras.preprocessing.text.text_to_word_sequence(review)
print(text_split)

# Words missing from word_index get a rank past the vocabulary cut-off, so the
# range check below turns them into the unknown id.
text_token = [word_index.get(t, max_features+1) for t in text_split]

# The vocabulary cut-off applies to the *shifted* id: the Embedding layer is
# built with max_features entries, so every id must be < max_features.
# Testing the raw rank (d > max_features) let ranks max_features-3..max_features
# through as ids max_features..max_features+3, which are out of range.
text_dict = [1] + [d+3 if d+3 < max_features else 2 for d in text_token]
text_predict= np.asarray(text_dict).reshape(1, len(text_dict))
print(text_predict)

# Decode the ids back to words to sanity-check what the model will see.
reverse_word_index = {v+3:k for k,v in word_index.items()}
reverse_word_index[0] = '-PAD-'
reverse_word_index[1] = '-START-'
reverse_word_index[2] = '-UNK-'
print(" ".join([reverse_word_index[t] for t in text_dict]))

In [None]:
# Score the encoded review. The sigmoid output is reported as the "good movie"
# percentage and its complement as the "bad movie" percentage.
result = model.predict(text_predict)
good_pct = 100*result[0][0]
bad_pct = 100.0 - good_pct

rule = '-' * 24
print(rule)
print(" Good movie | {0:>6.2f}%".format(good_pct))
print(" Bad movie  | {0:>6.2f}%".format(bad_pct))
print(rule)