In [16]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [17]:
data = keras.datasets.imdb
#only take the 10000 most common words (more rare words could mess up the model)
(train_data, train_labels), (test_data, test_labels) = data.load_data(num_words=88000)

word_index = data.get_word_index()
word_index = {k:(v+3) for k, v in word_index.items()}

word_index['<PAD>'] = 0
word_index['<START>'] = 1
word_index['<UNK>'] = 2
word_index['<UNUSED>'] = 3

#make value the key (we need the int values to point at their associated words)
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

In [18]:
#add padding for reviews less than 250 words
train_data = keras.preprocessing.sequence.pad_sequences(train_data, value=word_index['<PAD>'], padding='post', maxlen=250)
test_data = keras.preprocessing.sequence.pad_sequences(test_data, value=word_index['<PAD>'], padding='post', maxlen=250)

print(len(train_data), len(test_data))

25000 25000


In [19]:
def decode_review(text):
    return "".join([reverse_word_index.get(i, "?") for i in text])

print(decode_review(test_data[0]))

<START>pleasegivethisoneamissbrbrkristyswansonandtherestofthecastrenderedterribleperformancestheshowisflatflatflatbrbridon'tknowhowmichaelmadisoncouldhaveallowedthisoneonhisplatehealmostseemedtoknowthiswasn'tgoingtoworkoutandhisperformancewasquitelacklustresoallyoumadisonfansgivethisamiss<PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><

In [20]:
model = keras.Sequential()
model.add(keras.layers.Embedding(88000, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

x_val = train_data[:10000]
x_train = train_data[10000:]
y_val = train_labels[:10000]
y_train = train_labels[10000:]

fit_model = model.fit(x_train, y_train, epochs=40, batch_size=512, verbose=1, validation_data=(x_val, y_val))
results = model.evaluate(test_data, test_labels)

print(results)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
[0.33496516942977905, 0.8720800280570984]


In [21]:
test_review = test_data[0]
predict = model.predict([test_review])
print("Review: ")
print(decode_review(test_review))
print("Prediction: " + str(predict[0]))
print("Actual: " + str(test_labels[0]))
print(results)

Review: 
<START>pleasegivethisoneamissbrbrkristyswansonandtherestofthecastrenderedterribleperformancestheshowisflatflatflatbrbridon'tknowhowmichaelmadisoncouldhaveallowedthisoneonhisplatehealmostseemedtoknowthiswasn'tgoingtoworkoutandhisperformancewasquitelacklustresoallyoumadisonfansgivethisamiss<PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><P

In [22]:
model.save("model.h5")

In [25]:
#testing your own reviews
model = keras.models.load_model("model.h5")

def review_encode(s):
    encoded = [1]
    for word in s:
        if word in word_index:
            encoded.append(word_index[word])
        else:
            encoded.append(2)
    return encoded

with open("review.txt") as f:
    for line in f.readlines():
        nline = line.replace(",", "").replace("(", "").replace(")", "").replace(":", "").strip()
        encode = review_encode(nline)
        encode = keras.preprocessing.sequence.pad_sequences([encode], value=word_index['<PAD>'], padding='post', maxlen=250)
        predict = model.predict(encode)
        print(line)
        print(encode)
        print(predict[0])

Story: When the royal lion cub, Simba is born to Queen Sarabi and King Mufasa, animals in the forest rejoice at having a new heir. But their joy is short-lived when Mufasa dies trying to save little Simbaâ€™s life. Feeling guilty for being the cause of his fatherâ€™s death Simba runs away from the forest. And now his evil uncle Scar takes over the throne.

[[ 963    2    6  830    2 2023    6 1964   13 3363 1331    2    6    2
  3363  963 1992    2 2023  963   13 1479    2    2    2 1206  830    2
   830 2023  963   13 1479    2 1468 1604 5135    2   13  590    2  590
  2023 1604 1479  830    2 2014   13 1964  963 1095    2 1992 2023  963
  3363    2    2 1206 1209    6  590    6    2 1095   13  963  590    2
   830 1479 5135   13 3363 1331    2  830 1604    2  590    6 1964  963
     2 2014   13  830  830 2014  963    2    2   13 1983  503    6    2
     2    2  590    2 2014   13 1209  963    2    2    2  963  963 2014
    13 3363 1331    2 1331 1206   13 2014  830 5135    2 1209 160

Shreya Ghosal, Arman Malik and Sunayna Sarkar do a fine job with the Hindi renditions of the soundtrack. But for those who have reveled in the original 'Circle of Life', 'Can you feel the Love tonight' and 'Hakuna Matata' it may not match up totally. The background score by Hans Zimmer is one of the high points of the film.

[[   2 1479  963 3363 1095   13  830   13 1604 3363  590    2 1604 1209
     2  830 2023  963    2  590 1604 1206 3363 1095  830 1479    6 1148
  2295    2    2    2 1206  830    2 1209 1604 1479    2  830 2023 1604
   590  963    2 1992 2023 1604    2 2023    6 1964  963    2 1479  963
  1964  963 2014  963 1095    2   13 3363    2  830 2023  963    2 1604
  1479   13 1331   13 3363    6 2014    2  758    2   13 1479 1148 2014
   963    2 1604 1209    2    2   13 1209  963  758    2  758    2    6
  3363    2 5135 1604 1206    2 1209  963  963 2014    2  830 2023  963
     2    2 1604 1964  963    2  830 1604 3363   13 1331 2023  830  758
     2    6 3363 1095    