In [124]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [125]:
# Short alias for the Keras IMDB dataset module; later cells call
# data.load_data() and data.get_word_index() through this name.
data = keras.datasets.imdb

In [126]:
# Load the train/test splits, keeping only word ids below 88000
# (the same bound is used as the Embedding input size further down).
train_split, test_split = data.load_data(num_words=88000)
train_data, train_labels = train_split
test_data, test_labels = test_split

In [127]:
# Shift every id from the dataset's word index up by 3, which frees ids 0-3
# for the special tokens registered right after.
word_index = {
    word: rank + 3
    for word, rank in data.get_word_index().items()
}
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,
    "<UNUSED>": 3,
})

In [128]:
# Inverse lookup (integer id -> word), used by decode_review.
reverse_word_index = {index: word for word, index in word_index.items()}

In [129]:
# Bring every review to exactly 250 ids, filling the tail with the <PAD> id.
# Both splits share the same settings, so they are declared once.
pad_kwargs = dict(value=word_index["<PAD>"], padding='post', maxlen=250)
train_data = keras.preprocessing.sequence.pad_sequences(train_data, **pad_kwargs)
test_data = keras.preprocessing.sequence.pad_sequences(test_data, **pad_kwargs)

In [130]:
def decode_review(text):
    """Convert a sequence of integer word ids back into readable text.

    Each id is looked up in ``reverse_word_index``; ids that have no entry
    are rendered as '?'. The words are joined with single spaces.
    """
    words = (reverse_word_index.get(token_id, '?') for token_id in text)
    return " ".join(words)

In [131]:
# Sentiment classifier: 16-dim embeddings for 88000 ids, averaged over the
# sequence, then a 16-unit ReLU layer and a single sigmoid output.
model = keras.Sequential([
    keras.layers.Embedding(88000, 16),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [132]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, None, 16)          1408000   
_________________________________________________________________
global_average_pooling1d_4 ( (None, 16)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 17        
Total params: 1,408,289
Trainable params: 1,408,289
Non-trainable params: 0
_________________________________________________________________


In [133]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [134]:
# Hold out the first 10000 padded reviews for validation; train on the rest.
x_val, x_train = train_data[:10000], train_data[10000:]

In [135]:
# Split the labels at the same index (10000) used for the review split so
# that each label stays aligned with its review.
y_val, y_train = train_labels[:10000], train_labels[10000:]

In [136]:
# Train for 40 epochs in batches of 512, evaluating on the held-out
# validation split after each epoch.
fitModel = model.fit(
    x_train,
    y_train,
    epochs=40,
    batch_size=512,
    validation_data=(x_val, y_val),
    verbose=1,
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [137]:
# Score the trained model on the padded test split. The displayed list holds
# the loss followed by the accuracy metric configured in model.compile.
results = model.evaluate(test_data, test_labels)
results



[0.33818242547512056, 0.87224]

In [138]:
# Persist the trained model to "model.h5" in the current working directory.
model.save("model.h5")

In [139]:
# Reload the model from "model.h5"; this rebinds `model`, so the cells below
# use the copy read from disk rather than the in-memory training result.
model = keras.models.load_model("model.h5")

In [145]:
def review_encode(s, vocab=None, num_words=88000):
    """Encode a tokenised review as a list of integer word ids.

    The result always starts with 1 (the <START> id). Every token is
    lower-cased and looked up in the vocabulary; a token that is missing is
    encoded as 2 (the <UNK> id).

    Ids at or above ``num_words`` are also encoded as 2. The training data
    was loaded with ``num_words=88000`` and the Embedding layer has 88000
    rows, so a larger id would index outside the embedding table.

    Args:
        s: Iterable of word strings (already split into tokens). Passing a
            raw string would iterate over its characters.
        vocab: Mapping of lower-case word -> integer id. Defaults to the
            notebook-level ``word_index``.
        num_words: Exclusive upper bound on ids the model accepts. Pass
            ``None`` to disable the bound check.

    Returns:
        list[int]: ``[1, id_1, id_2, ...]`` with one id per input token.
    """
    if vocab is None:
        vocab = word_index

    unk_id = 2
    encoded = [1]

    for word in s:
        idx = vocab.get(word.lower(), unk_id)
        # Known word, but outside the vocabulary the model was trained on.
        if num_words is not None and idx >= num_words:
            idx = unk_id
        encoded.append(idx)
    return encoded

In [156]:
# Characters removed from each line before tokenising: , . ( ) : "
_PUNCT_TABLE = str.maketrans("", "", ",.():\"")

# Score every non-empty line of test.txt as one review.
with open("test.txt", encoding="utf-8") as f:
    # Iterate the file lazily instead of loading all lines with readlines().
    for line in f:
        # split() with no argument collapses runs of whitespace, so repeated
        # spaces no longer produce empty-string tokens that would each be
        # encoded as a spurious <UNK> id.
        nline = line.translate(_PUNCT_TABLE).split()
        if not nline:
            # Blank line: there is no review to score.
            continue
        encode = review_encode(nline)
        # Pad to the same fixed length (250) used for the training data.
        encode = keras.preprocessing.sequence.pad_sequences([encode], value=word_index["<PAD>"], padding="post", maxlen=250)
        predict = model.predict(encode)
        print(line)
        print(encode)
        print(predict[0])

I liked the movie and I didn't walk into the cinema assuming it will be trash. I didn't go there to compare with the 1994 movie. I just enjoyed it. Now I can see a lot of 1-star reviews and I am shocked, because 90% of them have no intelligent arguments as to why this movie gets such horrible ratings. So, I won't make an exception and I won't give any explanation for this 10-star review. I loved every minute and also, I almost cried at some scenes.
[[   1   13  423    4   20    5   13  161 1135   83    4  438 5683   12
    80   30 1157   13  161  140   50    8 1661   19    4 6114   20   13
    43  510   12  150   13   70   67    6  176    7    2  857    5   13
   244 2414   88    2    7   98   28   57 1089 6702   17    8  138   14
    20  214  141  527 2896   38   13  528   97   35 1401    5   13  528
   202  101 1823   18   14    2  733   13  447  175  786    5   82   13
   220 3785   33   49  139    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0  