In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

# IMDB review dataset bundled with Keras; reviews arrive already encoded as
# lists of integer word ids.
data = keras.datasets.imdb

(train_data, train_labels), (test_data, test_labels) = data.load_data(
    num_words=88000
)

# Shift every id from the raw word index up by three so that ids 0-3 are free
# for the special tokens registered immediately afterwards.
word_index = {word: rank + 3 for word, rank in data.get_word_index().items()}
word_index.update({
    '<PAD>': 0,
    '<START>': 1,
    '<UNK>': 2,
    '<UNUSED>': 3,
})

# Inverse mapping (id -> word), used to turn encoded reviews back into text.
reverse_word_index = {idx: token for token, idx in word_index.items()}

# Force every review to exactly 250 ids: the '<PAD>' id is appended at the end
# of shorter reviews ('post' padding).
train_data = keras.preprocessing.sequence.pad_sequences(
    train_data,
    value=word_index['<PAD>'],
    padding='post',
    maxlen=250,
)
test_data = keras.preprocessing.sequence.pad_sequences(
    test_data,
    value=word_index['<PAD>'],
    padding='post',
    maxlen=250,
)

def decode_review(text):
    """Translate a sequence of word ids back into a readable string.

    Each id is looked up in ``reverse_word_index``; ids with no entry are
    rendered as "?". The resulting words are joined with single spaces.
    """
    words = []
    for word_id in text:
        words.append(reverse_word_index.get(word_id, "?"))
    return " ".join(words)

# Training code, disabled by wrapping it in a bare triple-quoted string: the
# string is evaluated and discarded, so none of the statements inside it run.
# As written, the code builds and compiles the classifier, holds out the first
# 10000 training samples for validation, fits for 30 epochs, evaluates on the
# test split and saves the result to 'model.h5' -- the same file name that
# keras.models.load_model reads further down in this file.
# Remove the surrounding quotes to retrain and regenerate 'model.h5'.
"""
model = keras.Sequential()
model.add(keras.layers.Embedding(88000, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation="relu"))
model.add(keras.layers.Dense(1, activation="sigmoid"))

model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

x_val = train_data[:10000]
x_train = train_data[10000:]

y_val = train_labels[:10000]
y_train = train_labels[10000:]

fitModel = model.fit(x_train, y_train, epochs=30, batch_size=512, validation_data=(x_val, y_val), verbose=1)

results = model.evaluate(test_data, test_labels)

print(results)

# it is called h5, you could call it anything but h5 is the convention
# this saves our model in binary
model.save('model.h5')
"""


def review_encode(s, *, vocab=None, num_words=88000):
    """Convert a tokenised review into a list of integer word ids.

    Args:
        s: Iterable of word strings (one review, already split into words).
            Each word is lower-cased before it is looked up.
        vocab: Mapping from lower-case word to integer id. When omitted, the
            module-level ``word_index`` is used.
        num_words: Exclusive upper bound on usable ids. Any word whose id is
            at or above this value is encoded as ``<UNK>``. The default
            mirrors the ``num_words=88000`` passed to ``load_data`` in this
            file; the full word index contains ids beyond that limit, and
            feeding them to a model with an 88000-entry embedding would be
            out of range.

    Returns:
        A list of ints that starts with 1 (the ``<START>`` id) followed by
        one id per word, with 2 (the ``<UNK>`` id) standing in for words that
        are missing from the vocabulary or outside the ``num_words`` limit.
    """
    if vocab is None:
        vocab = word_index

    start_id = 1    # id registered for '<START>'
    unknown_id = 2  # id registered for '<UNK>'

    encoded = [start_id]
    for word in s:
        word_id = vocab.get(word.lower(), unknown_id)
        # Ids outside the model's vocabulary are treated like unknown words.
        encoded.append(word_id if word_id < num_words else unknown_id)
    return encoded
            

# Load the classifier stored in 'model.h5'.
model = keras.models.load_model('model.h5')

# Characters deleted from each review before tokenising; the word index is
# looked up with bare lower-case words, so punctuation would never match.
punctuation_table = str.maketrans('', '', ',.():"-')

total = 0
i = 0
with open('6_descendants_review.txt', encoding='utf-8') as f:
    # One review per line, so another review can be added by appending a line.
    for line in f:
        # split() with no argument collapses runs of whitespace, so removed
        # punctuation (e.g. " - ") does not leave empty tokens behind that
        # would each be encoded as '<UNK>'.
        nline = line.translate(punctuation_table).split()
        if not nline:
            # Blank line: nothing to score, and it must not count towards
            # the average.
            continue
        encode = review_encode(nline)
        # Pad with '<PAD>' to the fixed length of 250 ids used for the
        # training data.
        encode = keras.preprocessing.sequence.pad_sequences([encode], value=word_index['<PAD>'], padding='post', maxlen=250)
        predict = model.predict(encode)
        print(line)
        print(encode)
        print(predict[0])
        # predict[0] is the output row for the single review in the batch;
        # after the first addition `total` is an array, hence total[0] below.
        total += predict[0]
        i += 1

if i:
    # Scale the mean prediction to a score out of ten.
    number = str(round(total[0]/i*10, 1))
    print('Expected Score: ' + number + '/10')
    print('Actual Score: 6.0/10.0')
else:
    # Without this guard an empty file fails on total[0], because `total`
    # is still the plain int 0.
    print('No reviews found in 6_descendants_review.txt')
        