In [None]:
from keras.datasets import imdb
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import numpy as np

# Seed NumPy and TensorFlow up front so weight initialisation and batch
# shuffling are repeatable across "Restart & Run All" (as far as the
# backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Passed to load_data() as num_words and used as the Embedding input size
VOCAB_SIZE = 88584

# Every review is padded/truncated to this many tokens
MAXLEN = 250
# Intended mini-batch size for training
BATCH_SIZE = 64

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = VOCAB_SIZE)

In [None]:
# Inspect a single encoded review (a sequence of integer word ids)
sample_review = train_data[1]
print(sample_review)
# Reviews differ in length; show how many tokens this one has
print(len(sample_review))

## preprocessing

In [None]:
# Bring every review to exactly MAXLEN tokens.
# pad_sequences pads (and truncates) on the left by default.
train_data = sequence.pad_sequences(train_data, maxlen=MAXLEN)
test_data = sequence.pad_sequences(test_data, maxlen=MAXLEN)

## creating the model

In [None]:
# Word ids -> 32-d embeddings -> LSTM summary vector -> single sigmoid unit
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(VOCAB_SIZE, 32))
model.add(tf.keras.layers.LSTM(32))
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

model.summary()

## training

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit
model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"])

# BATCH_SIZE is defined in the configuration cell; pass it explicitly,
# otherwise fit() silently falls back to its default batch size of 32.
# validation_split holds out the last 20% of the training arrays.
history = model.fit(
    train_data,
    train_labels,
    epochs=10,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
)

In [None]:
# evaluate() returns the loss followed by the metrics given to compile()
results = model.evaluate(x=test_data, y=test_labels)
print(results)

[0.48960790038108826, 0.8442800045013428]


## prediction

* ### encode

In [None]:
word_index = imdb.get_word_index()

def encode_text(text):
  tokens = keras.preprocessing.text.text_to_word_sequence(text)
  tokens = [word_index[word] if word in word_index else 0 for word in tokens]
  return sequence.pad_sequences([tokens], MAXLEN)[0]

text = "that movie was just amazing, so amazing"
encoded = encode_text(text)
print(encoded)

In [None]:
word_index = imdb.get_word_index()
print(word_index)

* ### decode

In [None]:
reverse_word_index = {value: key for (key, value) in word_index.items()}

def decode_integers(integers):
    PAD = 0
    text = ""
    for num in integers:
      if num != PAD:
        text += reverse_word_index[num] + " "

    return text[:-1]
  
print(decode_integers(encoded))

* ### predict

In [None]:
def predict(text):
  """Print the model's sigmoid output for a raw review string.

  Args:
    text: review text as a plain string; it is encoded with encode_text().

  The printed value is a one-element array holding the model's score in
  [0, 1] (the sigmoid output of the final Dense layer).
  """
  encoded_text = encode_text(text)
  # Add a leading batch axis. This follows the encoded length automatically
  # (no hard-coded 250 that would break if MAXLEN changes) and keeps the
  # word ids as integers instead of copying them into a float buffer.
  batch = np.expand_dims(encoded_text, axis=0)
  result = model.predict(batch)
  print(result[0])

positive_review = "That movie was! really loved it and would great watch it again because it was amazingly great"
predict(positive_review)

negative_review = "that movie really sucked. I hated it and wouldn't watch it again. Was one of the worst things I've ever watched"
predict(negative_review)

## save and load model

In [None]:
# Write the trained model to disk, then read it back into a new object
MODEL_PATH = "sentiment_analysis.h5"
model.save(MODEL_PATH)
new_model = tf.keras.models.load_model(MODEL_PATH)