In [7]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence


In [8]:
VOCAB_SIZE = 88584
MAXLEN = 250
BATCH_SIZE = 64

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=VOCAB_SIZE)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [11]:
def decode(integers, index_word):
    PAD = 0
    text = ""
    for num in integers:
        if num != PAD:
            text += index_word[num] + ' '
    return text[:-1]

In [16]:
def encode(text, word_index, max_len=MAXLEN):
    tokens = tf.keras.preprocessing.text.text_to_word_sequence(text)
    tokens = [word_index[word] if word in word_index else 0 for word in tokens]
    return sequence.pad_sequences([tokens], MAXLEN)[0]

In [13]:
word_index = imdb.get_word_index()
index_word = {value: key for (key, value) in word_index.items()}

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [21]:
train_data = sequence.pad_sequences(train_data, MAXLEN)
test_data = sequence.pad_sequences(test_data, MAXLEN)

In [19]:
# CREATING MODEL
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(VOCAB_SIZE, 32),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [22]:
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['acc'])
history = model.fit(train_data, train_labels, epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
def predict(text):
    encoded_text = encode(text, word_index)
    pred = [encoded_text]
    return model.predict(pred)    

In [24]:
positive_review = "That movie was! really loved it and would great watch it again because it was amazingly great"
predict(positive_review)

array([[0.9893925]], dtype=float32)