In [None]:
# ---------- Recurrent Neural Network ----------

In [None]:
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras import utils
import tensorflow as tf
import os
import numpy as np

# Notebook-wide settings.
VOCAB_SIZE = 88584   # vocabulary cap: used as num_words below and as the Embedding input size
MAXLEN = 250         # every review is padded/truncated to this many entries
BATCH_SIZE = 64      # NOTE(review): not referenced by the cells visible in this notebook

# Each split is a (reviews, labels) pair.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=VOCAB_SIZE
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Check the length of one training review (index 1); review lengths differ from one another

len(train_data[1])

189

In [None]:
# Reviews differ in length, so bring them all to exactly MAXLEN entries:
# longer reviews are truncated and shorter ones are padded.

train_data = utils.pad_sequences(train_data, maxlen=MAXLEN)
test_data = utils.pad_sequences(test_data, maxlen=MAXLEN)

In [None]:
# Build the network layer by layer: embedding -> LSTM -> single sigmoid unit

model = tf.keras.Sequential()
# Embedding learns a 32-dimensional vector for each of the VOCAB_SIZE word indices,
# a far more meaningful representation than the raw integers.
model.add(tf.keras.layers.Embedding(VOCAB_SIZE, 32))
model.add(tf.keras.layers.LSTM(32))
# One sigmoid output for the binary sentiment label.
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

In [None]:
# Print each layer's output shape and parameter count as a sanity check
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          2834688   
                                                                 
 lstm (LSTM)                 (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 2,843,041
Trainable params: 2,843,041
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Training
# Binary cross-entropy matches the model's single sigmoid output; accuracy is tracked as a metric.
# validation_split=0.2 holds out 20% of the training data for validation during fitting.
# NOTE(review): BATCH_SIZE is defined in the setup cell but is not passed to fit(), so Keras'
# default batch size is used here -- confirm whether batch_size=BATCH_SIZE was intended.

model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

history = model.fit(train_data, train_labels, epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluation on the test set
# `results` holds the compiled loss followed by the compiled metric: [loss, accuracy]

results = model.evaluate(test_data, test_labels)
print(results)

[0.5878958106040955, 0.8428000211715698]


In [None]:
# Making predictions
# Since our reviews are encoded, we'll need to convert any review that we write into the same form
# so that the network can understand it. To do that we'll load the word -> integer encodings from
# the dataset and use them to encode our own data.
from tensorflow import keras
word_index = imdb.get_word_index()

def encoded_text(text):
  """Encode a raw review string as a fixed-length sequence of word indices.

  Args:
    text: The review text to encode.

  Returns:
    A sequence of exactly MAXLEN integers (padded or truncated by pad_sequences).
    Words missing from word_index, and words whose index is not below VOCAB_SIZE,
    are encoded as 0.
  """
  tokens = keras.preprocessing.text.text_to_word_sequence(text)
  # The Embedding layer is built with VOCAB_SIZE rows, so any index >= VOCAB_SIZE
  # would fall outside its lookup table; treat such words like unknown words (0).
  # NOTE(review): imdb.load_data() shifts word indices by its `index_from` argument
  # while get_word_index() returns the unshifted mapping, so these ids may not line up
  # with the ids the model was trained on -- confirm, and if so fix together with
  # decode_integers().
  ids = []
  for word in tokens:
    idx = word_index.get(word, 0)
    ids.append(idx if idx < VOCAB_SIZE else 0)
  return utils.pad_sequences([ids], MAXLEN)[0]

text = "that movie was just amazing, so amazing"
# The encoder defined in this notebook is named `encoded_text`; calling `encode_text`
# raises NameError on a fresh kernel.
encoded = encoded_text(text)
print(encoded)

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0  12  17  13  4

In [None]:
# Inverse lookup (integer -> word) so an encoded review can be turned back into text

reverse_word_index = {index: word for word, index in word_index.items()}

def decode_integers(integers):
  """Translate a sequence of word indices back into a space-separated string.

  Entries equal to 0 (the padding value) are skipped; every other entry is
  looked up in reverse_word_index.
  """
  PAD = 0
  words = [reverse_word_index[num] for num in integers if num != PAD]
  return " ".join(words)

print(decode_integers(encoded)) 

that movie was just amazing so amazing


In [None]:
# Now let's make predictions

def predict(text):
  """Print the model's output for a single raw-text review.

  Args:
    text: The review text; it is encoded with encoded_text() before being fed to the model.
  """
  encoded_txt = encoded_text(text)
  # model.predict expects a batch, so place the single sequence in a (1, MAXLEN) array.
  # The width comes from MAXLEN (not a literal 250) so it always matches the padding length.
  pred = np.zeros((1, MAXLEN))
  pred[0] = encoded_txt
  result = model.predict(pred)
  print(result[0])

# Try the model on one clearly positive and one clearly negative hand-written review;
# each call prints the model's sigmoid output for that review.
positive_review = "That movie was so awesome! I really loved it and would watch it again because it was amazingly great"
negative_review = "That movie really sucked. I hated it and wouldn't watch it again. Was one of the worst things I have ever watched"

predict(positive_review)
predict(negative_review)

[0.57587314]
[0.38291138]
