# Neccessary Imports 

In [29]:
import numpy as np
import os

os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow logging

from keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

# Loading Data Set and Preprocessing 

In [5]:
VOCAB_SiZE = 88584

MAXLEN = 250
BATCH_SIZE = 64

In [6]:
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words= VOCAB_SiZE) #Load the data

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1us/step


In [27]:
print(f'training data is {train_data[1]} and corresponding label is {train_labels[1]}')

training data is [    0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     1   194  1153   194  8255    78   228     5     6  1463  4369
  5012   134    26     4   715     8   118  1634    14   394    20    13
   119   954   189   102     5   207   110  3103    21    14    69   188
     8    30    23     7     4   249   126    93     4   114     9  2300
  1523     5   647     4   116     9    35  8163     4   229     9   340
  1322     4   118     9     4   130  4901    19     4  1002     5    89
    29   952    46    37     4   455     9    45    43    38  1543  1905
   398     4  1649    26  6853     5   163    11  3215 10156     4  1153
     9   194   775     7  8255 115

In [24]:
train_data = pad_sequences(train_data, MAXLEN)
test_data = pad_sequences(test_data, MAXLEN)

In [25]:
print(f'training data is {len(train_data[1])} and corresponding label is {train_labels[1]}')

training data is 250 and corresponding label is 0


# Create the Model & Compile it & Train it

In [34]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(VOCAB_SiZE, 32),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [35]:
model.summary()

In [36]:
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels, epochs=10, validation_split=0.2)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 56ms/step - accuracy: 0.6886 - loss: 0.5583 - val_accuracy: 0.8494 - val_loss: 0.3624
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 55ms/step - accuracy: 0.8844 - loss: 0.2923 - val_accuracy: 0.8806 - val_loss: 0.2934
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 55ms/step - accuracy: 0.9221 - loss: 0.2171 - val_accuracy: 0.8740 - val_loss: 0.3182
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 54ms/step - accuracy: 0.9385 - loss: 0.1761 - val_accuracy: 0.8776 - val_loss: 0.2932
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 54ms/step - accuracy: 0.9412 - loss: 0.1601 - val_accuracy: 0.8842 - val_loss: 0.3081
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 55ms/step - accuracy: 0.9603 - loss: 0.1191 - val_accuracy: 0.8782 - val_loss: 0.3090
Epoch 7/10
[1m6

# Model Evaluation

In [42]:
result = model.evaluate(test_data, test_labels)
print(result)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.8731 - loss: 0.4189
[0.4231087863445282, 0.871999979019165]


# Model Testing 

In [47]:
word_index = imdb.get_word_index()

def encode_text(text):
    tokens = tf.keras.preprocessing.text.text_to_word_sequence(text)
    tokens = [word_index[word] if word in word_index else 0 for word in tokens]
    return pad_sequences([tokens], MAXLEN)[0]

text = "that was just amazing, so amazing"
encoded = encode_text(text)
print(encoded)

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0  12  13  4

In [66]:
def predict(text):
        encoded_text = encode_text(text)
        pred = np.zeros((1,250))
        pred[0] = encoded_text
        result = model.predict(pred)
        print(result[0])
        if result[0] > 0.5:
            print("The Model prediction is postive")
        else:
            print("The Model prediction is negative")

postive_review = "That movie was so awesome! I really loved it and would watch it again because it was amazingly great"
predict(postive_review)

negative_reviw = "that movie sucked. I hated it and wouldn't watch it again. Was one of the worst things I've ever watched"
predict(negative_reviw)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[0.9762143]
The Model prediction is postive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[0.16543584]
The Model prediction is negative
