In [None]:
import tensorflow as tf
from keras.datasets import imdb
from keras.preprocessing import sequence
import numpy as np

In [None]:
# Hyperparameters shared by the cells below.
vocabulary = 100_000  # forwarded to imdb.load_data as num_words
max_len = 50          # target length when padding/truncating sequences
batch_size = 64       # mini-batch size used by model.fit

# Fetch the IMDB dataset, already split into train and test parts.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=vocabulary
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Display the first training sample.
# NOTE(review): the saved output is a 50-element int32 array, which looks like
# the result of the padding cell further down (max_len = 50) rather than the
# raw sequence from imdb.load_data -- presumably the cells were executed out of
# order. Confirm with Restart Kernel -> Run All.
train_data[0]

array([2071,   56,   26,  141,    6,  194, 7486,   18,    4,  226,   22,
         21,  134,  476,   26,  480,    5,  144,   30, 5535,   18,   51,
         36,   28,  224,   92,   25,  104,    4,  226,   65,   16,   38,
       1334,   88,   12,   16,  283,    5,   16, 4472,  113,  103,   32,
         15,   16, 5345,   19,  178,   32], dtype=int32)

In [None]:
# Bring every sequence in both splits to exactly max_len entries.
train_data, test_data = (
    sequence.pad_sequences(split, maxlen=max_len)
    for split in (train_data, test_data)
)

In [None]:
# Layer widths. These were previously tied to `batch_size`, which is an
# unrelated training setting; the values are unchanged (64).
embedding_dim = 64
lstm_units = 64

model = tf.keras.Sequential([
    # input_dim must be the number of distinct token ids (largest id + 1), i.e.
    # the vocabulary size -- NOT the sequence length. Using max_len here left
    # almost every token id outside the embedding table.
    tf.keras.layers.Embedding(input_dim=vocabulary, output_dim=embedding_dim),
    tf.keras.layers.LSTM(units=lstm_units),
    # Single sigmoid unit: one score in [0, 1] per input sequence.
    tf.keras.layers.Dense(units=1, activation="sigmoid")
])

In [None]:
# Print the per-layer output shapes and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 64)          3200      
                                                                 
 lstm (LSTM)                 (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 36289 (141.75 KB)
Trainable params: 36289 (141.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Binary classification setup: RMSprop optimizer, binary cross-entropy loss,
# accuracy reported under the name "acc".
# `metrics` is passed as a list: newer Keras releases reject a bare string and
# require a list, tuple or dict, while older releases accept the list as well.
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["acc"],
)

In [None]:
# Train for 20 epochs; the last 20% of the training data is held out for validation.
model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=batch_size,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7b4446412980>

In [None]:
# Score the trained model on the test split; the result is [loss, acc],
# matching the loss and metric configured in model.compile.
model.evaluate(test_data, test_labels)



[0.6582735776901245, 0.6127600073814392]

In [None]:
# Maps each word to its raw frequency rank (1 = most frequent). These ranks are
# NOT the ids the model was trained on -- see encode_text.
word_index = imdb.get_word_index()

def encode_text(text, start_char=1, oov_char=2, index_from=3):
  """Encode raw text with the same id scheme that imdb.load_data produces.

  imdb.load_data shifts every word rank by `index_from`, prepends
  `start_char` to each review, and replaces any id that is not below
  `num_words` with `oov_char`; id 0 is used only for padding. The defaults
  here mirror the defaults of imdb.load_data, which is how the training data
  in this notebook was loaded.

  Args:
    text: Review text as a plain string.
    start_char: Id placed at the beginning of the sequence.
    oov_char: Id used for unknown words and for words outside the
      `vocabulary` most frequent ones.
    index_from: Offset added to every rank from `word_index`.

  Returns:
    1-D integer array of length `max_len` (padded/truncated at the front).
  """
  words = tf.keras.preprocessing.text.text_to_word_sequence(text)
  tokens = [start_char]
  for word in words:
    rank = word_index.get(word)
    if rank is None or rank + index_from >= vocabulary:
      # Unknown or too rare for the model's vocabulary: use the OOV id rather
      # than 0, which the model only ever saw as padding.
      tokens.append(oov_char)
    else:
      tokens.append(rank + index_from)
  return sequence.pad_sequences([tokens], maxlen=max_len)[0]


In [None]:
def predict(text):
  """Print "Positive" or "Negative" for `text`, followed by the model's score.

  The text is encoded with encode_text, wrapped into a one-row batch and fed
  to `model`; a score above 0.5 is reported as positive.
  """
  # One-row float batch of shape (1, max_len) holding the encoded review.
  batch = np.zeros((1, max_len))
  batch[0, :] = encode_text(text)

  score = model.predict(batch, verbose=0)[0]
  sentiment = "Positive" if score > 0.5 else "Negative"
  print(sentiment, score)

# Two hand-written reviews used as a quick sanity check of the classifier.
positive_review = "That movie was! really loved it and would great watch it again because it was amazingly great"
negative_review = "that movie really sucks sucks sucks sucks. I really hate hate suck this. I hated it and wouldn't watch it again. Was one of the worst things I've ever watched"

# Classify the positive example first, then the negative one.
for review in (positive_review, negative_review):
  predict(review)

Positive [0.5489679]
Positive [0.5444149]
