In [24]:
from keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence


In [25]:
vocab_size = 5000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)
print(train_data[0])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 2, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 2, 19, 178, 32]


In [26]:
word_idx = imdb.get_word_index()
 
# Originally the index number of a value and not a key,
# hence converting the index as key and the words as values
word_idx = {i: word for word, i in word_idx.items()}
 
# again printing the review
print([word_idx[i] for i in train_data[0]])

['the', 'as', 'you', 'with', 'out', 'themselves', 'powerful', 'lets', 'loves', 'their', 'becomes', 'reaching', 'had', 'journalist', 'of', 'lot', 'from', 'anyone', 'to', 'have', 'after', 'out', 'atmosphere', 'never', 'more', 'room', 'and', 'it', 'so', 'heart', 'shows', 'to', 'years', 'of', 'every', 'never', 'going', 'and', 'help', 'moments', 'or', 'of', 'every', 'chest', 'visual', 'movie', 'except', 'her', 'was', 'several', 'of', 'enough', 'more', 'with', 'is', 'now', 'current', 'film', 'as', 'you', 'of', 'mine', 'potentially', 'unfortunately', 'of', 'you', 'than', 'him', 'that', 'with', 'out', 'themselves', 'her', 'get', 'for', 'was', 'camp', 'of', 'you', 'movie', 'sometimes', 'movie', 'that', 'with', 'scary', 'but', 'and', 'to', 'story', 'wonderful', 'that', 'in', 'seeing', 'in', 'character', 'to', 'of', '70s', 'and', 'with', 'heart', 'had', 'shadows', 'they', 'of', 'here', 'that', 'with', 'her', 'serious', 'to', 'have', 'does', 'when', 'from', 'why', 'what', 'have', 'critics', 'they'

In [28]:
train_texts = [' '.join([word_idx.get(i, '') for i in seq]) for seq in train_data]
test_texts = [' '.join([word_idx.get(i, '') for i in seq]) for seq in test_data]

# Display the original review
print("Original Review:")
print(train_texts[0])

# Convert the text data to sequences using the Tokenizer
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)

train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)


Original Review:
the as you with out themselves powerful lets loves their becomes reaching had journalist of lot from anyone to have after out atmosphere never more room and it so heart shows to years of every never going and help moments or of every chest visual movie except her was several of enough more with is now current film as you of mine potentially unfortunately of you than him that with out themselves her get for was camp of you movie sometimes movie that with scary but and to story wonderful that in seeing in character to of 70s and with heart had shadows they of here that with her serious to have does when from why what have critics they is you that isn't one will very to as itself with other and in of seen over and for anyone of and br show's to whether from than out themselves history he name half some br of and odd was two most of mean for 1 any an boat she he should is thought and but of script you not while history he heart to real at and but when from one bit then hav

In [29]:
print("Max length of review:  ", len(max((train_data+test_data), key=len)))
print("Min length of a review:  ", len(min((train_data+test_data), key=len)))

Max length of review:   2697
Min length of a review:   70


In [30]:


max_words = 400

train_data = sequence.pad_sequences(train_sequences, maxlen=max_words)
test_data = sequence.pad_sequences(test_sequences, maxlen=max_words)

x_valid, y_valid = train_data[:64], train_labels[:64]
train_data_, train_labels_ = train_data[64:], train_labels[64:]

In [31]:
embd_len = 32

RNN_model = Sequential(name="Simple_RNN")
RNN_model.add(layers.Embedding(vocab_size, embd_len, input_length=max_words))

RNN_model.add(layers.SimpleRNN(128, activation='tanh', return_sequences=False))

RNN_model.add(layers.Dense(1, activation='sigmoid'))

print(RNN_model.summary())

RNN_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

Model: "Simple_RNN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 400, 32)           160000    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 128)               20608     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 180737 (706.00 KB)
Trainable params: 180737 (706.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [32]:
analysis = RNN_model.fit(train_data_,train_labels_, batch_size=64, epochs=5, verbose=1, validation_data=(x_valid, y_valid))
print()
print("Simple_RNN Score ---> ", RNN_model.evaluate(test_data, test_labels, verbose=0))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Simple_RNN Score --->  [0.4469980001449585, 0.800279974937439]


In [39]:
new_text = "This movie was a complete disappointment. The plot was confusing, the characters were poorly developed, and the acting was terrible. I regret wasting my time and money on this film. Definitely not recommended."



# Preprocess the new text
new_text_sequence = tokenizer.texts_to_sequences([new_text])
new_text_padded = sequence.pad_sequences(new_text_sequence, maxlen=max_words)

# Make the prediction
prediction = RNN_model.predict(new_text_padded)

# Display the prediction
print("Sentiment Prediction:", prediction)

# Convert the prediction to label 0(negative), 1(neutral), 2(positive)
#predicted_label = 1 if prediction > 0.4 else 0

'''
if(prediction < 0.4):
    predicted_label = 0
elif(prediction >= 0.4 and prediction < 0.7):
    predicted_label = 1
else:
    predicted_label = 2



# Display the binary label
print("Predicted Label:", predicted_label)'''

Sentiment Prediction: [[0.92111695]]


'\nif(prediction < 0.4):\n    predicted_label = 0\nelif(prediction >= 0.4 and prediction < 0.7):\n    predicted_label = 1\nelse:\n    predicted_label = 2\n\n\n\n# Display the binary label\nprint("Predicted Label:", predicted_label)'