In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model

In [3]:
## load the IMDB dataset word index
word_idx = imdb.get_word_index()
reverse_word_idx = {value: key for key, value in word_idx.items()}

In [5]:
## load the pre-trained model
model = load_model('simple_rnn_imdb.h5')  




In [6]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               32896     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
model.get_weights()

[array([[ 0.02901422, -0.01813084, -0.05364007, ..., -0.0057711 ,
          0.00737218,  0.02366708],
        [ 0.0768182 , -0.04914268, -0.05618829, ..., -0.00865434,
          0.0104905 ,  0.06005235],
        [-0.01826759,  0.00859141,  0.01612857, ...,  0.07113775,
          0.04598568, -0.11507121],
        ...,
        [ 0.1311614 , -0.07095106, -0.12485666, ..., -0.11249772,
          0.13309519,  0.04041913],
        [-0.07887197,  0.19252826, -0.00975803, ...,  0.12202068,
          0.01078445, -0.11189645],
        [ 0.05648317, -0.04136198, -0.15794896, ..., -0.15362857,
          0.20256418,  0.08688666]], dtype=float32),
 array([[-6.05826080e-02,  8.35412145e-02,  4.09397595e-02, ...,
          1.14980554e-02, -3.64159718e-02, -1.36757866e-01],
        [ 2.80910786e-02, -1.48472726e-01, -1.25130653e-01, ...,
          5.70303909e-02, -1.51749160e-02,  1.47134498e-01],
        [-2.21066121e-02,  1.48093238e-01, -1.18990560e-04, ...,
         -3.71142849e-02, -8.83741602e-02

In [8]:
# fucntion to decode the review:
def decode_review(encoded_review):
    decoded_review = ' '.join([reverse_word_idx.get(i - 3, '') for i in encoded_review])
    return decoded_review

# function to preprocess user input
def preprocess_text(text):
    words = text.lower().split()
    encoded_review = [word_idx.get(word, 2) + 3 for word in words]
    padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
    return padded_review

In [9]:
## prediction func()

def predict_sentiment(review):
    preprocess_input = preprocess_text(review)
    
    prediction = model.predict(preprocess_input)
    
    sentiment = 'positive' if prediction > 0.5 else 'negative'
    
    return sentiment, prediction[0][0]

In [11]:
## sample input
example_review = "This is the worst movie I have ever seen. I don't recommend to anyone."

sentiment, prediction = predict_sentiment(example_review)

print(f'Review: {example_review}')
print(f'sentiment: {sentiment}')
print(f'prediction: {prediction}')

Review: This is the worst movie I have ever seen. I don't recommend to anyone.
sentiment: negative
prediction: 0.3760667145252228
