In [5]:
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense,SimpleRNN,Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

In [3]:
word_index=imdb.get_word_index()
reversed_word_index={value:key for key,value in word_index.items()}

In [6]:
model=load_model('simple_rnn_imdb.h5')



In [7]:
model.summary()

In [10]:
model.trainable_weights

[<Variable path=sequential_7/embedding_5/embeddings, shape=(10000, 128), dtype=float32, value=[[-0.1579209  -0.7944646  -0.5848729  ...  0.31826112  0.31568307
    0.26202083]
  [ 0.00269635 -0.08671962  0.01333705 ...  0.03790532 -0.04158057
   -0.07289447]
  [-0.10507993 -0.15694335 -0.15629762 ...  0.02481078 -0.09032906
   -0.23159121]
  ...
  [-0.03621292 -0.01667756  0.03665846 ...  0.00253278  0.03587114
   -0.01703805]
  [ 0.12046957  0.14149809  0.06955914 ...  0.03957028  0.03262229
   -0.13771558]
  [ 0.10179795 -0.0074832  -0.07087096 ... -0.03440177  0.01026359
    0.1007484 ]]>,
 <Variable path=sequential_7/simple_rnn_4/simple_rnn_cell/kernel, shape=(128, 128), dtype=float32, value=[[ 0.02312728 -0.05417453  0.00127115 ...  0.14688008  0.11737542
    0.14240532]
  [-0.04489541 -0.07961182 -0.06160703 ...  0.07122011  0.19740303
   -0.15099224]
  [-0.10723075 -0.04427017 -0.0430157  ...  0.25084215 -0.11555833
    0.03675208]
  ...
  [-0.16950569 -0.0032682  -0.14392856 ..

In [11]:
def decoded_review(encoded_review):
    return ' '.join([reversed_word_index.get(i-1,'?') for i in encoded_review])


In [12]:
def preprocess_text(text):
    words=text.lower().split()
    encoded_review=[word_index.get(word,2)+3 for word in words]
    padded_review=sequence.pad_sequences([encoded_review],maxlen=200)
    return padded_review

Step-by-Step

text.lower().split()

Converts everything to lowercase.

Splits sentence into words.

Example: "The Movie Was Good" → ["the", "movie", "was", "good"].

word_index.get(word, 2) + 3

word_index is a dictionary mapping words → integer IDs.

get(word, 2) → returns the ID of the word if found, otherwise 2 (which stands for unknown word in IMDB).

+3 → adjusts the index because IMDB reserves the first 3 indices:

0 = padding

1 = start of sequence

2 = unknown word

So the real vocabulary starts at index 3.

sequence.pad_sequences([encoded_review], maxlen=200)

Makes sure all reviews have the same length (200 here).

If review is shorter → pads with zeros at the beginning.

If review is longer → truncates from the start.

Output shape: (1, 200)

In [16]:
def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)
    prediction=model.predict(preprocessed_input)

    sentiment='Positive' if prediction[0][0]>0.5 else 'Negative'
    return prediction,sentiment


In [17]:
example_review='''The Movie was Amazing! such a fantastic art'''

In [18]:
sentiment,score=predict_sentiment(example_review)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step


In [19]:
print(f'sentiment:{sentiment}')
print(f'Score:{score}')

sentiment:[[0.9808817]]
Score:Positive
