In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model




In [2]:
# Word -> integer rank mapping shipped with the Keras IMDB dataset.
word_index = imdb.get_word_index()
# Inverse mapping (integer rank -> word), used when decoding reviews.
reverse_word_index = {index: word for word, index in word_index.items()}

In [3]:
# Load the saved Keras model from the HDF5 file in the working directory,
# then print its layer-by-layer summary.
model=load_model('simplernn_imdb_new.h5')
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               32896     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [4]:
# Display the model's weight arrays as the cell output (the full list of arrays).
model.get_weights()

[array([[ 0.05752382, -0.01656881, -0.0382678 , ..., -0.02105913,
          0.02834929, -0.05175304],
        [ 0.05171227,  0.05147422, -0.0243834 , ...,  0.00695982,
         -0.02320295,  0.00445587],
        [-0.00796932,  0.03888009,  0.0092711 , ..., -0.01781783,
         -0.01486864, -0.00208145],
        ...,
        [ 0.02665833,  0.01133152, -0.03095622, ...,  0.02933452,
         -0.02367445, -0.00129976],
        [ 0.05795081,  0.00148676, -0.01825889, ...,  0.00991035,
         -0.02529667,  0.02317122],
        [ 0.01077077,  0.03338808,  0.00993272, ...,  0.04462409,
          0.02651378, -0.03253398]], dtype=float32),
 array([[ 0.01629644,  0.01490237,  0.01859444, ...,  0.04640658,
         -0.06828795,  0.04151828],
        [ 0.05860238, -0.04524141,  0.06458291, ...,  0.02239025,
          0.0419944 ,  0.05790158],
        [-0.17196393, -0.00910455,  0.09137358, ...,  0.04444037,
         -0.02579504,  0.02720469],
        ...,
        [ 0.05645825, -0.12077858,  0.1

In [5]:
def decode_review(encoded_review):
    """Turn a sequence of encoded token ids back into a space-separated string.

    Each id is shifted down by 3 before being looked up in
    ``reverse_word_index``; ids with no matching entry are rendered as '?'.
    """
    decoded_words = []
    for token_id in encoded_review:
        decoded_words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(decoded_words)

def preprocess_text(text, vocab_size=10000, maxlen=500):
    """Encode a raw review string as a padded id sequence for the model.

    The text is lower-cased and split on whitespace. Each word is looked up
    in ``word_index`` and shifted by 3, because ids 0, 1 and 2 are reserved
    for padding, the start token and the unknown token respectively.

    Args:
        text: Review text to encode.
        vocab_size: Number of rows in the model's embedding table. Any id at
            or above this value is replaced by the unknown id. The default
            matches the loaded model (1,280,000 embedding params / 128 dims).
        maxlen: Length the sequence is padded/truncated to. The default
            matches the model's input length of 500.

    Returns:
        Array of shape (1, maxlen) produced by ``sequence.pad_sequences``.
    """
    oov_index = 2      # reserved id for unknown words
    index_offset = 3   # real words start at 3 + their rank in word_index

    encoded_review = []
    for word in text.lower().split():
        rank = word_index.get(word)
        # Search word_index; a word that is not present becomes the unknown
        # id itself (the previous default of 2 was shifted to 5 by the +3).
        token_id = oov_index if rank is None else rank + index_offset
        # Words ranked beyond the embedding vocabulary would index outside
        # the embedding table, so treat them as unknown too.
        if token_id >= vocab_size:
            token_id = oov_index
        encoded_review.append(token_id)

    # NOTE(review): no start token (id 1) is prepended here; if the model was
    # trained on sequences that begin with it, consider adding one - confirm.
    padded_review = sequence.pad_sequences([encoded_review], maxlen=maxlen)
    return padded_review

In [6]:
def predict_sentiment(review):
    """Classify a review string with the loaded model.

    Returns a ``(label, score)`` tuple: ``score`` is the model's output for
    the review, and ``label`` is 'Positive' when the score is above 0.5 and
    'Negative' otherwise.
    """
    model_input = preprocess_text(review)
    score = model.predict(model_input)[0][0]
    if score > 0.5:
        label = 'Positive'
    else:
        label = 'Negative'
    return label, score

In [11]:
# Sample review text to run through the sentiment model.
ex="This movie was terrible the acting was bad and the story made no sense"


In [12]:
# Score the sample review, then show the text, the label and the raw score,
# one per line.
sentiment, score = predict_sentiment(ex)
print(ex, sentiment, score, sep='\n')

This movie was terrible the acting was bad and the story made no sense
Negative
0.17939179
