In [12]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.models import load_model

In [16]:
word_index = imdb.get_word_index()
reverse_word_index = {values:key for key, values in word_index.items()}

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [14]:
model = load_model('simple_rnn_imdb.h5')
model.summary()



In [15]:
model.get_weights()

[array([[-0.04075189,  0.23707266, -0.04425325, ...,  0.06358232,
          0.14317371,  0.15294816],
        [-0.01419552, -0.01252169,  0.00159679, ..., -0.01222592,
         -0.05909599, -0.0525274 ],
        [ 0.09360859, -0.03758175, -0.03821437, ..., -0.0525563 ,
         -0.02824563, -0.01299768],
        ...,
        [-0.09709489, -0.163351  ,  0.22535312, ...,  0.05709922,
         -0.5695526 ,  0.00965206],
        [-0.04788806, -0.00251744,  0.0253484 , ..., -0.04412328,
          0.03618584, -0.01821662],
        [-0.3579613 , -0.17596237,  0.03110517, ..., -0.10260399,
         -0.44224036,  0.0369277 ]], dtype=float32),
 array([[-0.17473978, -0.01011315, -0.06171831, ..., -0.08834611,
         -0.13058197, -0.1118517 ],
        [ 0.12643854,  0.07405404,  0.06722458, ..., -0.09794523,
          0.0530915 , -0.12397791],
        [ 0.11650997, -0.03889141, -0.07868643, ...,  0.04895309,
          0.11495681, -0.00390061],
        ...,
        [ 0.04956939,  0.01241155,  0.0

In [None]:
#Step 3 : Helper functions 
# Function to decode the reviews
def decode_reviews(encoded_reviews):
    return ' '.join([reverse_word_index.get(i,'?') for i in encoded_reviews])

# Function to preprocess user input
def preprocess_text(text):
    words = text.lower().split()
    encoded_review = [word_index.get(word,2)+3 for word in words]
    padded_review = pad_sequences([encoded_review], maxlen=500)
    return padded_review

In [None]:
#Step 3
## Prediction function
def predict_sentiment(review):
    preprocessed_input = preprocess_text(review)
    prediction = model.predict(preprocessed_input)
    print(prediction)
    sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
    return sentiment, prediction[0][0]

In [23]:
# Step 4
example_review = "Good movie, but not great"
sentiment, score = predict_sentiment(example_review)
print(f"Review: {example_review}")
print(f"Sentiment: {sentiment}")
print(f"Score: {score}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[[0.40065366]]
Review: Good movie, but not great
Sentiment: Negative
Score: 0.4006536602973938


In [24]:
## Load the imdb dataset
max_features = 1000 # number of words to consider as features(vocab_size)
(X_train, Y_train), (x_test, y_test) = imdb.load_data(num_words = max_features)

In [25]:
x_test

array([list([1, 591, 202, 14, 31, 6, 717, 10, 10, 2, 2, 5, 4, 360, 7, 4, 177, 2, 394, 354, 4, 123, 9, 2, 2, 2, 10, 10, 13, 92, 124, 89, 488, 2, 100, 28, 2, 14, 31, 23, 27, 2, 29, 220, 468, 8, 124, 14, 286, 170, 8, 157, 46, 5, 27, 239, 16, 179, 2, 38, 32, 25, 2, 451, 202, 14, 6, 717]),
       list([1, 14, 22, 2, 6, 176, 7, 2, 88, 12, 2, 23, 2, 5, 109, 943, 4, 114, 9, 55, 606, 5, 111, 7, 4, 139, 193, 273, 23, 4, 172, 270, 11, 2, 2, 4, 2, 2, 109, 2, 21, 4, 22, 2, 8, 6, 2, 2, 10, 10, 4, 105, 987, 35, 841, 2, 19, 861, 2, 5, 2, 2, 45, 55, 221, 15, 670, 2, 526, 14, 2, 4, 405, 5, 2, 7, 27, 85, 108, 131, 4, 2, 2, 2, 405, 9, 2, 133, 5, 50, 13, 104, 51, 66, 166, 14, 22, 157, 9, 4, 530, 239, 34, 2, 2, 45, 407, 31, 7, 41, 2, 105, 21, 59, 299, 12, 38, 950, 5, 2, 15, 45, 629, 488, 2, 127, 6, 52, 292, 17, 4, 2, 185, 132, 2, 2, 2, 488, 2, 47, 6, 392, 173, 4, 2, 2, 270, 2, 4, 2, 7, 4, 65, 55, 73, 11, 346, 14, 20, 9, 6, 976, 2, 7, 2, 861, 2, 5, 2, 30, 2, 2, 56, 4, 841, 5, 990, 692, 8, 4, 2, 398, 229, 10,

In [26]:
def add_padding(text):
    padded_review = pad_sequences([text], maxlen=500)
    return padded_review

In [28]:
print(y_test.shape, type(y_test))

(25000,) <class 'numpy.ndarray'>


In [None]:
x_test_processed = pad_sequences(x_test, maxlen=500)

In [34]:
y_pred = model.predict(x_test_processed)
y_pred

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 40ms/step


array([[0.09713704],
       [0.9044285 ],
       [0.8979006 ],
       ...,
       [0.19232342],
       [0.11779734],
       [0.929595  ]], dtype=float32)

In [42]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
score = accuracy_score(y_test, y_pred > 0.5)
cm = confusion_matrix(y_test, y_pred > 0.5)
cr = classification_report(y_test, y_pred > 0.5)
print(score)
print(cm)
print(cr)

0.79428
[[ 9733  2767]
 [ 2376 10124]]
              precision    recall  f1-score   support

           0       0.80      0.78      0.79     12500
           1       0.79      0.81      0.80     12500

    accuracy                           0.79     25000
   macro avg       0.79      0.79      0.79     25000
weighted avg       0.79      0.79      0.79     25000



In [45]:
model.evaluate(x_test_processed, y_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 41ms/step - accuracy: 0.7943 - loss: 0.4837


[0.48372313380241394, 0.7942799925804138]