# Sample Prediction

## <font colors = 'yellow'> Importing Required Libiraries </font>

In [4]:
# Importing pandas for data manipulation and analysis
import pandas as pd

# Importing numpy for numerical computations
import numpy as np

# Importing TensorFlow for building and training neural networks
import tensorflow

# importing keras
import keras

# Importing the IMDB dataset from Keras, which is a part of TensorFlow
from tensorflow.keras.datasets import imdb

# Importing sequence preprocessing utilities for padding and truncating sequences
from tensorflow.keras.preprocessing import sequence

# Importing Sequential to define a linear stack of layers for the model
from tensorflow.keras.models import Sequential

# Importing Embedding layer to convert words into dense vectors of fixed size
# Importing SimpleRNN layer to build a simple recurrent neural network
# Importing Dense layer to add fully connected layers to the model
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [2]:
# Checking the tensorflow libraries
print(tensorflow.__version__)

# checking the keras libraries
print(keras.__version__)


2.19.0
3.9.2


## <font color = 'yellow'> Load dateset - IMDB word index </font>

In [5]:
# get word index from the IMDB dataset
word_index = imdb.get_word_index() # Get the word index mapping from the IMDB dataset

# reverse the word index mapping to get a mapping from indices to words
reversce_word_index = dict([(value, key) for (key, value) in word_index.items()]) 

reversce_word_index

{34701: 'fawn',
 52006: 'tsukino',
 52007: 'nunnery',
 16816: 'sonja',
 63951: 'vani',
 1408: 'woods',
 16115: 'spiders',
 2345: 'hanging',
 2289: 'woody',
 52008: 'trawling',
 52009: "hold's",
 11307: 'comically',
 40830: 'localized',
 30568: 'disobeying',
 52010: "'royale",
 40831: "harpo's",
 52011: 'canet',
 19313: 'aileen',
 52012: 'acurately',
 52013: "diplomat's",
 25242: 'rickman',
 6746: 'arranged',
 52014: 'rumbustious',
 52015: 'familiarness',
 52016: "spider'",
 68804: 'hahahah',
 52017: "wood'",
 40833: 'transvestism',
 34702: "hangin'",
 2338: 'bringing',
 40834: 'seamier',
 34703: 'wooded',
 52018: 'bravora',
 16817: 'grueling',
 1636: 'wooden',
 16818: 'wednesday',
 52019: "'prix",
 34704: 'altagracia',
 52020: 'circuitry',
 11585: 'crotch',
 57766: 'busybody',
 52021: "tart'n'tangy",
 14129: 'burgade',
 52023: 'thrace',
 11038: "tom's",
 52025: 'snuggles',
 29114: 'francesco',
 52027: 'complainers',
 52125: 'templarios',
 40835: '272',
 52028: '273',
 52130: 'zaniacs',

## <font color = 'yellow'> Load Trained Model </font>

In [8]:
# Import the necessary module
from keras.models import load_model
import os
from pathlib import Path

# # Load the saved RNN model from the file
# model = load_model('./my_rnn_model.keras')

model_path = model_path = Path("my_rnn_model.keras").resolve()
model = keras.models.load_model(model_path)

  saveable.load_own_variables(weights_store.get(inner_path))


In [5]:
model.get_weights()

[array([[-0.20335908, -0.04166745, -0.00464649, ..., -0.20630755,
         -0.03109794,  0.21587583],
        [-0.03529742, -0.01449239,  0.01961466, ..., -0.07078966,
         -0.01874667,  0.02988595],
        [-0.08103805,  0.10301799, -0.08372701, ..., -0.0594093 ,
         -0.06925194,  0.03259673],
        ...,
        [ 0.07296191, -0.09046352,  0.06936408, ...,  0.12641521,
          0.07282709, -0.00483994],
        [ 0.0358222 ,  0.02246669,  0.03047189, ...,  0.04080334,
         -0.03571145,  0.04773964],
        [ 0.025585  , -0.09616893, -0.11657958, ...,  0.06787089,
         -0.09465314, -0.14647941]], dtype=float32),
 array([[ 0.07140467, -0.10721745,  0.06146709, ...,  0.10188872,
         -0.00026403, -0.04628385],
        [ 0.04959622, -0.11314866, -0.05783533, ...,  0.02296993,
         -0.07428459, -0.01423851],
        [ 0.06031627,  0.0289358 ,  0.00748892, ..., -0.00816388,
          0.01423173,  0.09239241],
        ...,
        [-0.11224694,  0.04203565, -0.0

## <font color = 'yellow'> Add-on Functions </font>

### function to decode review

In [9]:
# Function to convert numerical review into readable text
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i-3, '?') for i in encoded_review])

### Function to Preprocess user input

In [7]:
# Import necessary libraries
import keras

# Function to preprocess user input for deep learning models
def pre_process_input(user_input):
    """
    Converts user text input into a format suitable for deep learning models.
    Steps:
    1. Lowercase conversion and word tokenization.
    2. Mapping words to numerical indices using 'word_index'.
    3. Applying padding to ensure a fixed sequence length.
    """

    # Step 1: Convert text to lowercase and split into words
    words = user_input.lower().split()

    # Step 2: Convert words into numerical indices using 'word_index'
    # - If a word is not found in 'word_index', fallback index '2' is used.
    # - '+3' is added to the index to account for reserved tokens.
    encoded_review = [word_index.get(word, 2) + 3 for word in words]

    # Step 3: Apply padding to standardize sequence length (600 tokens)
    # - Short sequences are padded (usually with zeros).
    # - Long sequences are truncated from the beginning.
    padded_review = keras.preprocessing.sequence.pad_sequences([encoded_review], maxlen=600)

    # Step 4: Return the processed sequence ready for model input
    return padded_review

'''
Explaination:
Encoded Review:
    Convert Words to Encoded Tokens
    encoded_review = [word_index.get(word, 2) + 3 for word in words]
        - Uses word_index, which is a dictionary mapping words to numerical values.
        - Retrieves each word's corresponding index (or assigns 2 if the word is not found in word_index).
        - Adds 3 to each index (i+3)—likely done to account for reserved tokens (padding, unknown, start-of-sequence).

Pad the Encoded Sequence
    padded_review = keras.preprocessing.sequence.pad_sequences([encoded_review], maxlen=100)
        - Ensures the sequence has a fixed length of 600 tokens. (maxlen = 600)
        - Shorter sequences are padded (usually with zeros).
        - Longer sequences are truncated from the beginning.
        - pad_sequences() takes a list of sequences, so [encoded_review] is passed.
'''

"\nExplaination:\nEncoded Review:\n    Convert Words to Encoded Tokens\n    encoded_review = [word_index.get(word, 2) + 3 for word in words]\n        - Uses word_index, which is a dictionary mapping words to numerical values.\n        - Retrieves each word's corresponding index (or assigns 2 if the word is not found in word_index).\n        - Adds 3 to each index (i+3)—likely done to account for reserved tokens (padding, unknown, start-of-sequence).\n\nPad the Encoded Sequence\n    padded_review = keras.preprocessing.sequence.pad_sequences([encoded_review], maxlen=100)\n        - Ensures the sequence has a fixed length of 600 tokens. (maxlen = 600)\n        - Shorter sequences are padded (usually with zeros).\n        - Longer sequences are truncated from the beginning.\n        - pad_sequences() takes a list of sequences, so [encoded_review] is passed.\n"

### Prediction Function

In [8]:
# Function to predict sentiment of a given review
def predict_statement(review):
    """
    Processes user input, predicts sentiment using the trained model, 
    and classifies the review as Positive or Negative.
    """

    # Step 1: Preprocess the input review
    processed_input = pre_process_input(review)  # Converts text to numerical format
    
    # Step 2: Make a prediction using the trained model
    prediction = model.predict(processed_input)  # Predicts sentiment score (probability between 0-1)

    # Step 3: Interpret the prediction
    # - If prediction > 0.5 → Positive sentiment
    # - Else → Negative sentiment
    statement = 'Positive' if prediction > 0.5 else 'Negative'

    # Step 4: Return sentiment label and raw prediction score
    return statement, prediction[0][0]


## <font color = 'yellow'> User Input and Output </font>

In [9]:
# Example review text to analyze sentiment
# example_review = 'This movie was fantastic! The acting was great and the plot was thrilling.'
example_review = 'what a nice movie i seen'

# Step 1: Call the sentiment prediction function
statement, score = predict_statement(example_review)

# Step 2: Print the original review
print(f'Example Review : {example_review}')

# Step 3: Print sentiment classification result
print(f'Statement Result : {statement}')  # Expected Output: "Positive" or "Negative"

# Step 4: Print the actual prediction score (probability between 0 and 1)
print(f'Prediction Score : {score}')  # Example Output: 0.87 (closer to 1 means positive sentiment)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 657ms/step
Example Review : what a nice movie i seen
Statement Result : Positive
Prediction Score : 0.7302392721176147
