In [59]:
# Load and preprocess the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Pad sequences to ensure uniform length
maxlen = 500
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

print(f"Training data shape: {x_train.shape}")
print(f"Testing data shape: {x_test.shape}")

Training data shape: (25000, 500)
Testing data shape: (25000, 500)


In [60]:
# Create the SimpleRNN model with improved architecture
model = Sequential([
    Embedding(10000, 64),  # Increased embedding dimension
    SimpleRNN(64, dropout=0.2, recurrent_dropout=0.2),  # Added dropout for regularization
    Dense(32, activation='relu'),  # Added intermediate dense layer
    Dense(1, activation='sigmoid')
])

# Build the model with input shape
model.build(input_shape=(None, 500))  # 500 is our maxlen

# Compile the model with improved settings
model.compile(optimizer='adam',  # Changed to adam optimizer
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Display model summary
model.summary()

In [61]:
# Train the model with improved training parameters
history = model.fit(x_train, y_train,
                    epochs=15,  # Increased epochs
                    batch_size=64,  # Reduced batch size for better generalization
                    validation_split=0.2,
                    verbose=1)

# Evaluate the model on test data
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'\nTest accuracy: {test_acc:.3f}')

Epoch 1/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 113ms/step - accuracy: 0.5120 - loss: 0.6965 - val_accuracy: 0.5358 - val_loss: 0.6888
Epoch 2/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 112ms/step - accuracy: 0.5314 - loss: 0.6894 - val_accuracy: 0.5548 - val_loss: 0.6795
Epoch 3/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 112ms/step - accuracy: 0.5590 - loss: 0.6771 - val_accuracy: 0.5612 - val_loss: 0.6690
Epoch 4/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 110ms/step - accuracy: 0.5862 - loss: 0.6631 - val_accuracy: 0.6044 - val_loss: 0.6488
Epoch 5/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 112ms/step - accuracy: 0.6110 - loss: 0.6424 - val_accuracy: 0.6038 - val_loss: 0.6414
Epoch 6/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 110ms/step - accuracy: 0.6410 - loss: 0.6228 - val_accuracy: 0.6156 - val_loss: 0.6315
Epoch 7/15

In [62]:
# Save the trained model
model.save('simple_rnn_imdb.h5')
print("Model saved successfully!")



Model saved successfully!


In [63]:
import numpy as np
import pandas as pd
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense

In [64]:
word_index=imdb.get_word_index()
reverse_word_index={value: key for key,value in word_index.items()}


In [4]:
model=load_model('simple_rnn_imdb.h5')
model.summary()



In [65]:
model.get_weights()

[array([[-0.06399251, -0.03257281,  0.07295468, ...,  0.0394618 ,
          0.05375782,  0.04740079],
        [-0.09777822,  0.05513681, -0.00538881, ...,  0.04694052,
          0.03853096,  0.00588319],
        [ 0.03616268,  0.09125412,  0.01456579, ...,  0.02248602,
         -0.02157305,  0.03995638],
        ...,
        [ 0.07937548, -0.04275234,  0.01933134, ..., -0.09270284,
         -0.05238854,  0.00417251],
        [-0.09213   , -0.13485558, -0.07747236, ..., -0.00259153,
         -0.05790082,  0.13588387],
        [ 0.09953211, -0.08207335,  0.09139773, ..., -0.01613648,
          0.02972493,  0.062899  ]], shape=(10000, 64), dtype=float32),
 array([[-0.09233293,  0.06172058, -0.10504811, ...,  0.03728642,
         -0.25440368,  0.07704142],
        [ 0.18230826,  0.09765802,  0.25249586, ...,  0.07483737,
         -0.2143968 ,  0.12827687],
        [ 0.19180496,  0.10723943,  0.02128827, ..., -0.05598957,
         -0.08973702, -0.06318617],
        ...,
        [ 0.01606019

In [66]:
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i-3,'?') for i in encoded_review])

def preprocess_text(text):
    words=text.lower().split()
    encoded_review=[word_index.get(word,2)+ 3 for word in words]
    padded_review=sequence.pad_sequences([encoded_review],maxlen=500)
    return padded_review

In [67]:
#prediction function
def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)
    
    prediction=model.predict(preprocessed_input)
    sentiment='Positive' if prediction[0][0]>0.5 else 'Negative'
    return sentiment,prediction[0][0]

In [68]:
example_review="This movie was fantastic! The acting was great and the plot was thrilling."
sentiment,score=predict_sentiment(example_review)
print(f'Review:{example_review}')
print(f'Sentiment: {sentiment}')
print(f'Prediction Score: {score}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step
Review:This movie was fantastic! The acting was great and the plot was thrilling.
Sentiment: Positive
Prediction Score: 0.6837202906608582


In [69]:
example_review="This movie was boring! The acting bad"
sentiment,score=predict_sentiment(example_review)
print(f'Review:{example_review}')
print(f'Sentiment: {sentiment}')
print(f'Prediction Score: {score}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Review:This movie was boring! The acting bad
Sentiment: Negative
Prediction Score: 0.038917504251003265
