In [None]:
# import library
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [None]:
# Load the IMDB review dataset, restricting the vocabulary to 10,000 word ids
train_split, test_split = imdb.load_data(num_words=10000)
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Pad the sequences so that every review has the same length (500 tokens);
# both splits go through the identical pad_sequences call.
x_train, x_test = (
    pad_sequences(split, maxlen=500)
    for split in (x_train, x_test)
)

In [None]:
# Report the shape of each split. Every label names the array that is
# actually printed on that line (features are x_*, labels are y_*).
print(f'x train shape: {x_train.shape}')
print(f'y train shape: {y_train.shape}')
print(f'x test shape: {x_test.shape}')
print(f'y test shape: {y_test.shape}')

x train shape: (25000, 500)
x shape: (25000,)
y train shape: (25000, 500)
y test shape: (25000,)


In [None]:
# Model Building
# Embedding -> SimpleRNN -> sigmoid Dense stack for binary sentiment classification.
model = Sequential([
    # Declare the input shape explicitly: one padded review is 500 token ids.
    # This replaces the Embedding `input_length` argument, which is deprecated
    # in Keras 3 (it is ignored there and only produces a warning).
    tf.keras.Input(shape=(500,)),
    # 10,000 matches num_words used when loading the data; each id becomes a 32-d vector.
    Embedding(input_dim=10000, output_dim=32),
    SimpleRNN(units=100),
    # Single sigmoid unit; downstream code treats outputs above 0.5 as "Positive".
    Dense(1, activation='sigmoid'),
])



In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer,
# and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit the model on the training split and keep the per-epoch metrics in `history`
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=30,
    validation_split=0.2,  # hold out 20% of the training data for validation
)


Epoch 1/30
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 69ms/step - accuracy: 0.5288 - loss: 0.6891 - val_accuracy: 0.6242 - val_loss: 0.6493
Epoch 2/30
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 57ms/step - accuracy: 0.7245 - loss: 0.5626 - val_accuracy: 0.6630 - val_loss: 0.5959
Epoch 3/30
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 54ms/step - accuracy: 0.7894 - loss: 0.4644 - val_accuracy: 0.8218 - val_loss: 0.4199
Epoch 4/30
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 58ms/step - accuracy: 0.8679 - loss: 0.3187 - val_accuracy: 0.8370 - val_loss: 0.3931
Epoch 5/30
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 58ms/step - accuracy: 0.8235 - loss: 0.4034 - val_accuracy: 0.7024 - val_loss: 0.5685
Epoch 6/30
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 58ms/step - accuracy: 0.8158 - loss: 0.4296 - val_accuracy: 0.8040 - val_loss: 0.4668
Epoch 7/30
[1m157/1

In [None]:
# Score the trained model on the test split and report its accuracy
test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test)
print(f'Test Accuracy: {test_accuracy}')


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 22ms/step - accuracy: 0.7793 - loss: 0.6691
Test Accuracy: 0.7813199758529663


In [None]:
# Persist the trained model to disk as an HDF5 (.h5) file
model.save(filepath='sentiment_analysis_model.h5')



In [None]:
# Function to predict sentiment of a review using the loaded model
def predict_sentiment(review, model, word_index=None, num_words=10000, maxlen=500):
    """Classify a raw-text review as 'Positive' or 'Negative'.

    The review is encoded with the same convention `imdb.load_data` applies to
    the training data (its default arguments): id 0 is padding, 1 marks the
    start of a sequence, 2 stands for out-of-vocabulary words, and every real
    word id is its rank from `imdb.get_word_index()` shifted by 3. Ids that
    fall outside the `num_words` vocabulary are replaced by the
    out-of-vocabulary id so they never index past the embedding table.

    Args:
        review: Review text as a plain string.
        model: Object exposing `predict(batch)` that returns a 2-D array-like
            whose `[0][0]` entry is the probability of a positive review.
        word_index: Optional word -> rank mapping. When omitted it is loaded
            with `imdb.get_word_index()`; pass it in to avoid reloading it on
            every call.
        num_words: Vocabulary size the model was trained with.
        maxlen: Sequence length the model was trained with.

    Returns:
        Tuple `(sentiment, confidence)` where `sentiment` is 'Positive' or
        'Negative' and `confidence` is the probability (a float in [0, 1])
        assigned to the returned label.
    """
    import re  # local import keeps this cell self-contained

    # Reserved ids matching the defaults of imdb.load_data.
    start_char, oov_char, index_from = 1, 2, 3

    if word_index is None:
        word_index = imdb.get_word_index()

    # Lower-case and drop punctuation so that e.g. "movie!" matches "movie".
    # Apostrophes are kept inside words ("don't") but stripped at the edges.
    raw_tokens = re.findall(r"[a-z0-9']+", review.lower())
    tokens = [token.strip("'") for token in raw_tokens]

    review_encoded = [start_char]
    for token in tokens:
        if not token:
            continue
        rank = word_index.get(token)
        token_id = oov_char if rank is None else rank + index_from
        if token_id >= num_words:
            # Word is known but rarer than the trained vocabulary allows.
            token_id = oov_char
        review_encoded.append(token_id)

    review_padded = pad_sequences([review_encoded], maxlen=maxlen)

    # Make a prediction
    prediction = model.predict(review_padded)
    positive_prob = float(prediction[0][0])
    sentiment = 'Positive' if positive_prob > 0.5 else 'Negative'
    confidence = positive_prob if sentiment == 'Positive' else 1 - positive_prob

    return sentiment, confidence


In [None]:
# Test the function with sample reviews
# Reload the model from the file written by model.save(...) so that this cell
# works on a fresh kernel run instead of relying on a `loaded_model` variable
# that no cell in the notebook defines.
loaded_model = tf.keras.models.load_model('sentiment_analysis_model.h5')

sample_reviews = [
    "I loved this movie! It was fantastic and thrilling.",
    "This film was terrible. I hated it and would not recommend it."
]

for review in sample_reviews:
    sentiment, confidence = predict_sentiment(review, loaded_model)
    print(f"Review: {review}\nPredicted Sentiment: {sentiment}, Confidence: {confidence:.4f}\n")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358ms/step
Review: I loved this movie! It was fantastic and thrilling.
Predicted Sentiment: Positive, Confidence: 0.9998

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Review: This film was terrible. I hated it and would not recommend it.
Predicted Sentiment: Positive, Confidence: 0.9995

