<a href="https://colab.research.google.com/github/Likhi-tech/Deep-learning-projects/blob/main/movie_review_analysis_with_IMDB_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Step 1: Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Step 2: Load and preprocess the IMDB dataset
num_words = 10000 # Only keep the top 10,000 most frequent words
maxlen = 200 # Maximum sequence length
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
# Step 3: Build a machine learning model
model = keras.Sequential([
 keras.layers.Embedding(input_dim=num_words, output_dim=128, input_length=maxlen),
 keras.layers.LSTM(128),
 keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Step 4: Train the model
batch_size = 32
epochs = 5
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)

Epoch 1/5




[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 400ms/step - accuracy: 0.7052 - loss: 0.5411 - val_accuracy: 0.8476 - val_loss: 0.3606
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 389ms/step - accuracy: 0.8905 - loss: 0.2741 - val_accuracy: 0.8402 - val_loss: 0.3740
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 384ms/step - accuracy: 0.9192 - loss: 0.2130 - val_accuracy: 0.8546 - val_loss: 0.3679
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 401ms/step - accuracy: 0.9508 - loss: 0.1370 - val_accuracy: 0.8454 - val_loss: 0.4012
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 388ms/step - accuracy: 0.9712 - loss: 0.0872 - val_accuracy: 0.8434 - val_loss: 0.4680


<keras.src.callbacks.history.History at 0x7c2716851a20>

In [6]:
# Step 5: Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 123ms/step - accuracy: 0.8453 - loss: 0.4694
Test loss: 0.4549
Test accuracy: 0.8482


In [7]:
# Step 6: Make predictions
import tensorflow as tf # Import tensorflow to access keras

sample_review = "This movie was fantastic! I loved every moment of it."
sample_sequence = [tf.keras.datasets.imdb.get_word_index().get(word, 0) for word in sample_review.split()] # Use tf.keras.datasets.imdb to access get_word_index()
sample_sequence = pad_sequences([sample_sequence], maxlen=maxlen)
prediction = model.predict(sample_sequence)
sentiment = "Positive" if prediction[0] >= 0.5 else "Negative"
print(f"Predicted sentiment: {sentiment} (Probability: {prediction[0][0]:.4f})")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 420ms/step
Predicted sentiment: Positive (Probability: 0.5354)
