In [1]:
import kagglehub
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

In [2]:
# Download dataset
path = kagglehub.dataset_download("lakshmi25npathi/imdb-dataset-of-50k-movie-reviews")

# Load dataset
csv_path = os.path.join(path, "IMDB Dataset.csv")
df = pd.read_csv(csv_path)
df

Downloading from https://www.kaggle.com/api/v1/datasets/download/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews?dataset_version_number=1...


100%|██████████| 25.7M/25.7M [00:00<00:00, 145MB/s] 

Extracting files...





Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [3]:
# Encode labels
label_map = {'positive': 1, 'negative': 0}
df['sentiment'] = df['sentiment'].map(label_map)

# Preprocess text
tokenizer = Tokenizer(num_words=10000, oov_token="")
tokenizer.fit_on_texts(df['review'])
sequences = tokenizer.texts_to_sequences(df['review'])
padded_sequences = pad_sequences(sequences, maxlen=200, padding='post', truncating='post')

In [4]:
# Split data
train_size = int(len(df) * 0.8)
x_train, x_test = padded_sequences[:train_size], padded_sequences[train_size:]
y_train, y_test = df['sentiment'][:train_size], df['sentiment'][train_size:]

In [5]:
# Build model
model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=200),
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])


model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



In [6]:
# Train model
history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 23ms/step - accuracy: 0.5472 - loss: 0.6845 - val_accuracy: 0.6078 - val_loss: 0.6647
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 18ms/step - accuracy: 0.5656 - loss: 0.6736 - val_accuracy: 0.5023 - val_loss: 0.6923
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.5012 - loss: 0.6929 - val_accuracy: 0.4993 - val_loss: 0.6923
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 17ms/step - accuracy: 0.5034 - loss: 0.6930 - val_accuracy: 0.5235 - val_loss: 0.6912
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.5216 - loss: 0.6910 - val_accuracy: 0.5697 - val_loss: 0.6795
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 16ms/step - accuracy: 0.7125 - loss: 0.5570 - val_accuracy: 0.8553 - val_loss: 0.3570
Epoch 7/10
[1m6

In [7]:
# Evaluate model
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
print(f"Test Loss: {loss:.4f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8682 - loss: 0.3525
Test Accuracy: 86.54%
Test Loss: 0.3602


In [8]:
# Test with custom sentence
def predict_sentiment(sentence):
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=200, padding='post', truncating='post')
    prediction = model.predict(padded_sequence)[0][0]
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    print(f"Predicted Sentiment: {sentiment} ({prediction:.4f})")




######
predict_sentiment("The film was a complete disaster, I regret watching it.")
# Expected: Negative
predict_sentiment("why did they not follow the book i am really sad and disappointed i was so looking forward to seeing this movie however if you have read the book maybe recently it might be very difficult to remain objective my wife had not read the book and she loved the movie br br reasons for the disappointment are 1  involvement gone with the wind such a shame there is a very small part at the start where the  is created but even that does not stick to the facts why not the fact that  father was burned with the first  brand which is how  got involved would have been a perfect start to the movie 2 story line between the deceased pope and  completely gone this completely  up the motive for the stealing of the  3 story line between  and   completely non existent br br all in all too  storyline and cannot understand that dan brown allowed them to put his name against it maybe i should  this film in 10 years time when i cannot remember the excellent book anymore fat chance on forgetting the book")
# Expected: Negative
predict_sentiment("Recap: Something mysterious transmits radio signals from Antarctica. Julian Rome, a former SETI worker, is brought in. But the plot has too many holes—Washington somehow knows too much, the alien pod transmits in English, and the ending is too open-ended. The movie is a little entertaining, but too much effort is needed to fill in the plot gaps. 3/10.")
# Expected: Negative
#######
predict_sentiment("This movie was absolutely fantastic, I loved every moment!")
# Expected: Positive
predict_sentiment("Watched this last night and was amazed by the heartfelt story, excellent character development, and good vibes from the acting. The story follows an ordinary man who takes dance lessons and learns about himself and others. The film is lighthearted and funny, yet dignified. Highly recommended for families and teens.")
# Expected: Positive
predict_sentiment("this is a very beautiful and almost  film there is hardly any dialogue in it apart from the narration and the scenery and music compliment each other perfectly i didn't at first connect the red hair of the girl and the fox until it was pointed out to me by a friend who also has red hair it is almost an old fashioned type of children's films saying that children nowadays prefer animations like  or toy story etc but i feel that young people should be introduced more to the beauty and wonder of nature which this film certainly does maybe not the best ever film of its type but certainly an excellent and relaxing view for all ages not just children")
# Expected: Positive
#####

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
Predicted Sentiment: Negative (0.0532)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predicted Sentiment: Negative (0.0091)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predicted Sentiment: Negative (0.0244)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Predicted Sentiment: Positive (0.9863)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted Sentiment: Positive (0.9912)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Predicted Sentiment: Positive (0.9893)
