In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Load the dataset
df = pd.read_csv('/kaggle/input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv')

df.head(10)

2024-04-27 08:56:05.530697: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-27 08:56:05.530794: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-27 08:56:05.665042: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
5,"Probably my all-time favorite movie, a story o...",positive
6,I sure would like to see a resurrection of a u...,positive
7,"This show was an amazing, fresh & innovative i...",negative
8,Encouraged by the positive comments about this...,negative
9,If you like original gut wrenching laughter yo...,positive


In [3]:

# Encode the sentiment column
label_encoder = LabelEncoder()
df['sentiment'] = label_encoder.fit_transform(df['sentiment'])



In [4]:
# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['review'])
X = tokenizer.texts_to_sequences(df['review'])
X = pad_sequences(X)



In [5]:
# Model Definition
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=128))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [6]:

# Model Training
model.fit(X, df['sentiment'], epochs=5, batch_size=128, validation_split=0.2)


Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 206ms/step - accuracy: 0.6795 - loss: 0.5743 - val_accuracy: 0.8107 - val_loss: 0.4305
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 205ms/step - accuracy: 0.9029 - loss: 0.2546 - val_accuracy: 0.8758 - val_loss: 0.2951
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 204ms/step - accuracy: 0.9327 - loss: 0.1882 - val_accuracy: 0.8851 - val_loss: 0.3311
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 204ms/step - accuracy: 0.9742 - loss: 0.0820 - val_accuracy: 0.8660 - val_loss: 0.3472
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 205ms/step - accuracy: 0.9817 - loss: 0.0608 - val_accuracy: 0.8855 - val_loss: 0.4080


<keras.src.callbacks.history.History at 0x7c5d94a89f00>

In [7]:

# Save Model
model.save('/kaggle/working/sentiment_analysis_model.h5')


In [8]:

# Load Model for Prediction
from tensorflow.keras.models import load_model
model = load_model('/kaggle/working/sentiment_analysis_model.h5')


In [29]:

# Example Prediction
new_review = "Spiderman movie was so bad. i will never recommend this movie"
new_review_seq = tokenizer.texts_to_sequences([new_review])
new_review_seq = pad_sequences(new_review_seq, maxlen=X.shape[1])
prediction = model.predict(new_review_seq)
print(prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[[0.08557002]]


In [30]:
# Convert prediction to human-readable format
threshold = 0.5
sentiment = "positive" if prediction > threshold else "negative"

print(f"The sentiment of the review is: {sentiment}")

The sentiment of the review is: negative
