In [17]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [18]:
df = pd.read_csv("IMDB Dataset.csv")
df

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [19]:
label_encoder = LabelEncoder()
df['sentiment'] = label_encoder.fit_transform(df['sentiment'])

In [20]:
reviews = df['review'].values
labels = df['sentiment'].values

In [21]:
x_train,x_test,y_train,y_test = train_test_split(reviews, labels, test_size=0.2, random_state=42)

In [22]:
vocab_size = 10000
max_length = 200

tokenizer = Tokenizer(num_words=vocab_size, oov_token = "<OOV>")
tokenizer.fit_on_texts(x_train)

In [23]:
x_train_seq = tokenizer.texts_to_sequences(x_train)
x_test_seq = tokenizer.texts_to_sequences(x_test)

In [24]:
x_train_padded = pad_sequences(x_train_seq, maxlen = max_length, padding='post', truncating='post')
x_test_padded = pad_sequences(x_test_seq, maxlen = max_length, padding='post', truncating='post')

In [25]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 64 , input_length = max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation = 'sigmoid')
])



In [27]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [28]:
history = model.fit(x_train_padded, y_train, epochs=10, batch_size=32, validation_data=(x_test_padded,y_test))

Epoch 1/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.7268 - loss: 0.5211 - val_accuracy: 0.8757 - val_loss: 0.3007
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.8916 - loss: 0.2703 - val_accuracy: 0.8573 - val_loss: 0.3290
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.9077 - loss: 0.2337 - val_accuracy: 0.8801 - val_loss: 0.2939
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9181 - loss: 0.2092 - val_accuracy: 0.8687 - val_loss: 0.3186
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9238 - loss: 0.1992 - val_accuracy: 0.8740 - val_loss: 0.3081
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.9310 - loss: 0.1870 - val_accuracy: 0.8698 - val_loss: 0.3262
Epoch 7/10
[1m1

In [33]:
loss,accuracy = model.evaluate(x_test_padded,y_test)
accuracy

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8560 - loss: 0.4208  


0.8560000061988831

In [38]:
def predict_sentiment(review_text):
    seq = tokenizer.texts_to_sequences([review_text])
    padded = pad_sequences(seq, maxlen = max_length, padding='post', truncating = 'post')
    prediction = model.predict(padded)[0][0]

    sentiment = "positive" if prediction >= 0.5 else "negative"
    confidence = prediction if prediction >=0.5 else 1-prediction

    print(f"Review: {review_text}")
    print(f"Predicted Sentiment : {sentiment} (Confidence : {confidence:.2f})")

In [43]:
predict_sentiment("This movie was absolutely fantastic! I loved it.")
predict_sentiment("Terrible film.Bad plot, Bad acting. Waste of time and money")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Review: This movie was absolutely fantastic! I loved it.
Predicted Sentiment : positive (Confidence : 1.00)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
Review: Terrible film.Bad plot, Bad acting. Waste of time and money
Predicted Sentiment : negative (Confidence : 0.97)
