In [51]:
import pandas as pd
from google.colab import files
uploaded = files.upload()
df = pd.read_csv("IMDB Dataset.csv")
print(df.head())


Saving IMDB Dataset.csv to IMDB Dataset (2).csv
                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


In [52]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
max_features = 10000
max_length = 200
tokenizer = Tokenizer(num_words=max_features, oov_token="<OOV>")
tokenizer.fit_on_texts(df['review'])
X = tokenizer.texts_to_sequences(df['review'])
X = pad_sequences(X, maxlen=max_length)
y = df['sentiment'].apply(lambda x: 1 if x.lower() == 'positive' else 0).values


In [53]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [54]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Model defining
model = Sequential([
    Embedding(input_dim=max_features, output_dim=64, input_length=max_length),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




In [55]:
# Model Training
epochs = 5
batch_size = 64

history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 19ms/step - accuracy: 0.7552 - loss: 0.4677 - val_accuracy: 0.8892 - val_loss: 0.2621
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.9109 - loss: 0.2335 - val_accuracy: 0.8809 - val_loss: 0.2768
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.9303 - loss: 0.1852 - val_accuracy: 0.8919 - val_loss: 0.2932
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - accuracy: 0.9566 - loss: 0.1236 - val_accuracy: 0.8731 - val_loss: 0.3561
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.9629 - loss: 0.1061 - val_accuracy: 0.8818 - val_loss: 0.3714


In [69]:
# Prediction Making
custom_review = "Predictable and boring. I've seen much better movies"
sequences = tokenizer.texts_to_sequences([custom_review])
padded_review = pad_sequences(sequences, maxlen=max_length)

predicted_sentiment = model.predict(padded_review)
sentiment_label = "Positive" if predicted_sentiment[0][0] > 0.5 else "Negative"
print("Predicted Sentiment:", sentiment_label)
print("Confidence Score:", predicted_sentiment[0][0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Predicted Sentiment: Negative
Confidence Score: 0.0033545769
