In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the IMDB dataset CSV into a DataFrame (the path is a Colab-style absolute path).
data = pd.read_csv(filepath_or_buffer="/content/IMDB Dataset.csv")

In [None]:
# Encode the text labels as integers: "positive" -> 1, "negative" -> 0.
# The result is assigned back to the column (no in-place mutation of the whole
# frame) and cast explicitly, so the column is int64 without relying on the
# implicit object -> int downcasting of `replace`. Re-running this cell is a
# no-op, and any label other than the two mapped ones makes the cast raise
# instead of silently leaving a mixed-type column.
data["sentiment"] = (
    data["sentiment"]
    .replace({"positive": 1, "negative": 0})
    .astype("int64")
)

In [None]:
# Class-balance check: number of reviews per sentiment label.
sentiment_counts = data["sentiment"].value_counts()
sentiment_counts

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
1,25000
0,25000


In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SimpleRNN
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Hold out 20% of the rows as the test set; the fixed seed makes the split repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the tokenizer on the training reviews only, so no test-set text
# contributes to the vocabulary (capped via num_words=5000).
train_reviews = train_data["review"]
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_reviews)

In [None]:
def _to_padded(reviews):
    """Convert reviews to integer sequences with the fitted tokenizer, padded to length 200."""
    return pad_sequences(tokenizer.texts_to_sequences(reviews), maxlen=200)


X_train = _to_padded(train_data["review"])
X_test = _to_padded(test_data["review"])

In [None]:
# Targets: the sentiment column of each split.
Y_train, Y_test = train_data["sentiment"], test_data["sentiment"]

In [None]:
# Embedding -> SimpleRNN -> single sigmoid unit for binary sentiment output.
# `input_length` is intentionally not passed to Embedding: Keras 3 treats it as
# a deprecated, ignored argument. The sequence length (200) is supplied through
# build() below instead.
model = Sequential(
    [
        Embedding(input_dim=5000, output_dim=128),
        SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2),
        Dense(1, activation="sigmoid"),
    ]
)

# Build explicitly so summary() can report output shapes and parameter counts
# before the model has seen any data.
model.build(input_shape=(None, 200))

model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is reported per epoch.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Train for 5 epochs in batches of 64, with validation_split=0.2 of the training data.
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 118ms/step - accuracy: 0.5178 - loss: 0.7060 - val_accuracy: 0.5502 - val_loss: 0.6752
Epoch 2/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 117ms/step - accuracy: 0.5926 - loss: 0.6609 - val_accuracy: 0.6139 - val_loss: 0.6312
Epoch 3/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 119ms/step - accuracy: 0.6694 - loss: 0.5953 - val_accuracy: 0.6919 - val_loss: 0.5750
Epoch 4/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 117ms/step - accuracy: 0.7293 - loss: 0.5267 - val_accuracy: 0.6405 - val_loss: 0.7243
Epoch 5/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 120ms/step - accuracy: 0.7730 - loss: 0.4792 - val_accuracy: 0.7498 - val_loss: 0.5379


<keras.src.callbacks.history.History at 0x7bbf13f2f690>

In [None]:
# Persist the trained model to disk (HDF5 file, per the .h5 extension).
model.save(filepath="model.h5")



In [None]:
import joblib

# Persist the fitted tokenizer alongside the model so the same word index can be reloaded later.
joblib.dump(value=tokenizer, filename="tokenizer.pkl")

['tokenizer.pkl']

In [None]:
# Score the trained model on the held-out test split.
loss, accuracy = model.evaluate(x=X_test, y=Y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.6875 - loss: 0.5753


In [None]:
# Test-set loss returned by model.evaluate.
print(f"{loss}")

0.573019802570343


In [None]:
# Test-set accuracy returned by model.evaluate.
print(f"{accuracy}")

0.6884999871253967


In [None]:
def predictive_system(review, threshold=0.5, maxlen=200):
    """Classify a single review as "positive" or "negative".

    The review is converted to an integer sequence with the module-level
    ``tokenizer``, padded to ``maxlen`` and scored by the module-level
    ``model``.

    Args:
        review: The review to classify; it is wrapped in a one-element batch.
        threshold: Decision boundary applied to the model's output. Scores
            strictly greater than this value yield "positive". Defaults to 0.5.
        maxlen: Sequence length passed to ``pad_sequences``. Defaults to 200,
            the length used when preparing the training data in this notebook.

    Returns:
        The string "positive" or "negative".
    """
    sequences = tokenizer.texts_to_sequences([review])
    padded_sequence = pad_sequences(sequences, maxlen=maxlen)
    prediction = model.predict(padded_sequence)

    # One review in, one score out: take the single value of the batch.
    score = float(prediction[0][0])
    return "positive" if score > threshold else "negative"

In [None]:
# Spot-check the classifier on a short, clearly positive sentence.
predictive_system(review="This movie was fantastic and amazing")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step


'positive'

In [None]:
# Spot-check the classifier on another short review.
predictive_system(review="A trilling adventure with stunning visual")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


'positive'

In [None]:
# Spot-check the classifier on a very short, three-word review.
predictive_system(review="A visual masterpiece")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


'positive'

In [None]:
# Spot-check the classifier on a short critical remark.
predictive_system(review="Overall long and slow")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step


'negative'