Importing all Dependencies

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

Getting the dataset

In [2]:
# Load the IMDB review dataset; the path is relative, so the CSV must sit in the
# notebook's working directory.
sentiment = pd.read_csv("IMDB Dataset.csv")
# Show the loaded frame (a `review` text column and a `sentiment` label column).
sentiment

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


Replacing the "positive" and "negative" label strings with the integers 1 and 0


Positive => 1


Negative => 0

In [3]:
# Encode the string labels as integers: positive -> 1, negative -> 0.
# An explicit mapping plus `astype` is used instead of `replace(..., inplace=True)`
# because `replace` relies on implicit object -> int downcasting, which newer pandas
# deprecates; without it the column would stay object-typed.
# Values that are not keys of the mapping pass through unchanged, so re-running this
# cell on already-encoded labels is a no-op, while any unexpected label makes the
# `astype` call fail loudly instead of slipping into training.
label_to_int = {"positive": 1, "negative": 0}
sentiment["sentiment"] = (
    sentiment["sentiment"]
    .map(lambda label: label_to_int.get(label, label))
    .astype("int64")
)

In [4]:
# Re-display the frame to confirm the `sentiment` column now holds 1/0 labels.
sentiment

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1
...,...,...
49995,I thought this movie did a down right good job...,1
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",0
49997,I am a Catholic taught in parochial elementary...,0
49998,I'm going to have to disagree with the previou...,0


Splitting data into training and testing data

In [5]:
# Hold out 20% of the reviews for final testing.
# `random_state` pins the shuffle so the split (and every metric computed from it)
# is reproducible across kernel restarts; `stratify` keeps the positive/negative
# ratio the same in both partitions.
train_data, test_data = train_test_split(
    sentiment,
    test_size=0.2,
    random_state=42,
    stratify=sentiment["sentiment"],
)

Preprocessing the data to make it understandable for the machine, i.e. tokenizing the data

In [6]:
# Build the vocabulary from the training reviews only, so the test reviews are
# encoded with a tokenizer that has never seen them.
token = Tokenizer(num_words=5000)
token.fit_on_texts(train_data["review"])

# Convert each review into a list of integer word ids.
train_sequences = token.texts_to_sequences(train_data["review"])
test_sequences = token.texts_to_sequences(test_data["review"])

# Bring every review to exactly 200 ids; shorter reviews are zero-padded
# (the leading zeros are visible when the arrays are displayed).
x_train = pad_sequences(train_sequences, maxlen=200)
x_test = pad_sequences(test_sequences, maxlen=200)

Getting the processed data

In [7]:
# Inspect the padded training matrix (one row of word ids per training review).
x_train

array([[   0,    0,    0, ...,    3,   38,  105],
       [   0,    0,    0, ...,    2,  759,  251],
       [ 160,   54,   21, ...,   42,    4,  155],
       ...,
       [   0,    0,    0, ...,  585,  543, 1149],
       [4732,   18,   61, ...,  373,    7,    7],
       [  12,    1,   17, ...,  188,   75,   17]])

In [8]:
# Inspect the padded test matrix (one row of word ids per test review).
x_test

array([[  30,  223,    1, ...,  439,   20,    3],
       [  14,    3, 1809, ...,  360,    4,  927],
       [   0,    0,    0, ...,  833,   11,   17],
       ...,
       [   0,    0,    0, ...,   23,   20,  179],
       [   0,    0,    0, ...,   14,   13, 4497],
       [   0,    0,    0, ...,  227,    7,    7]])

Extracting the training and testing labels

In [9]:
# Targets for the model: the `sentiment` label column of each split.
y_train, y_test = train_data["sentiment"], test_data["sentiment"]

Building a deep learning model

In [10]:
# Architecture: word-id embedding -> single LSTM layer -> sigmoid unit that outputs
# the probability of the positive class.
#   * input_dim=5000 matches the tokenizer's `num_words`.
#   * `input_length` is intentionally not passed to Embedding: Keras 3 deprecates the
#     argument and ignores it with a warning. The sequence length is declared through
#     `model.build` instead.
model = Sequential([
    Embedding(input_dim=5000, output_dim=128),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation="sigmoid"),
])

# Build eagerly for batches of 200-token sequences (the `maxlen` used when padding)
# so that `model.summary()` can report output shapes and parameter counts before
# training starts.
model.build(input_shape=(None, 200))



Getting model summary

In [11]:
# Print the layer-by-layer architecture summary of the model defined above.
model.summary()

Compiling the model

In [12]:
# Two-class problem with a single sigmoid output, hence binary cross-entropy;
# accuracy is tracked as the reporting metric.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

Model Training

In [13]:
# Train for 5 epochs in mini-batches of 60 reviews.
# `validation_split=0.2` sets aside part of the training data for per-epoch validation
# (Keras takes it from the end of the arrays, before shuffling), so the test split
# stays untouched until the final evaluation.
# The returned History object is kept so the per-epoch loss/accuracy curves can be
# inspected later via `history.history`, and so its repr is not echoed as cell output.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=60,
    validation_split=0.2,
)

Epoch 1/5
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46041s[0m 86s/step - accuracy: 0.7113 - loss: 0.5388 - val_accuracy: 0.8350 - val_loss: 0.4079
Epoch 2/5
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 352ms/step - accuracy: 0.8402 - loss: 0.3740 - val_accuracy: 0.8537 - val_loss: 0.3529
Epoch 3/5
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 369ms/step - accuracy: 0.8680 - loss: 0.3180 - val_accuracy: 0.8736 - val_loss: 0.3175
Epoch 4/5
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 387ms/step - accuracy: 0.8854 - loss: 0.2865 - val_accuracy: 0.8589 - val_loss: 0.3391
Epoch 5/5
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 377ms/step - accuracy: 0.9069 - loss: 0.2403 - val_accuracy: 0.8701 - val_loss: 0.3307


<keras.src.callbacks.history.History at 0x1d6d41a9750>

Model Accuracy

In [14]:
# Score the trained model on the held-out test split and report both numbers,
# with accuracy shown as a percentage rounded to two decimals.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Loss:{loss}", f"Accuracy:{accuracy*100:.2f}", sep="\n")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 74ms/step - accuracy: 0.8702 - loss: 0.3270
Loss:0.33086898922920227
Accuracy:86.50


Creating the sentiment prediction function

In [15]:
def sentiment_predictiion(review=None):
    """Classify one movie review as positive or negative and print the verdict.

    The review is encoded with the notebook-level ``token`` tokenizer, padded to
    200 ids with ``pad_sequences`` and scored by the notebook-level ``model``.
    (The misspelled function name is kept so existing calls keep working.)

    Parameters
    ----------
    review : str, optional
        Text to classify. When omitted (the original calling convention), the
        text is read interactively with ``input``.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    if review is None:
        review = input("Enter the review: ")

    sequence = token.texts_to_sequences([review])

    # An empty encoding means no word of the review is in the tokenizer's
    # vocabulary; the model would then score an all-padding sequence, so report
    # that instead of printing a meaningless verdict.
    if not sequence[0]:
        print("No known words found in the review; unable to predict sentiment.")
        return

    padded_sequence = pad_sequences(sequence, maxlen=200)
    prediction = model.predict(padded_sequence)

    # The model's sigmoid output cannot exceed 1, so a single threshold check at
    # 0.5 is sufficient; exactly 0.5 is reported as negative, as before.
    positive_probability = float(prediction[0][0])
    if positive_probability > 0.5:
        print("Review is Positive.")
    else:
        print("Review is Negative.")

Example

In [16]:
sentiment_predictiion()

Enter the review: The Movie is good
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step
Review is Positive.


In [18]:
sentiment_predictiion()

Enter the review: This Movie is not that good
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Review is Negative.


In [20]:
sentiment_predictiion()

Enter the review: This movie is fantastic with awesome plots
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Review is Positive.


In [21]:
sentiment_predictiion()

Enter the review: Movie plot and dialogues are bad
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Review is Negative.


In [22]:
sentiment_predictiion()

Enter the review: The movie is ok
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Review is Positive.
