In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=10000)

In [66]:
word_index = keras.datasets.imdb.get_word_index()

reverse_word_index = {value+3: key for (key, value) in word_index.items()}
reverse_word_index[0] = "<PAD>"
reverse_word_index[1] = "<START>"
reverse_word_index[2] = "<OOV>"

In [67]:
def decode_review(text):
    return ''.join([reverse_word_index.get(i-3,'?') for i in text])

In [68]:
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=256)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=256)

In [69]:
model = keras.Sequential([
    layers.Embedding(input_dim=10000, output_dim=16),
    layers.GlobalAveragePooling1D(),
    layers.Dense(16, activation='relu'),
    layers.Dense(1,activation='sigmoid')
])

In [70]:
model.compile(
    optimizer = "adam",
    loss = "binary_crossentropy",
    metrics = ["accuracy"]
)

In [71]:
history = model.fit(
    x_train,
    y_train,
    epochs = 20,
    batch_size = 512,
    validation_split=0.2,
    verbose=1
)

Epoch 1/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.5996 - loss: 0.6897 - val_accuracy: 0.7144 - val_loss: 0.6816
Epoch 2/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6857 - loss: 0.6704 - val_accuracy: 0.7414 - val_loss: 0.6514
Epoch 3/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7424 - loss: 0.6300 - val_accuracy: 0.7718 - val_loss: 0.5997
Epoch 4/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7945 - loss: 0.5671 - val_accuracy: 0.7980 - val_loss: 0.5327
Epoch 5/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8224 - loss: 0.4975 - val_accuracy: 0.8200 - val_loss: 0.4711
Epoch 6/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8360 - loss: 0.4389 - val_accuracy: 0.8388 - val_loss: 0.4256
Epoch 7/20
[1m40/40[0m [32m━━━━━

In [72]:
results = model.evaluate(x_test, y_test)
print(f"\n Test Accuracy = {results[1]*100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8807 - loss: 0.2883

 Test Accuracy = 88.07%


In [73]:
import re

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z0-9\s]","",text)
    return text

In [74]:
def encode_review(text):
    words = clean_text(text).split()
    encoded = [1]
    for word in words:
        index = word_index.get(word,2) + 3
        encoded.append(index)
    padded = keras.preprocessing.sequence.pad_sequences([encoded],maxlen=256, padding='post')
    return padded   

In [78]:
customer_review = "good and beautiful"
encoded = encode_review(customer_review)
print("Encoded = ",encoded[0][:30])
prediction = model.predict(encoded)

Encoded =  [  1  52   5 307   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


In [76]:
print('movie' in word_index)
print(word_index["good"])

True
49


In [79]:
print(prediction)
print(f"Sentiment Score : {prediction[0][0]:4f}")
print("Sentiment: ","Positive" if prediction[0][0] > 0.5 else "Negative")

[[0.66142917]]
Sentiment Score : 0.661429
Sentiment:  Positive


In [80]:
import random

In [92]:
i = random.randint(0, len(x_test)-1)
review = x_test[i]
true_label = y_test[i]

padded_review = keras.preprocessing.sequence.pad_sequences([review], maxlen=256, padding='post')
pred = model.predict(padded_review)

pred_label = 1 if pred>0.5 else 0

print("\n Review:")
print(decode_review(review)[:500]+"...")
print("Actual sentiment : ","Positive" if true_label == 1 else "Negative")
print("Predicted sentiment : ","Positive" if pred_label == 1 else "Negative")
print(f"Confidence Score : {pred[0][0]:4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step

 Review:
??????????????????????????????????????????????????????????????????????????????brcomeinasfilmsnowhappenyoueditingandgustoisforstupidforgetsisasexyandhiswomanisian?kindhavethe?britainthetooitsamwasfilmthat?takeverybr2betterbutworstif<START>herothe<START>asa<OOV>stupidwho<START>?1953<START>needinga<START>meldoesn'tlifeaunless<START>imagineai'velargewas?moneytheanimatorallentertaining?kindisadaybutandhissuitablyto<START>wouldbrarthuristhemselvesisiwaitingthanksisito<START>actedemotionalhidewasoutsou...
Actual sentiment :  Positive
Predicted sentiment :  Positive
Confidence Score : 0.864251


In [94]:
model.save("sentiment_model.keras")

In [95]:
import pickle

with open("word_index.pkl", "wb") as f:
    pickle.dump(word_index, f)


In [97]:
from tensorflow import keras
import pickle

# Load model
model = keras.models.load_model("sentiment_model.keras")

# Load word index
with open("word_index.pkl", "rb") as f:
    word_index = pickle.load(f)


In [99]:
review = input("Enter a review: ")
original = input("Enter actual sentiment: ")
encoded = encode_review(review)
prediction = model.predict(encoded)

print("The review : ", review)
print("Actual sentiment: ", original)
print(f"Sentiment Score: {prediction[0][0]:.4f}")
print("Predicted Sentiment:", "Positive" if prediction[0][0] > 0.5 else "Negative")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
The review :  The movie was horrible
Actual sentiment:  Negative
Sentiment Score: 0.3571
Predicted Sentiment: Negative
