In [2]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
import numpy as np

In [3]:
# Split the dataset into training and testing sets
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Store the vocabulary and class names
vocab=imdb.get_word_index()
class_names=['Negative', 'Positive']

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [5]:
# change word to index mapping to index-word mapping
reverse_index = dict([(value, key) for (key, value) in vocab.items()])
# Build a decoder function to convert integer indices back to words
def decode(review):
  text=""
  for i in review:
    text=text+reverse_index[i]
    text=text+" "
  return text
print(decode(x_train[1]))

the thought solid thought senator do making to is spot nomination assumed while he of jack in where picked as getting on was did hands fact characters to always life thrillers not as me can't in at are br of sure your way of little it strongly random to view of love it so principles of guy it used producer of where it of here icon film of outside to don't all unique some like of direction it if out her imagination below keep of queen he diverse to makes this stretch and of solid it thought begins br senator and budget worthwhile though ok and awaiting for ever better were and diverse for budget look kicked any to of making it out and follows for effects show to show cast this family us scenes more it severe making senator to and finds tv tend to of emerged these thing wants but and an beckinsale cult as it is video do you david see scenery it in few those are of ship for with of wild to one is very work dark they don't do dvd with those them 


In [6]:
# Add padding sequences to the data
x_train=pad_sequences(x_train, value=vocab['the'], padding='post', maxlen=256)
x_test=pad_sequences(x_test, value=vocab['the'], padding='post', maxlen=256)

#if length of review is 200 words, fill the 'the' wd to the review to make the length 256, otherwise delete 'the' words if overflow.

In [7]:
# Build the model
model=Sequential()
model.add(Embedding(10000,16)) #we need to embed text into neural network
model.add(GlobalAveragePooling1D()) #used to reduce dimensions/attibutes
model.add(Dense(16,activation='relu')) # dense layer is hidden, relu is the activation function, relu is used where non-linearity is present.
model.add(Dense(1,activation='sigmoid')) #sigmoid is used for binary classification
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# print(model.summary()).

In [8]:
# Train the model
model.fit(x_train, y_train, epochs=4, batch_size=128, verbose=1,validation_data=(x_test, y_test))

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.src.callbacks.History at 0x7cf2736d46a0>

In [9]:
# Inference of the trained model
predicted_value=model.predict(np.expand_dims(x_test[10], 0)) #change the 10->12 for negative ans
print(predicted_value)
if predicted_value>0.5:
  final_value=1
else:
  final_value=0
print(final_value)
print(class_names[final_value])

[[0.8155994]]
1
Positive


In [10]:
# Evaluation of the model
loss, accuracy = model.evaluate(x_test, y_test)
print("Loss :",loss)
print(f"Accuracy (Test Data) : {round(accuracy*100)}%")

Loss : 0.29483911395072937
Accuracy (Test Data) : 88%
