# Modified Code IMDB Dataset

In [None]:
import tensorflow as tf
import keras
from keras import layers
from keras.datasets import imdb
import matplotlib.pyplot as plt
import numpy as np

Load the IMDB review data from the Keras datasets module, keeping only the 10,000 most frequent words.

In [None]:
# Fetch the IMDB reviews as lists of word indices, with the vocabulary capped
# at the 10,000 most frequent words.
(train_data, train_label), (test_data, test_label) = imdb.load_data(
    num_words=10000
)
# Sanity check: the number of reviews must match the number of labels.
print(train_data.shape, train_label.shape)

(25000,) (25000,)


The labels are encoded as 0 for a negative review and 1 for a positive review.

In [None]:
# Largest word index used anywhere in the training reviews; because the
# vocabulary is capped at 10,000 words it must stay below 10000.
max(max(review) for review in train_data)

9999

In [None]:
# word_index maps each word (str) to its integer index.
word_index = imdb.get_word_index()

# Invert the mapping so it can be looked up by index: items() yields
# (word, index) pairs, so the pair has to be swapped when building the dict.
reversed_word_index = {index: word for word, index in word_index.items()}

# Decode the first training review back to text. Indices are shifted by 3
# because 0, 1 and 2 are reserved for "padding", "start of sequence" and
# "unknown"; any index without a vocabulary entry is rendered as "?".
decoded_review = " ".join(
    reversed_word_index.get(i - 3, "?") for i in train_data[0]
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


`decoded_review`: the indices are offset by 3 because 0, 1, and 2 are reserved for "padding", "start of sequence", and "unknown".

In [None]:
def vectorize_sequences(sequences, dimension = 10000) :
    """Multi-hot encode a collection of integer sequences.

    Args:
        sequences: Sized iterable of integer sequences (e.g. word indices of
            each review). Every index must be in ``range(dimension)``.
        dimension: Width of the encoding, i.e. the vocabulary size.

    Returns:
        A float array of shape ``(len(sequences), dimension)`` where entry
        ``[i, j]`` is 1.0 if index ``j`` occurs in ``sequences[i]`` and 0.0
        otherwise. An empty input yields an array of shape ``(0, dimension)``.

    Raises:
        IndexError: If a sequence contains an index outside the encoding width.
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        for j in sequence:
            results[i, j] = 1.
    # Return only after *every* sequence has been encoded; returning from
    # inside the loop would leave all rows but the first at zero.
    return results

# Multi-hot encode both splits with the same encoder so the feature layout
# of the training and test matrices is identical.
x_train, x_test = (
    vectorize_sequences(split) for split in (train_data, test_data)
)

Vectorize the labels, using np.asarray

In [None]:
# Convert the integer 0/1 labels of both splits to float32 arrays.
y_train, y_test = (
    np.asarray(labels).astype("float32")
    for labels in (train_label, test_label)
)

# Spot-check the first training label.
print(y_train[0])

1.0


In [None]:
# Fully connected classifier: three ReLU hidden layers (64, 16 and 32 units)
# followed by a single sigmoid unit that outputs a value in (0, 1).
model_IMDB = keras.Sequential(
    [layers.Dense(units, activation="relu") for units in (64, 16, 32)]
    + [layers.Dense(1, activation="sigmoid")]
)

Sigmoid Function :
$$
S(x) = \frac{1}{1+e^{-x}} = \frac{e^x}{e^x+1} = 1-S(-x)
$$

In [None]:
# Configure training: RMSprop optimizer, mean-squared-error loss, and
# accuracy as the reported metric.
# NOTE(review): a learning rate of 0.1 is far above the RMSprop default, and
# "mse" replaces the usual binary cross-entropy for a sigmoid output --
# presumably both are deliberate parts of this "modified" experiment; confirm.
model_IMDB.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.1),
    loss="mse",
    metrics=["accuracy"],
)

Preparing the validation data

In [None]:
# Hold out the first 10,000 training samples for validation and train on the
# rest; features and labels are cut at the same index so they stay aligned.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [None]:
# Train for 10 epochs in mini-batches of 512 samples, scoring the held-out
# validation set at the end of every epoch.
History = model_IMDB.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=10,
    validation_data=(x_val, y_val),
)

# The per-epoch loss/metric curves recorded during training; show which
# series are available.
History_dict = History.history
History_dict.keys()

Epoch 1/10

KeyboardInterrupt: ignored

In [None]:
# Score the trained model on the held-out test set and print what
# evaluate() returns (the loss and the compiled metrics).
results = model_IMDB.evaluate(x_test, y_test)
print(results)

# Plot training vs. validation loss per epoch. `plt` is imported in the
# notebook's top import cell so all dependencies are declared in one place.
History_dict = History.history
loss_values = History_dict["loss"]
val_loss_values = History_dict["val_loss"]
# 1-based epoch numbers for the x-axis; also reused by the accuracy plot.
epochs = range(1, len(loss_values) + 1)

plt.plot(epochs, loss_values, "bo", label="Training Loss")     # blue dots
plt.plot(epochs, val_loss_values, "b", label="Validation Loss")  # blue line
plt.title("Training and Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.grid()
plt.show()

Now plot the accuracy for both the training and validation sets.


In [None]:
# Per-epoch accuracy on the training subset and on the validation set.
accuracy = History_dict["accuracy"]
val_accuracy = History_dict["val_accuracy"]

# Start from a clean figure so nothing from an earlier plot carries over.
plt.clf()
plt.plot(epochs, accuracy, "bo", label="Training Accuracy")        # blue dots
plt.plot(epochs, val_accuracy, "b", label="Validation Accuracy")   # blue line
plt.title("Training and Validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.grid()
plt.show()