In [1]:
# Imports
from keras.datasets import imdb

from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics, activations

import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [None]:
# Load the IMDB review dataset (served from
# https://s3.amazonaws.com/text-datasets/imdb.npz), restricted with
# num_words=10000, and unpack the train and test splits.
train_split, test_split = imdb.load_data(num_words=10000)
xtrain, ytrain = train_split
xtest, ytest = test_split

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
 1048576/17464789 [>.............................] - ETA: 53:49

In [None]:
# Exploring the dataset: the shape of every split, then the first training
# review (as a list of word indices) together with its label.

print("xtrain shape", xtrain.shape)
print("ytrain shape", ytrain.shape)
print()
print("xtest shape", xtest.shape)
print("ytest shape", ytest.shape)
print()
# Index 0 is used for both the review and the label so that the two printed
# values describe the same sample.
print("xtrain first review as dictionary index", xtrain[0])
print()
print()
print("ytrain label", ytrain[0])

In [None]:
# word -> integer index mapping (it is inverted in the following cell to obtain
# the index -> word lookup used for decoding reviews)
word_index = imdb.get_word_index()

In [None]:
# Invert word_index so that an integer index looks up its word.
reverse_word_index = {index: word for word, index in word_index.items()}

In [None]:
# Decode training review 22 back into text.
# Ids in the loaded reviews are shifted by 3 relative to word_index, because the
# low ids are reserved for padding, start-of-sequence and out-of-vocabulary
# markers. Those reserved ids have no entry after the shift, so they are shown
# as "?" instead of being mistaken for the most frequent words.
decode_review = " ".join(reverse_word_index.get(i - 3, "?") for i in xtrain[22])
decode_review

In [None]:
import numpy as np


def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of integer index sequences.

    Args:
        sequences: Sized iterable with one sequence of integer indices per
            sample. Each index must be a valid column position for a row of
            width ``dimension``.
        dimension: Number of columns in every encoded row.

    Returns:
        A float array of shape ``(len(sequences), dimension)`` where entry
        ``[n, k]`` is 1.0 if index ``k`` occurs in ``sequences[n]`` and 0.0
        otherwise.
    """
    encoded = np.zeros((len(sequences), dimension))
    for row, indices in enumerate(sequences):
        # One indexed assignment sets every listed column of this row;
        # repeated indices simply write 1.0 again.
        encoded[row, indices] = 1.0
    return encoded


# Multi-hot encode both splits. The width is spelled out explicitly; it equals
# the default of vectorize_sequences.
x_train = vectorize_sequences(sequences=xtrain, dimension=10000)
x_test = vectorize_sequences(sequences=xtest, dimension=10000)

In [None]:
# Convert both label vectors to float32 arrays.
ytrain = np.asarray(ytrain).astype(np.float32)
ytest = np.asarray(ytest).astype(np.float32)

In [None]:
# Model: two 16-unit ReLU hidden layers over the 10000-wide input, followed by
# a single sigmoid output unit.
model = models.Sequential(
    [
        layers.Dense(16, activation=activations.relu, input_shape=(10000,)),
        layers.Dense(16, activation=activations.relu),
        layers.Dense(1, activation=activations.sigmoid),
    ]
)

In [None]:
# Configure training: RMSprop optimizer, mean squared error loss, and accuracy
# reported under the "acc" key.
# NOTE(review): binary cross-entropy is the usual loss for a single sigmoid
# output -- confirm that mse is intentional here.
rmsprop = optimizers.RMSprop(lr=0.0001)
model.compile(optimizer=rmsprop, loss=losses.mse, metrics=["acc"])

In [None]:
# Hold out the first 10000 training samples for validation; the remaining
# samples are what the model is actually fitted on.
x_val, x_train_partial = x_train[:10000], x_train[10000:]
y_val, y_train_partial = ytrain[:10000], ytrain[10000:]

In [None]:
# Fit on the partial training set for 4 epochs with batches of 512 samples,
# passing the held-out split as validation data.
history = model.fit(
    x_train_partial,
    y_train_partial,
    epochs=4,
    batch_size=512,
    validation_data=(x_val, y_val),
)

# history.history holds the recorded values for each metric name.
history_dict = history.history
# Printed explicitly: a bare expression in the middle of a cell is evaluated
# and discarded, so the metric names would otherwise never be shown.
print(list(history_dict.keys()))

# Training and validation accuracy recorded for the last epoch.
print(history_dict["acc"][-1])
print(history_dict["val_acc"][-1])

In [None]:
# Predict for a single sample; the 22:23 slice keeps the leading batch axis.
# NOTE(review): x_train_partial starts at row 10000 of x_train, so this is
# training row 10022, not the review decoded earlier from xtrain[22] -- confirm
# which sample was intended.
sample = x_train_partial[22:23]
print(model.predict(sample))

In [None]:
loss = history_dict["loss"]
val_loss = history_dict["val_loss"]
# One x-value per recorded epoch, numbered from 1, so that epochs has exactly
# as many elements as loss and val_loss.
epochs = range(1, len(loss) + 1)
epochs

In [None]:
%matplotlib
acc = history.history["acc"]
val_acc = history.history["val_acc"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]

epochs = range(1, len(acc) + 1)

# "bo" is for "blue dot"
plt.plot(epochs, loss, "ro", label="Training loss")
# b is for "solid blue line"
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.show()

In [None]:
plt.clf()  # clear the current figure before drawing the accuracy curves
acc_values = history_dict["acc"]
val_acc_values = history_dict["val_acc"]

# "bo" draws blue dots for the training accuracy; "b" draws a solid blue line
# for the validation accuracy. The x-axis reuses the epochs range that was
# defined together with the loss plot.
plt.plot(epochs, acc_values, "bo", label="Training acc")
plt.plot(epochs, val_acc_values, "b", label="Validation acc")
plt.title("Training and validation accuracy")
plt.xlabel("Epochs")
# This figure shows accuracy, so the y-axis is labelled accordingly.
plt.ylabel("Accuracy")
plt.legend()

plt.show()