In [None]:
from tensorflow.keras.datasets import imdb

# Load the IMDB dataset with the vocabulary capped via num_words=500; per the
# Keras docs, words outside the most frequent ones are replaced by the
# out-of-vocabulary id.
(train_input, train_target), (test_input, test_target) = imdb.load_data(
    num_words=500
)

In [None]:
# Shapes of the training and test input arrays.
print(train_input.shape, test_input.shape)

In [None]:
# Length of the second training sample (index 1).
print(len(train_input[1]))

In [None]:
# Raw contents of the first training sample.
print(train_input[0])

In [None]:
# First 20 training targets.
print(train_target[:20])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training data as a validation set (fixed seed).
# NOTE: this rebinds train_input/train_target, so re-running the cell splits
# the already-reduced training set again.
train_input, val_input, train_target, val_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [None]:
import numpy as np

# Length of every sample in the training split, as a NumPy array.
lengths = np.array(list(map(len, train_input)))

In [None]:
# Mean and median of the per-sample lengths.
print(np.mean(lengths), np.median(lengths))

In [None]:
import matplotlib.pyplot as plt

# Histogram of training-sample lengths.
plt.hist(lengths)
plt.xlabel('length')
plt.ylabel('frequency')  # axis label typo fixed (was 'freqency')
plt.show()

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Force every training sample to exactly 100 entries. The padding/truncating
# values below are the library defaults, spelled out: longer samples lose
# their leading entries, shorter ones are zero-filled at the front.
train_seq = pad_sequences(
    train_input,
    maxlen=100,
    padding='pre',
    truncating='pre',
)

In [None]:
# Shape of the padded training array.
print(train_seq.shape)

In [None]:
# First padded/truncated training sequence.
print(train_seq[0])

In [None]:
# Last 10 entries of the first unpadded sample, for comparison with train_seq[0].
print(train_input[0][-10:])

In [None]:
# First 10 entries of the first unpadded sample, for comparison with train_seq[0].
print(train_input[0][:10])

In [None]:
# Sixth padded sequence (index 5).
# NOTE(review): presumably chosen because this sample is shorter than 100 and
# therefore shows the padding — confirm against the output.
print(train_seq[5])

In [None]:
# Apply the same length-100 front padding/truncation to the validation split
# (padding/truncating values are the library defaults, spelled out).
val_seq = pad_sequences(
    val_input,
    maxlen=100,
    padding='pre',
    truncating='pre',
)

In [None]:
from tensorflow import keras

# Recurrent classifier over one-hot inputs: a SimpleRNN with 8 units reading
# sequences of 100 steps x 500 features, followed by one sigmoid output unit.
model = keras.Sequential([
    keras.layers.SimpleRNN(8, input_shape=(100, 500)),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# One-hot encode the padded training sequences.
# num_classes is pinned to 500 so the encoding depth always matches the
# model's (100, 500) input shape; without it the depth is inferred from the
# largest id that happens to be present in the array.
train_oh = keras.utils.to_categorical(train_seq, num_classes=500)

In [None]:
# Shape of the one-hot encoded training array.
print(train_oh.shape)

In [None]:
# First 12 entries of the one-hot vector at position 0 of the first sequence.
print(train_oh[0][0][:12])

In [None]:
# Sum over that same vector; a one-hot vector should sum to 1.
print(np.sum(train_oh[0][0]))

In [None]:
# One-hot encode the padded validation sequences.
# num_classes is pinned to 500 so the encoding depth matches the model's
# (100, 500) input shape even if the largest id never occurs in this split;
# otherwise the depth is inferred from the data and can come out smaller.
val_oh = keras.utils.to_categorical(val_seq, num_classes=500)

In [None]:
# Print the layer-by-layer summary of the one-hot SimpleRNN model.
model.summary()

In [None]:
# Train the one-hot SimpleRNN model with RMSprop at learning rate 1e-4.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)

# metrics is passed as a list: newer Keras releases reject a bare string here.
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# save_best_only=True keeps only the best-scoring epoch on disk, so the file
# really is the "best" model rather than whatever the last epoch produced.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-simplernn-model.h5',
                                                save_best_only=True)

# Stop once the monitored metric has not improved for 3 epochs and roll the
# in-memory model back to its best weights.
earlystopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

history = model.fit(train_oh, train_target, epochs=100, batch_size=64,
                    validation_data=(val_oh, val_target),
                    callbacks=[checkpoint_cb, earlystopping_cb])

In [None]:
# Training vs. validation loss per epoch, as recorded in `history`.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='val')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# Memory footprint in bytes of the integer sequences vs. their one-hot encoding.
print(train_seq.nbytes, train_oh.nbytes)

In [None]:
# Embedding-based classifier: ids from a 500-entry vocabulary are mapped to
# 16-dimensional vectors (input sequences of length 100), fed through a
# SimpleRNN with 8 units, then a single sigmoid output unit.
model2 = keras.Sequential([
    keras.layers.Embedding(500, 16, input_length=100),
    keras.layers.SimpleRNN(8),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Print the layer-by-layer summary of the embedding-based model.
model2.summary()

In [None]:
# Train the embedding-based model on the integer sequences directly, using
# RMSprop at learning rate 1e-4.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)

# metrics is passed as a list: newer Keras releases reject a bare string here.
model2.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# save_best_only=True keeps only the best-scoring epoch on disk, so the file
# really is the "best" model rather than whatever the last epoch produced.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-embedding-model.h5',
                                                save_best_only=True)

# Stop once the monitored metric has not improved for 3 epochs and roll the
# in-memory model back to its best weights.
earlystopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

history = model2.fit(train_seq, train_target, epochs=100, batch_size=64,
                     validation_data=(val_seq, val_target),
                     callbacks=[checkpoint_cb, earlystopping_cb])

In [None]:
# Training vs. validation loss per epoch, as recorded in `history`.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='val')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()