In [None]:
from keras.datasets import imdb

In [None]:
#Load the IMDB review dataset, keeping only the 10,000 most frequently used words.
#Each review is a sequence of integer word indices, and each review has one sentiment label.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

In [None]:
#Display the first training example (a sequence of integer word indices)
train_data[0]

In [None]:
#Display the label that belongs to the first training example
train_labels[0]

In [None]:
#Because we restricted ourselves to the 10,000 most frequently used words,
#the largest word index found anywhere in the training data should stay below 10,000.
max(map(max, train_data))

In [None]:
#Now, let's decode a training example back into readable text.
word_index = imdb.get_word_index() #dictionary mapping words to integers
#Invert the dictionary so that it maps integers back to words.
reverse_word_index = {index: word for word, index in word_index.items()}
#Indices in the reviews are offset by 3 because 0, 1, and 2 are reserved for
#padding, start of sequence, and unknown respectively; anything without a word becomes '?'.
decoded_review = ' '.join(reverse_word_index.get(token - 3, '?') for token in train_data[0])
decoded_review

In [None]:
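#Now, let's convert our lists of word indices into vectors of 0's and 1's with length 10,000. (This is often called one-hot encoding; since each vector can contain several 1's, "multi-hot encoding" is the more precise term.)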
import numpy as np

def vectorize_sequences(sequences, dimension=10000, dtype=np.float64):
    """Multi-hot encode integer sequences into a dense 2-D array.

    Args:
        sequences: A sized iterable of integer-index sequences (for example
            lists of word indices). Every index must be smaller than
            ``dimension``.
        dimension: Length of each encoded vector.
        dtype: NumPy dtype of the returned array. The default, ``np.float64``,
            is what ``np.zeros`` produces on its own; pass ``np.float32`` to
            halve the memory needed for the result.

    Returns:
        An array of shape ``(len(sequences), dimension)`` in which entry
        ``[i, j]`` is 1 if index ``j`` occurs in ``sequences[i]`` and 0
        otherwise. An index that occurs several times still yields a single 1.
    """
    results = np.zeros((len(sequences), dimension), dtype=dtype) #all-zero matrix of shape (len(sequences), dimension)
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1. #sets every column listed in `sequence` to 1 for row i
    return results

#Encode both splits. Each call builds a dense matrix with one row per review and
#10,000 columns (the default `dimension`), so these arrays can be large.
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

In [None]:
#Here is what a sample looks like now: a vector with 1s at the indices of the words that occur in the review
x_train[0]

In [None]:
#Let us now also vectorize our labels by converting them to float32 NumPy arrays
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')

#Display the first converted label
y_train[0]

In [None]:
#Ask TensorFlow to grow its GPU memory allocation on demand instead of claiming
#all of it up front (which can starve the display that shares the GPU).
import keras

import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
#Hand the configured session to Keras; otherwise Keras creates its own session
#with default options the first time it needs one.
keras.backend.set_session(session)

In [None]:
#Now, let us define our model
from keras import models
from keras import layers

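#TODO: build and compile the network here and assign it to `model`.
#      The cells below call model.fit, model.evaluate and model.predict, and the plotting cell
#      reads accuracy values from the training history, so compile the model with an accuracy metric.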

In [None]:
#Now, let us set aside the first 10,000 training examples as a validation set
#            (used to check accuracy during training) and train on the rest.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [None]:
#Shape of the full encoded training set: (number of training reviews, 10000)
x_train.shape

In [None]:
#Shape of the validation split (the first 10,000 rows of x_train)
x_val.shape

In [None]:
#Shape of the remaining training split (x_train without its first 10,000 rows)
partial_x_train.shape

In [None]:
#Train for 20 epochs, passing the held-out split as validation data.
#NOTE(review): batch_size=1 means one weight update per training example, which makes
#every epoch very slow; a larger mini-batch is usual — confirm this value is intentional.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=20,
    batch_size=1,
    validation_data=(x_val, y_val),
)

In [None]:
#model.fit returns a History object. Its `history` attribute is a dictionary holding
#        the values recorded during training; the plotting cell reads the accuracy entries from it.
history_dict = history.history
history_dict.keys()

In [None]:
import matplotlib.pyplot as plt

plt.clf() #this clears the figure

#Older Keras releases record accuracy under 'acc'; newer ones use the metric name
#given at compile time, typically 'accuracy'. Accept either spelling.
acc_key = 'acc' if 'acc' in history_dict else 'accuracy'
acc = history_dict[acc_key]
val_acc = history_dict['val_' + acc_key]

#The curves must be read before `epochs` can be sized from them (one value per epoch, numbered from 1).
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label = 'Training acc')
plt.plot(epochs, val_acc, 'b', label = 'Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy') #this figure shows accuracy, not loss
plt.legend()

plt.show()

In [None]:
#Evaluate the trained model on the held-out test set
#NOTE(review): what evaluate() returns (loss only, or loss plus metrics) depends on how the model was compiled
results = model.evaluate(x_test, y_test)

In [None]:
#Show the value returned by model.evaluate on the test set
results

In [None]:
#We can use our model to predict how likely each test review is to be positive
#NOTE(review): this reading assumes the model ends in a single sigmoid unit — confirm once the model is defined
model.predict(x_test)