In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
# Alias for the Keras IMDB dataset module; used below to load the encoded
# reviews and to fetch the word index.
imdb = keras.datasets.imdb

In [3]:
# Load the integer-encoded reviews and their labels.
#   num_words=5000 caps the vocabulary (the Embedding layer's input_dim
#   further down uses the same value); seed fixes the shuffle.
#   start_char / oov_char / index_from select the reserved ids 1 and 2 and
#   the offset applied to real word ids.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=5000,
    skip_top=10,
    maxlen=255,
    seed=123,
    start_char=1,
    oov_char=2,
    index_from=3,
)

In [4]:
# Fetch the word -> integer index mapping and build the reverse lookup
# (integer index -> word).
# NOTE(review): load_data above is called with index_from=3, so the ids stored
# in x_train are presumably shifted by 3 relative to this table -- confirm
# before using idx_2_word to decode reviews.
word_2_idx = imdb.get_word_index()
idx_2_word = {index: word for word, index in word_2_idx.items()}

In [5]:
# Show the ten entries with the smallest indices as a quick sanity check.
print(sorted(idx_2_word.items())[:10])

[(1, 'the'), (2, 'and'), (3, 'a'), (4, 'of'), (5, 'to'), (6, 'is'), (7, 'br'), (8, 'in'), (9, 'it'), (10, 'i')]


In [6]:
# Pad both splits identically: zeros are appended at the end
# (padding='post') until every sequence has length 255.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split,
                                               value=0,
                                               padding='post',
                                               maxlen=255)
    for split in (x_train, x_test)
)

To test later: an alternative model built with a pre-made estimator (`feature_columns` is still to be filled in):
>model = tf.estimator.DNNClassifier(hidden_units=[128, 64, 32],
>                                   feature_columns=___,
>                                   model_dir='/DNNClassifier',
>                                   n_classes=2,
>                                   optimizer='Adam',
>                                   activation_fn=tf.nn.relu,
>                                   dropout=0.3,
>                                   batch_norm=True)

In [7]:
# Sequential classifier: embed each token id into 12 dimensions, average the
# embeddings over the sequence axis, then pass the result through two ReLU
# dense layers and a single sigmoid output unit.
model = keras.Sequential([
    # input_dim=5000 matches num_words passed to imdb.load_data.
    keras.layers.Embedding(
        input_dim=5000,
        output_dim=12,
        embeddings_initializer=keras.initializers.TruncatedNormal(
            mean=0.0, stddev=0.5, seed=123),
    ),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(units=64, activation=tf.nn.relu),
    keras.layers.Dense(units=32, activation=tf.nn.relu),
    keras.layers.Dense(units=1, activation=tf.nn.sigmoid),
])

In [8]:
# Print the per-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 12)          60000     
_________________________________________________________________
global_average_pooling1d (Gl (None, 12)                0         
_________________________________________________________________
dense (Dense)                (None, 64)                832       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 62,945
Trainable params: 62,945
Non-trainable params: 0
_________________________________________________________________


In [9]:
# TensorBoard callback writing to ./logs; passed to model.fit below.
# Graph, gradient and image logging are all switched on, with histograms
# requested at a frequency of 5.
tb_cp = keras.callbacks.TensorBoard(
    log_dir='./logs',
    histogram_freq=5,
    write_graph=True,
    write_grads=True,
    batch_size=32,
    write_images=True,
)

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 25% of the padded training data as a validation set; the fixed
# random_state keeps the split reproducible across runs.
# Note the capitalised names: X_train / Y_train are the reduced training
# split, while x_train / y_train remain the full padded arrays.
X_train, X_val, Y_train, Y_val = train_test_split(x_train,
                                                  y_train,
                                                  test_size=0.25,
                                                  random_state=123)

In [12]:
# Inspect one padded example: its length first, then the encoded tokens
# (trailing zeros are padding).
print(len(X_train[211]), X_train[211], sep='\n')

255
[   2   14  641    2   18    2  119   65    2 3595    2   34  107  389
  354   34    2    2    2    2    2    2   16  115   53  307   17    2
    2    2 1696    2  255   37  892   41 1014   11    2    2    2    2
    2 1708    2  731    2   23   27 1303   10   10   50    2    2   87
  696  239   34    2    2   17    2    2   35 1732   17  210 3753    2
  198  290    2 1866    2    2   93  200    2    2    2  255  467    2
 2398   14    2   91    2   22   15  144 1229    2  483    2  259  262
    2    2    2   37 2308   27  108   26   99  196    2    2 1185   33
  211   93   61  350  747   15  291    2   24  110   34  195   84   35
  709   46    2  158    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
  

In [13]:
# Binary classification setup: binary cross-entropy loss, accuracy metric.
# The Keras Adam optimizer replaces tf.train.AdamOptimizer, which exists only
# in TensorFlow 1.x and cannot be serialized with the model by Keras;
# keras.optimizers.Adam is available in both TF 1.x and TF 2.x.
# Default hyper-parameters are kept (no arguments passed); its default epsilon
# differs slightly from tf.train.AdamOptimizer's.
model.compile(optimizer=keras.optimizers.Adam(),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [14]:
# Shape of the training split: (number of examples, padded sequence length).
X_train.shape

(18750, 255)

In [15]:
# Train for 30 epochs in mini-batches of 32, evaluating on the held-out
# validation split and logging through the TensorBoard callback.
# The returned object is kept in `history` for later inspection.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=30,
    batch_size=32,
    verbose=1,
    callbacks=[tb_cp],
)

Train on 18750 samples, validate on 6250 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
