In [8]:
#
#   This program will classify reviews from IMDB based on sentiment, positive or
#   negative.  We will use the IMDB dataset that comes with Keras.
#   The reviews in this dataset have already been preprocessed: each actual
#   word has been replaced with an integer encoding.  So the second most
#   popular word is replaced by 2, third most popular by 3, etc.

#   Suppress TensorFlow's C++ warning and informational messages.
#   TF_CPP_MIN_LOG_LEVEL has to be in the environment before TensorFlow is
#   first imported (importing keras loads the TensorFlow backend), otherwise
#   messages emitted during import are not filtered.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.callbacks import EarlyStopping
from keras.datasets import imdb


Using TensorFlow backend.


In [4]:
!pip install tensorflow

Collecting tensorflow
  Downloading https://files.pythonhosted.org/packages/1c/43/37bfc57f0bafa74f7cf55ad3fd3edf6fd32ff88fdd1f84bc52448fb04f10/tensorflow-1.9.0-cp34-cp34m-manylinux1_x86_64.whl (51.7MB)
[K    100% |ââââââââââââââââââââââââââââââââ| 51.7MB 6.0kB/s eta 0:00:01   16% |ââââââ                          | 8.4MB 4.7MB/s eta 0:00:10    37% |ââââââââââââ                    | 19.3MB 1.3MB/s eta 0:00:25
[?25hCollecting tensorboard<1.10.0,>=1.9.0 (from tensorflow)
  Downloading https://files.pythonhosted.org/packages/9e/1f/3da43860db614e294a034e42d4be5c8f7f0d2c75dc1c428c541116d8cdab/tensorboard-1.9.0-py3-none-any.whl (3.3MB)
[K    100% |ââââââââââââââââââââââââââââââââ| 3.3MB 169kB/s eta 0:00:01
[?25hCollecting termcolor>=1.1.0 (from tensorflow)
  Downloading https://files.pythonhosted.org/packages/8a/48/a76be51647d0e

In [5]:
!pip install keras

Collecting keras
  Downloading https://files.pythonhosted.org/packages/34/7d/b1dedde8af99bd82f20ed7e9697aac0597de3049b1f786aa2aac3b9bd4da/Keras-2.2.2-py2.py3-none-any.whl (299kB)
[K    100% |ââââââââââââââââââââââââââââââââ| 307kB 1.1MB/s ta 0:00:01
Collecting keras-preprocessing==1.0.2 (from keras)
  Downloading https://files.pythonhosted.org/packages/71/26/1e778ebd737032749824d5cba7dbd3b0cf9234b87ab5ec79f5f0403ca7e9/Keras_Preprocessing-1.0.2-py2.py3-none-any.whl
Collecting keras-applications==1.0.4 (from keras)
  Downloading https://files.pythonhosted.org/packages/54/90/8f327deaa37a71caddb59b7b4aaa9d4b3e90c0e76f8c2d1572005278ddc5/Keras_Applications-1.0.4-py2.py3-none-any.whl (43kB)
[K    100% |ââââââââââââââââââââââââââââââââ| 51kB 3.4MB/s ta 0:00:011
Installing collected packages: keras-preprocessing, keras-applications, keras
Successfully installed keras

In [9]:
#   Parameters controlling which data is used

#   Vocabulary size: only the NUM_WORDS most frequent words are considered.
NUM_WORDS = 6000

#   Forwarded to imdb.load_data(skip_top=...); meant to drop the most
#   frequent words (the, and, a).
#   NOTE(review): the printed sample still contains id 4, so a value of 2 may
#   not filter any real words once the loader's index offset is applied --
#   confirm against the imdb.load_data documentation.
SKIP_TOP = 2

#   Maximum number of words kept from a single review.
MAX_REVIEW_LEN = 100

In [10]:
#   Fetch the pre-processed, sentiment-labelled review data from the IMDB
#   dataset, split into a training pair and a test pair.
train_split, test_split = imdb.load_data(skip_top=SKIP_TOP, num_words=NUM_WORDS)
x_train, y_train = train_split
x_test, y_test = test_split

#   Print one sample: a word-index vector (ex. [2, 4, 2, 2, 33, 2804, ...])
#   followed by its class label (0 or 1).
SAMPLE_IDX = 3
print("encoded word sequence:", x_train[SAMPLE_IDX], "class:", y_train[SAMPLE_IDX])

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
encoded word sequence: [2, 4, 2, 2, 33, 2804, 4, 2040, 432, 111, 153, 103, 4, 1494, 13, 70, 131, 67, 11, 61, 2, 744, 35, 3715, 761, 61, 5766, 452, 2, 4, 985, 7, 2, 59, 166, 4, 105, 216, 1239, 41, 1797, 9, 15, 7, 35, 744, 2413, 31, 8, 4, 687, 23, 4, 2, 2, 6, 3693, 42, 38, 39, 121, 59, 456, 10, 10, 7, 265, 12, 575, 111, 153, 159, 59, 16, 1447, 21, 25, 586, 482, 39, 4, 96, 59, 716, 12, 4, 172, 65, 9, 579, 11, 2, 4, 1615, 5, 2, 7, 5168, 17, 13, 2, 12, 19, 6, 464, 31, 314, 11, 2, 6, 719, 605, 11, 8, 202, 27, 310, 4, 3772, 3501, 8, 2722, 58, 10, 10, 537, 2116, 180, 40, 14, 413, 173, 7, 263, 112, 37, 152, 377, 4, 537, 263, 846, 579, 178, 54, 75, 71, 476, 36, 413, 263, 2504, 182, 5, 17, 75, 2306, 922, 36, 279, 131, 2895, 17, 2867, 42, 17, 35, 921, 2, 192, 5, 1219, 3890, 19, 2, 217, 4122, 1710, 537, 2, 1236, 5, 736, 10, 10, 61, 403, 9, 2, 40, 61, 4494, 5, 27, 4494, 159, 90, 263, 2311, 4319, 309, 8, 178, 5, 82, 4319, 4, 65, 15

In [11]:
#   Bring every review to exactly MAX_REVIEW_LEN entries so that all
#   sequences share one length (short ones are padded, long ones truncated).
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=MAX_REVIEW_LEN)
    for reviews in (x_train, x_test)
)
print('x_train.shape:', x_train.shape, 'x_test.shape:', x_test.shape)


x_train.shape: (25000, 100) x_test.shape: (25000, 100)


In [12]:
#   The Model: word embedding -> LSTM -> single sigmoid output unit
model = Sequential([
    Embedding(NUM_WORDS, 64),
    LSTM(64, dropout=0.3, recurrent_dropout=0.3),
    Dense(1, activation='sigmoid'),
])


In [13]:
#   Compile: binary cross-entropy loss, Adam optimizer, and accuracy
#   reported as an additional metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [14]:
#   Train
BATCH_SIZE = 24
EPOCHS = 5

#   Stop training once validation accuracy has gone 2 epochs without improving.
cbk_early_stopping = EarlyStopping(monitor='val_acc', mode='max', patience=2)

#   NOTE(review): the test split doubles as the validation set here, so early
#   stopping is steered by the same data that is used for the final
#   evaluation -- confirm this is intended.
model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
    callbacks=[cbk_early_stopping],
)

Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7efde8be0668>

In [15]:
#   Evaluate on the held-out reviews; with the metrics configured at compile
#   time the result unpacks into the loss ("score") and the accuracy.
score, acc = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)
print('test score:', score, ' test accuracy:', acc)

test score: 0.3772685106420517  test accuracy: 0.8360400001716614


In [None]:
# serialize the model description to JSON
model_json = model.to_json()
with open("model.json", "w") as json_out:
    json_out.write(model_json)

# serialize weights to HDF5
model.save_weights("model.h5")

print("Saved model to disk")

In [18]:
# Persist the model to a single HDF5 file in the current working directory.
model.save('my_model.h5')

In [20]:
# Show the working directory; the relative file names used when saving the
# model above are resolved against it.
os.getcwd()

'/home/nbuser/library'

In [21]:
# Dump the model's JSON description for inspection.
mdesc = model.to_json()
print(mdesc)

{"keras_version": "2.2.2", "config": [{"config": {"embeddings_regularizer": null, "embeddings_initializer": {"config": {"minval": -0.05, "seed": null, "maxval": 0.05}, "class_name": "RandomUniform"}, "name": "embedding_1", "trainable": true, "activity_regularizer": null, "mask_zero": false, "input_length": null, "output_dim": 64, "batch_input_shape": [null, null], "embeddings_constraint": null, "dtype": "float32", "input_dim": 6000}, "class_name": "Embedding"}, {"config": {"go_backwards": false, "dropout": 0.3, "bias_constraint": null, "trainable": true, "bias_regularizer": null, "recurrent_constraint": null, "units": 64, "recurrent_activation": "hard_sigmoid", "kernel_constraint": null, "unroll": false, "activation": "tanh", "recurrent_dropout": 0.3, "return_state": false, "use_bias": true, "activity_regularizer": null, "unit_forget_bias": true, "stateful": false, "name": "lstm_1", "implementation": 1, "recurrent_initializer": {"config": {"seed": null, "gain": 1.0}, "class_name": "Ort

In [22]:
# Display the model's current weight arrays (the cell echoes the value of
# its last expression).
model.get_weights()

[array([[ 0.0555384 ,  0.07895374, -0.03360911, ...,  0.00047985,
         -0.00902827, -0.07634516],
        [-0.00765518, -0.00212397, -0.00125753, ...,  0.00825204,
          0.01772101, -0.01129544],
        [-0.00361234,  0.06540594,  0.01748554, ..., -0.04555601,
          0.01727996,  0.00097645],
        ...,
        [-0.01037923, -0.09445988,  0.0595802 , ..., -0.02510096,
         -0.06820573, -0.05356245],
        [ 0.00830153,  0.00344244, -0.16342099, ...,  0.07311364,
          0.01306194, -0.07307316],
        [-0.02612214, -0.02528159, -0.06401911, ..., -0.04481454,
         -0.05810652, -0.10473937]], dtype=float32),
 array([[-0.20400359, -0.20303278, -0.12162951, ...,  0.14320262,
         -0.1258284 , -0.35484505],
        [-0.27099136,  0.0549292 , -0.12434935, ...,  0.1698512 ,
         -0.04352328, -0.1664506 ],
        [-0.11369728,  0.00057958,  0.06483357, ...,  0.29014498,
          0.09484687, -0.09884609],
        ...,
        [ 0.13351877,  0.04999574, -0.1