In [1]:
from __future__ import print_function
import numpy as np
np.random.seed(1337)

from keras.preprocessing import sequence
from keras.models import Model
from keras.layers import Dense, Activation, Embedding, Flatten, Input, Conv1D, MaxPooling1D, GlobalMaxPooling1D, LSTM, Dot
from keras.layers.advanced_activations import PReLU
from keras.datasets import imdb

Using TensorFlow backend.


In [2]:
max_features = 20000
maxlen = 80  # cut texts after this number of words (among top max_features most common words)
batch_size = 32

In [3]:
print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

Loading data...




25000 train sequences
25000 test sequences


In [4]:
print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

Pad sequences (samples x time)
X_train shape: (25000, 80)
X_test shape: (25000, 80)


In [10]:
print('Build model...')
inputs = Input(shape=(maxlen,))
x = inputs
x = Embedding(max_features, 128, dropout=0.2)(x)

x1 = LSTM(32)(x)

x2 = Conv1D(filters=32, kernel_size=3, padding="same")(x)
x2 = GlobalMaxPooling1D()(x2)

x = Dot(axes=-1)([x1, x2])
x = Dense(1)(x)
predictions = Activation("sigmoid")(x)


model = Model(input=inputs, output=predictions)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Build model...


  after removing the cwd from sys.path.
  app.launch_new_instance()


In [11]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 80)           0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 80, 128)      2560000     input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_4 (Conv1D)               (None, 80, 32)       12320       embedding_4[0][0]                
__________________________________________________________________________________________________
lstm_4 (LSTM)                   (None, 32)           20608       embedding_4[0][0]                
__________________________________________________________________________________________________
global_max

In [12]:
print('Train...')
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test))
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Train...


  This is separate from the ipykernel package so we can avoid doing imports until


Train on 25000 samples, validate on 25000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test score: 1.192910744406581
Test accuracy: 0.81056
