In [37]:
from __future__ import print_function
import numpy as np
# Seed NumPy's global RNG with a fixed value; this runs before the Keras
# imports below. NOTE(review): presumably done for reproducibility -- whether
# the Keras backend actually draws from this RNG is not visible here.
np.random.seed(1337)

from keras.preprocessing import sequence
from keras.models import Model
from keras.layers import Dense, Activation, Embedding, Flatten, Input, Conv1D, MaxPooling1D, GlobalMaxPooling1D
from keras.layers.advanced_activations import PReLU
from keras.datasets import imdb

In [9]:
# Hyper-parameters shared by the cells below.
batch_size = 32        # mini-batch size handed to fit() and evaluate()
max_features = 20000   # vocabulary size: only the most frequent words are kept
maxlen = 80            # every review is padded or truncated to this many tokens

In [84]:
# Load the IMDB review dataset as lists of word indices, restricting the
# vocabulary to the `max_features` most frequent words.
print('Loading data...')
# `num_words` is the Keras 2 name of this argument; the Keras 1 spelling
# `nb_words` is deprecated and is rejected by newer releases.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

Loading data...




25000 train sequences
25000 test sequences


In [85]:
# Bring every review to exactly `maxlen` tokens so both splits become dense
# 2-D integer arrays of shape (samples, maxlen). pad_sequences is called with
# its default padding/truncation modes.
print('Pad sequences (samples x time)')
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_test)
)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

Pad sequences (samples x time)
X_train shape: (25000, 80)
X_test shape: (25000, 80)


In [86]:
# Sanity check: display the padded training matrix (one row of word indices
# per review); NumPy abbreviates the middle rows/columns in its repr.
X_train

array([[   15,   256,     4, ...,    19,   178,    32],
       [  125,    68,     2, ...,    16,   145,    95],
       [  645,   662,     8, ...,     7,   129,   113],
       ...,
       [  529,   443, 17793, ...,     4,  3586,     2],
       [  286,  1814,    23, ...,    12,     9,    23],
       [   97,    90,    35, ...,   204,   131,     9]], dtype=int32)

In [88]:
print('Build model...')
# Functional-API classifier:
#   word indices -> 128-d embeddings -> max over time -> 1 logit -> sigmoid.
# NOTE(review): the saved `model.summary()` output in this notebook lists a
# Conv1D layer (32 filters) between the embedding and the pooling step that
# this cell does not create; the recorded results appear to come from an
# earlier version of this cell -- confirm and re-run.
inputs = Input(shape=(maxlen,))

# The `dropout=` argument of Embedding is no longer supported in Keras 2 (it
# only triggers a warning), so it is dropped here. To regularise the
# embeddings, add a `SpatialDropout1D(0.2)` layer directly after this one.
x = Embedding(max_features, 128)(inputs)

# Collapse the time axis by keeping the maximum of each feature channel.
x = GlobalMaxPooling1D()(x)
x = Dense(1)(x)
predictions = Activation("sigmoid")(x)

# Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
# `input` / `output` spellings are deprecated.
model = Model(inputs=inputs, outputs=predictions)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Build model...


  after removing the cwd from sys.path.
  app.launch_new_instance()


In [89]:
# Print the layer-by-layer architecture and parameter counts.
# NOTE(review): the saved output of this cell lists a Conv1D layer that the
# current model-building cell does not create -- re-run to refresh it.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_32 (InputLayer)        (None, 80)                0         
_________________________________________________________________
embedding_7 (Embedding)      (None, 80, 128)           2560000   
_________________________________________________________________
conv1d_28 (Conv1D)           (None, 80, 32)            12320     
_________________________________________________________________
global_max_pooling1d_14 (Glo (None, 32)                0         
_________________________________________________________________
dense_45 (Dense)             (None, 1)                 33        
_________________________________________________________________
activation_23 (Activation)   (None, 1)                 0         
Total params: 2,572,353
Trainable params: 2,572,353
Non-trainable params: 0
_________________________________________________________________


In [90]:
# Fit the classifier, then report loss and accuracy on the test split.
print('Train...')
# `epochs` is the Keras 2 name of this argument; `nb_epoch` is deprecated.
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics are not an independent estimate.
model.fit(X_train, y_train, batch_size=batch_size, epochs=15,
          validation_data=(X_test, y_test))
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Train...


  This is separate from the ipykernel package so we can avoid doing imports until


Train on 25000 samples, validate on 25000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test score: 0.8385325213527679
Test accuracy: 0.83772
