In [20]:
# ------------------------------------------------------------------------
# Initial Import.
# We are going to use keras to build NN models.
# ------------------------------------------------------------------------
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.models import Model
from keras.layers import Dense, Activation, Embedding, Flatten, Input, Dropout, Conv1D, GlobalMaxPooling1D, recurrent
from keras.datasets import imdb

In [2]:
# ------------------------------------------------------------------------
# Data pre-processing.
# Including load data.
# ------------------------------------------------------------------------
max_features = 20000
maxlen = 80  # cut texts after this number of words (among top max_features most common words)
batch_size = 32

print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

Loading data...
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz




25000 train sequences
25000 test sequences


In [3]:
print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen) # sequences: cut words, just keep 80 of them.
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

Pad sequences (samples x time)
X_train shape: (25000, 80)
X_test shape: (25000, 80)


In [4]:
print('Build model...')


inputs = Input(shape=(maxlen,))
x = inputs
x = Embedding(max_features, 128, dropout=0.2)(x)
x = Dense(64, activation = 'relu')(x)
x = Flatten()(x)
x = Dense(1)(x)
predictions = Activation("sigmoid")(x)


model = Model(input=inputs, output=predictions)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Build model...


  
  del sys.path[0]


In [5]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 80)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 80, 128)           2560000   
_________________________________________________________________
dense_1 (Dense)              (None, 80, 64)            8256      
_________________________________________________________________
flatten_1 (Flatten)          (None, 5120)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 5121      
_________________________________________________________________
activation_1 (Activation)    (None, 1)                 0         
Total params: 2,573,377
Trainable params: 2,573,377
Non-trainable params: 0
_________________________________________________________________


In [6]:
print('Train...')
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test))
score, acc = model.evaluate(X_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Train...


  This is separate from the ipykernel package so we can avoid doing imports until


Train on 25000 samples, validate on 25000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test score: 1.1105385148486495
Test accuracy: 0.82084


In [9]:
print('Build model2...')


inputs = Input(shape=(maxlen,))
x = inputs
x = Embedding(max_features, 128, dropout=0.2)(x)
x = Dense(64, activation = 'relu')(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(1)(x)
predictions = Activation("sigmoid")(x)


model2 = Model(input=inputs, output=predictions)
model2.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Build model2...


  
  


In [10]:
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 80)                0         
_________________________________________________________________
embedding_3 (Embedding)      (None, 80, 128)           2560000   
_________________________________________________________________
dense_4 (Dense)              (None, 80, 64)            8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 80, 64)            0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 5120)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 5121      
_________________________________________________________________
activation_2 (Activation)    (None, 1)                 0         
Total para

In [13]:
print('Train...')
model2.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test))
score, acc = model2.evaluate(X_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Train...
Train on 25000 samples, validate on 25000 samples
Epoch 1/15


  This is separate from the ipykernel package so we can avoid doing imports until


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test score: 1.757807836009562
Test accuracy: 0.80164


In [17]:
print('Build model3...')


inputs = Input(shape=(maxlen,))
x = inputs
x = Embedding(max_features, 128, dropout=0.2)(x)
x = Conv1D(filters = 32, kernel_size = 5,padding='Same', activation='relu')(x)
x = GlobalMaxPooling1D()(x)
#x = Flatten()(x)
x = Dense(1)(x)
predictions = Activation("sigmoid")(x)


model3 = Model(input=inputs, output=predictions)
model3.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Build model3...


  
  


In [18]:
model3.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 80)                0         
_________________________________________________________________
embedding_6 (Embedding)      (None, 80, 128)           2560000   
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 80, 32)            20512     
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 32)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 33        
_________________________________________________________________
activation_3 (Activation)    (None, 1)                 0         
Total params: 2,580,545
Trainable params: 2,580,545
Non-trainable params: 0
_________________________________________________________________


In [19]:
print('Train...')
model3.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test))
score, acc = model3.evaluate(X_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Train...


  This is separate from the ipykernel package so we can avoid doing imports until


Train on 25000 samples, validate on 25000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test score: 0.8360491146403551
Test accuracy: 0.84008


In [23]:
print('Build model4...')


inputs = Input(shape=(maxlen,))
x = inputs
x = Embedding(max_features, 128, dropout=0.2)(x)
x = recurrent.LSTM(128)(x)
#x = GlobalMaxPooling1D()(x)
#x = Flatten()(x)
x = Dense(1)(x)
predictions = Activation("sigmoid")(x)


model4 = Model(input=inputs, output=predictions)
model4.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Build model4...


  
  


In [24]:
model4.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 80)                0         
_________________________________________________________________
embedding_9 (Embedding)      (None, 80, 128)           2560000   
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 129       
_________________________________________________________________
activation_5 (Activation)    (None, 1)                 0         
Total params: 2,691,713
Trainable params: 2,691,713
Non-trainable params: 0
_________________________________________________________________


In [25]:
print('Train...')
model4.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test))
score, acc = model4.evaluate(X_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Train...


  This is separate from the ipykernel package so we can avoid doing imports until


Train on 25000 samples, validate on 25000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test score: 0.9953275058835744
Test accuracy: 0.82136
