In [20]:
import pandas as pd
from keras.layers import LSTM, Dense, Embedding, Flatten, Conv1D, MaxPool1D, Dropout
from keras.optimizers import Adam
from keras.preprocessing.text import text_to_word_sequence, Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras import Sequential
import numpy as np
from keras.utils import np_utils
from keras.datasets import imdb

In [3]:
vocab_size = 5000
(trainX, trainY), (testX, testY) = imdb.load_data(num_words=vocab_size)

In [4]:
trainX.shape

(25000,)

In [5]:
trainY.shape

(25000,)

In [6]:
testX.shape

(25000,)

In [7]:
testY.shape

(25000,)

In [8]:
embedding_dim = 32

In [9]:
max_length = max([len(x) for x in trainX + testX])
max_length

2697

In [10]:
max_sequence_length = 500

In [11]:
number_of_words = max([max(x) for x in trainX + testX]) + 1
number_of_words

5000

In [12]:
train_sequences = pad_sequences(sequences=trainX, maxlen=max_sequence_length, padding='post')

In [13]:
test_sequences = pad_sequences(sequences=testX, maxlen=max_sequence_length, padding='post')

In [14]:
train_label_one_hot = np_utils.to_categorical(np.array(trainY))
test_label_one_hot = np_utils.to_categorical(np.array(testY))

In [15]:
len(train_label_one_hot[0])

2

In [21]:
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_sequence_length))
model.add(Conv1D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPool1D(pool_size=2, padding='Same'))
model.add(Flatten())
model.add(Dense(units=150))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [22]:
from keras.callbacks import ModelCheckpoint

In [23]:
chkpoint = ModelCheckpoint('weights-cnn-{epoch:02d}-{val_loss:.2f}.hdf5', verbose=1, monitor='val_loss', mode='min')

In [24]:
model.fit(x=train_sequences, y=trainY, epochs=5, batch_size=128, callbacks=[chkpoint], validation_data=(test_sequences, testY))

Train on 25000 samples, validate on 25000 samples
Epoch 1/5

Epoch 00001: saving model to weights-cnn-01-0.31.hdf5
Epoch 2/5

Epoch 00002: saving model to weights-cnn-02-0.30.hdf5
Epoch 3/5

Epoch 00003: saving model to weights-cnn-03-0.36.hdf5
Epoch 4/5

Epoch 00004: saving model to weights-cnn-04-0.52.hdf5
Epoch 5/5

Epoch 00005: saving model to weights-cnn-05-0.58.hdf5


<keras.callbacks.History at 0x7fc185b37cc0>

In [26]:
help(MaxPool1D)

Help on class MaxPooling1D in module keras.layers.pooling:

class MaxPooling1D(_Pooling1D)
 |  Max pooling operation for temporal data.
 |  
 |  # Arguments
 |      pool_size: Integer, size of the max pooling windows.
 |      strides: Integer, or None. Factor by which to downscale.
 |          E.g. 2 will halve the input.
 |          If None, it will default to `pool_size`.
 |      padding: One of `"valid"` or `"same"` (case-insensitive).
 |      data_format: A string,
 |          one of `channels_last` (default) or `channels_first`.
 |          The ordering of the dimensions in the inputs.
 |          `channels_last` corresponds to inputs with shape
 |          `(batch, steps, features)` while `channels_first`
 |          corresponds to inputs with shape
 |          `(batch, features, steps)`.
 |  
 |  # Input shape
 |      - If `data_format='channels_last'`:
 |          3D tensor with shape:
 |          `(batch_size, steps, features)`
 |      - If `data_format='channels_first'`:
 |  