In [0]:
%tensorflow_version 2.x

In [0]:
import tensorflow as tf
print(tf.__version__)
import tensorflow.keras as keras

from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Input, Embedding, Conv1D, MaxPooling1D, \
  Dense, Activation, Dropout, GlobalMaxPooling1D, Add, BatchNormalization, \
  ReLU, Flatten, Activation

import numpy as np
from sklearn.model_selection import StratifiedKFold

2.1.0-rc1


In [0]:
# Parameters:
# Word Embedding
max_features = 1000
maxlen = 400
embedding_dims = 125

# Convolution
kernel_size = 3  # filter length
filters = 125 # Authors use 250
pool_size = 4 

# Training
batch_size = 30
epochs = 5

In [0]:
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
x_data = np.concatenate((x_train, x_test))
y_data = np.concatenate((y_train, y_test))
skf = StratifiedKFold(n_splits=5, random_state=42)

Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 400)
x_test shape: (25000, 400)
x_data shape: (50000, 400)
y_data shape: (50000,)


In [0]:
# Block builder
def conv_block(inputs, filters, use_shortcut:bool=False, shortcut=None):
        conv_1 = Conv1D(filters=filters, kernel_size=3, padding='same')(inputs)
        bn_1 = BatchNormalization()(conv_1)
        relu_1 = ReLU()(bn_1)
        conv_2 = Conv1D(filters=filters, kernel_size=3, padding='same')(relu_1)
        bn_2 = BatchNormalization()(conv_2)
        relu_2 = ReLU()(bn_2)

        if shortcut is not None and use_shortcut:
            return Add()([inputs, shortcut])
        else:
            return relu_2
            
def selu_conv_block(inputs, filters, activation:str, use_shortcut:bool=False, shortcut=None, bnormalize:bool=False):
        conv_1 = Conv1D(filters=filters, kernel_size=3, padding='same')(inputs)
        if bnormalize:
          conv_1 = BatchNormalization()(conv_1)
        act_1 = Activation(activation=activation)(conv_1)
        conv_2 = Conv1D(filters=filters, kernel_size=3, padding='same')(act_1)
        if bnormalize:
          conv_1 = BatchNormalization()(conv_2)
        act_1 = Activation(activation=activation)(conv_2)

        if shortcut is not None and use_shortcut:
            return Add()([inputs, shortcut])
        else:
            return conv_2

In [0]:
# Source: https://stackoverflow.com/questions/56300553/what-is-k-max-pooling-how-to-implement-it-in-keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer, InputSpec
from tensorflow import sort 

class KMaxPooling(Layer):
    """
    K-max pooling layer that extracts the k-highest activations from a sequence (2nd dimension).
    TensorFlow backend.
    """
    def __init__(self, k=1, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = InputSpec(ndim=3)
        self.k = k

    def compute_output_shape(self, input_shape):
        return (input_shape[0], (input_shape[2] * self.k))

    def call(self, inputs):

        # swap last two dimensions since top_k will be applied along the last dimension
        shifted_input = tf.transpose(inputs, [0, 2, 1])

        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]

        # return flattened output
        return Flatten()(top_k)

In [0]:
# Network Architecture
def create_VDCNN_model_1():
  # Conneau et al. Very Deep Convolutional Networks for Text Classification
  # We test the most shallow configuration, w/o shortcuts and w "VGG-like Downsampling"
  var_input = Input(shape=(400,))
  emb_layer = Embedding(max_features,
                      embedding_dims,
                      input_length=maxlen)(var_input)
  initial_conv = Conv1D(filters=64, kernel_size=3, padding='same')(emb_layer)
  # 2 x 64 Conv block
  conv_block_1 = conv_block(inputs=initial_conv, filters=64)
  conv_block_2 = conv_block(inputs=conv_block_1, filters=64)
    #Downsample
  pooling_layer_1 = MaxPooling1D(pool_size=3, strides=2)(conv_block_2)

  # 2 x 128 Conv block
  conv_block_3 = conv_block(inputs=pooling_layer_1, filters=128)
  conv_block_4 = conv_block(inputs=conv_block_3, filters=128)
    #Downsample
  pooling_layer_2 = MaxPooling1D(pool_size=3, strides=2)(conv_block_4)

  # 2 x 256 Conv block
  conv_block_5 = conv_block(inputs=pooling_layer_2, filters=256)
  conv_block_6 = conv_block(inputs=conv_block_5, filters=256)
  #Downsample
  pooling_layer_3 = MaxPooling1D(pool_size=3, strides=2)(conv_block_6)

  # 2 x 512 Conv block
  conv_block_7 = conv_block(inputs=pooling_layer_3, filters=512)
  conv_block_8 = conv_block(inputs=conv_block_7, filters=512)

  K_max_pooling = KMaxPooling(k=8)(conv_block_8)
  flattened = Flatten()(K_max_pooling)
  dense_layer_1 = Dense(2048, activation='relu')(flattened)
  dense_layer_2 = Dense(2048, activation='relu')(dense_layer_1)
  dense_layer_final = Dense(1, activation='sigmoid')(dense_layer_2)

  model = Model(inputs=var_input, outputs=dense_layer_final)
  model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
  return model

In [0]:
model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 400)]             0         
_________________________________________________________________
embedding_5 (Embedding)      (None, 400, 125)          125000    
_________________________________________________________________
conv1d_85 (Conv1D)           (None, 400, 64)           24064     
_________________________________________________________________
conv1d_86 (Conv1D)           (None, 400, 64)           12352     
_________________________________________________________________
batch_normalization_32 (Batc (None, 400, 64)           256       
_________________________________________________________________
re_lu_16 (ReLU)              (None, 400, 64)           0         
_________________________________________________________________
conv1d_87 (Conv1D)           (None, 400, 64)           1235

In [0]:
for train, test in skf.split(x_data, y_data):
  target = create_VDCNN_model_1()
  target.fit(x_data[train], y_data[train],
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_data[test], y_data[test]))

train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [0]:
# Network Architecture
def create_VDCNN_model_2():
# Conneau et al. Very Deep Convolutional Networks for Text Classification
# We test the most shallow configuration, w/o shortcuts and w "VGG-like Downsampling"
# Now with SELU Convolutional blocks: Replace BN and ReLU layers for having the conv layer with SELU activation
  var_input = Input(shape=(400,))
  emb_layer = Embedding(max_features,
                      embedding_dims,
                      input_length=maxlen)(var_input)
  initial_conv = Conv1D(filters=64, kernel_size=3, padding='same')(emb_layer)
  # 2 x 64 Conv block
  conv_block_1 = selu_conv_block(inputs=initial_conv, filters=64, activation='selu', bnormalize=False)
  conv_block_2 = selu_conv_block(inputs=conv_block_1, filters=64, activation='selu', bnormalize=False)
  #Downsample
  pooling_layer_1 = MaxPooling1D(pool_size=3, strides=2)(conv_block_2)

  # 2 x 128 Conv block
  conv_block_3 = selu_conv_block(inputs=pooling_layer_1, filters=128, activation='selu', bnormalize=False)
  conv_block_4 = selu_conv_block(inputs=conv_block_3, filters=128, activation='selu', bnormalize=False)
  #Downsample
  pooling_layer_2 = MaxPooling1D(pool_size=3, strides=2)(conv_block_4)

  # 2 x 256 Conv block
  conv_block_5 = selu_conv_block(inputs=pooling_layer_2, filters=256, activation='selu', bnormalize=False)
  conv_block_6 = selu_conv_block(inputs=conv_block_5, filters=256, activation='selu', bnormalize=False)
  #Downsample
  pooling_layer_3 = MaxPooling1D(pool_size=3, strides=2)(conv_block_6)

  # 2 x 512 Conv block
  conv_block_7 = selu_conv_block(inputs=pooling_layer_3, filters=512, activation='selu', bnormalize=False)
  conv_block_8 = selu_conv_block(inputs=conv_block_7, filters=512, activation='selu', bnormalize=False)

  K_max_pooling = KMaxPooling(k=8)(conv_block_8)
  flattened = Flatten()(K_max_pooling)
  dense_layer_1 = Dense(2048, activation='selu')(flattened)
  dense_layer_2 = Dense(2048, activation='selu')(dense_layer_1)
  dense_layer_final = Dense(1, activation='sigmoid')(dense_layer_2)

  model2 = Model(inputs=var_input, outputs=dense_layer_final)
  model2.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
  return model2

In [0]:
for train, test in skf.split(x_data, y_data):
  target = create_VDCNN_model_2()
  target.fit(x_data[train], y_data[train],
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_data[test], y_data[test]))

train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [0]:
# Network Architecture
def create_VDCNN_model_3():
  # Conneau et al. Very Deep Convolutional Networks for Text Classification
  # We test the most shallow configuration, w/o shortcuts and w "VGG-like Downsampling"
  # Now with SELU Convolutional blocks: Replace RELU with SELU (keep Batch Normalization)
  var_input = Input(shape=(400,))
  emb_layer = Embedding(max_features,
                      embedding_dims,
                      input_length=maxlen)(var_input)
  initial_conv = Conv1D(filters=64, kernel_size=3, padding='same')(emb_layer)
  # 2 x 64 Conv block
  conv_block_1 = selu_conv_block(inputs=initial_conv, filters=64, activation='selu', bnormalize=True)
  conv_block_2 = selu_conv_block(inputs=conv_block_1, filters=64, activation='selu', bnormalize=True)
  #Downsample
  pooling_layer_1 = MaxPooling1D(pool_size=3, strides=2)(conv_block_2)

  # 2 x 128 Conv block
  conv_block_3 = selu_conv_block(inputs=pooling_layer_1, filters=128, activation='selu', bnormalize=True)
  conv_block_4 = selu_conv_block(inputs=conv_block_3, filters=128, activation='selu', bnormalize=True)
  #Downsample
  pooling_layer_2 = MaxPooling1D(pool_size=3, strides=2)(conv_block_4)

  # 2 x 256 Conv block
  conv_block_5 = selu_conv_block(inputs=pooling_layer_2, filters=256, activation='selu', bnormalize=True)
  conv_block_6 = selu_conv_block(inputs=conv_block_5, filters=256, activation='selu', bnormalize=True)
  #Downsample
  pooling_layer_3 = MaxPooling1D(pool_size=3, strides=2)(conv_block_6)

  # 2 x 512 Conv block
  conv_block_7 = selu_conv_block(inputs=pooling_layer_3, filters=512, activation='selu', bnormalize=True)
  conv_block_8 = selu_conv_block(inputs=conv_block_7, filters=512, activation='selu', bnormalize=True)

  K_max_pooling = KMaxPooling(k=8)(conv_block_8)
  flattened = Flatten()(K_max_pooling)
  dense_layer_1 = Dense(2048, activation='selu')(flattened)
  dense_layer_2 = Dense(2048, activation='selu')(dense_layer_1)
  dense_layer_final = Dense(1, activation='sigmoid')(dense_layer_2)

  model3 = Model(inputs=var_input, outputs=dense_layer_final)
  model3.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
  return model3

In [0]:
for train, test in skf.split(x_data, y_data):
  target = create_VDCNN_model_3()
  target.fit(x_data[train], y_data[train],
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_data[test], y_data[test]))

train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
train data shape: (40000,)
test data shape: (10000,)
Train on 40000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
