In [None]:
import datetime
import glob
import itertools
import logging
import os
import random
from importlib import reload

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, load_model, Model
from keras import Model, layers
from keras.layers import Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout, Input, Flatten, MaxPooling1D, GlobalAveragePooling1D
# Directory prefix (including the trailing slash) that is prepended to data file names.
path="data/"

# Load Data & Methods

In [None]:
# Fixed seed so the 20 % subsample -- and therefore the data every trial is trained
# and validated on -- is the same after a kernel restart. Change it to draw another subsample.
SAMPLE_SEED = 42

# NOTE(review): the dtype key "mm:" (with a trailing colon) looks like a typo for "mm";
# as written it presumably matches no column -- confirm against the CSV header.
data = pd.read_csv(f"{path}train.csv", on_bad_lines="skip", encoding="latin-1", lineterminator="\n",
                   dtype={"statement": str, "runtime": float, "resultsize": int, "yy": int,
                          "mm:": int, "dd": int}, memory_map=True).sample(frac=0.2, random_state=SAMPLE_SEED)
print(len(data))

# Tokenize input SQL statements (character level)
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(data["statement"])

# Smallest observed value of each target; used to shift the targets before the log.
# Series.min() is vectorized, unlike the builtin min() which iterates element by element.
min_runtime = data["runtime"].min()
min_resultsize = data["resultsize"].min()

# Shift so the minimum maps to log(1) = 0, then log-compress both regression targets.
data_runtime = np.log(data["runtime"] + 1 - min_runtime)
data_resultsize = np.log(data["resultsize"] + 1 - min_resultsize)

# Pad (or cut) every sequence at its end to exactly max_len tokens
max_len = 512
sequences = tokenizer.texts_to_sequences(data["statement"])
padded = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

In [None]:
def run(run_dir, hparams):
    """Execute one tuning trial and record it for the TensorBoard HParams plugin.

    Args:
        run_dir: Directory the summary writer of this trial writes to.
        hparams: Mapping of hyperparameter to the value used in this trial. It is
            logged with ``hp.hparams`` and handed to ``train_test_model``.

    Returns:
        Whatever ``train_test_model(hparams)`` returns; the same value is written
        as the scalar summary ``'loss'`` at step 1.
    """
    writer = tf.summary.create_file_writer(run_dir)
    with writer.as_default():
        # Record the values used in this trial.
        hp.hparams(hparams)
        trial_loss = train_test_model(hparams)
        tf.summary.scalar('loss', trial_loss, step=1)
    return trial_loss

# Hyperparameter Tuning

## CNN

In [None]:
from tensorboard.plugins.hparams import api as hp

# Search space of the plain CNN: every hyperparameter is a discrete set of candidates.
HP_NUM_FILTERS = hp.HParam('num_filters', domain=hp.Discrete([64, 128, 256]))
HP_FILTER_SIZE = hp.HParam('filter_size', domain=hp.Discrete([3, 5, 7]))
HP_HIDDEN_UNITS = hp.HParam('hidden_units', domain=hp.Discrete([64, 128, 256]))
HP_BATCH_SIZE = hp.HParam('batch_size', domain=hp.Discrete([1024]))  # other candidates: 16, 32, 64
HP_EMBED_DIM = hp.HParam('embed_dim', domain=hp.Discrete([128, 256, 512]))
HP_DROPOUT = hp.HParam('dropout', domain=hp.Discrete([0.3, 0.5, 0.7]))
HP_OPTIMIZER = hp.HParam('optimizer', domain=hp.Discrete(['adam', 'adamax']))
HP_EPOCHS = hp.HParam('epochs', domain=hp.Discrete([20]))
HP_LENGTH = hp.HParam('length', domain=hp.Discrete([512]))

# Size of the embedding table: one row per tokenizer entry plus one extra
# (presumably for the padding index 0 -- confirm against the tokenizer).
vocab_size = len(tokenizer.word_index) + 1

# Register the search space and the reported metric once, at the root of the CNN runs.
with tf.summary.create_file_writer('runs/hparam_tuning/cnn').as_default():
    hp.hparams_config(
        hparams=[
            HP_NUM_FILTERS,
            HP_FILTER_SIZE,
            HP_HIDDEN_UNITS,
            HP_BATCH_SIZE,
            HP_EMBED_DIM,
            HP_DROPOUT,
            HP_OPTIMIZER,
            HP_LENGTH,
            HP_EPOCHS,
        ],
        metrics=[hp.Metric('loss', display_name='loss')],
    )

In [None]:
def train_test_model(hparams):
    """Build, train and validate the single-convolution CNN for one hyperparameter set.

    The network is Embedding -> Conv1D -> GlobalMaxPooling1D -> Dense -> Dropout,
    followed by two independent single-unit heads (``outTime`` and ``outSize``),
    each trained with a Huber loss.

    Reads the notebook globals ``vocab_size``, ``padded``, ``data_runtime`` and
    ``data_resultsize``.

    Args:
        hparams: Mapping with entries for HP_LENGTH, HP_EPOCHS, HP_BATCH_SIZE,
            HP_EMBED_DIM, HP_NUM_FILTERS, HP_FILTER_SIZE, HP_HIDDEN_UNITS,
            HP_DROPOUT and HP_OPTIMIZER.

    Returns:
        The total validation loss (``val_loss``) of the last epoch.
    """
    max_len=hparams[HP_LENGTH]
    EPOCHS=hparams[HP_EPOCHS]
    BATCH_SIZE=hparams[HP_BATCH_SIZE]

    il = Input(shape=(max_len,))
    el = Embedding(vocab_size, hparams[HP_EMBED_DIM], input_length=max_len)(il)
    cl = Conv1D(hparams[HP_NUM_FILTERS], hparams[HP_FILTER_SIZE], activation='relu')(el)
    pl = GlobalMaxPooling1D()(cl)
    hl = Dense(hparams[HP_HIDDEN_UNITS], activation='relu')(pl)
    # Apply the tuned dropout rate. Without this layer HP_DROPOUT is swept and
    # logged but has no effect, so each configuration is trained several times
    # with an identical model.
    hl = Dropout(hparams[HP_DROPOUT])(hl)
    outTime = Dense(1, name="outTime")(hl)
    outSize = Dense(1, name="outSize")(hl)

    model = Model(inputs=il, outputs=[outTime, outSize])
    model.compile(optimizer=hparams[HP_OPTIMIZER], loss={"outTime": 'huber', "outSize": "huber"})

    # The last 20 % of the (unshuffled) inputs are held out for validation.
    result = model.fit(padded, [data_runtime, data_resultsize],
                       epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0,
                       shuffle=True,
                       validation_split=0.2)

    return result.history['val_loss'][-1]

In [None]:
# Re-import logging so that basicConfig below takes effect even when the root
# logger was presumably already configured earlier in this kernel session.
reload(logging)
# basicConfig opens the log file right away and raises if its directory is missing.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename='logs/ccnnHyper.log', level=logging.DEBUG, format="%(asctime)s    %(message)s",
                    datefmt="%H:%M")
session_num = 0

# Full grid over the CNN search space. The axes are listed slowest-varying first,
# so trials are visited (and numbered) in the same order as nested for-loops
# written in this order would visit them.
cnn_grid = itertools.product(
    HP_LENGTH.domain.values,
    HP_FILTER_SIZE.domain.values,
    HP_HIDDEN_UNITS.domain.values,
    HP_BATCH_SIZE.domain.values,
    HP_EMBED_DIM.domain.values,
    HP_DROPOUT.domain.values,
    HP_OPTIMIZER.domain.values,
    HP_EPOCHS.domain.values,
    HP_NUM_FILTERS.domain.values,
)

# The sequence length is bound to `seq_len` (not `max_len`) so the loop does not
# overwrite the global `max_len` that the padding cell defines.
for (seq_len, filter_size, hidden_units, batch_size, embed_dim,
     dropout_rate, optimizer, epochs, num_filters) in cnn_grid:
    hparams = {
        HP_NUM_FILTERS: num_filters,
        HP_FILTER_SIZE: filter_size,
        HP_HIDDEN_UNITS: hidden_units,
        HP_BATCH_SIZE: batch_size,
        HP_EMBED_DIM: embed_dim,
        HP_DROPOUT: dropout_rate,
        HP_OPTIMIZER: optimizer,
        HP_EPOCHS: epochs,
        HP_LENGTH: seq_len,
    }
    run_name = "run-%d" % session_num
    loss = run('runs/hparam_tuning/cnn/' + run_name, hparams)
    # One log line with the configuration, one with the resulting loss.
    logging.info({h.name: hparams[h] for h in hparams})
    logging.info(loss)
    session_num += 1

## VDCNN

### Model

In [None]:
# Number of ConvBlocks per stage, keyed by VDCNN depth. VDCNN.__init__ reads the
# four entries as the block counts of the 64-, 128-, 256- and 512-filter stages.
N_BLOCKS = {9: (1, 1, 1, 1),
            17: (2, 2, 2, 2),
            29: (5, 5, 2, 2),
            49: (8, 8, 5, 3)}


class KMaxPooling(layers.Layer):
    """
    K-max pooling layer that extracts the k-highest activations from a sequence (2nd dimension).
    TensorFlow backend.

    The selection is done independently per channel (3rd dimension) of a rank-3
    input; the output has ``k`` entries along the 2nd dimension.

    Args:
        k: Number of activations to keep. ``None`` keeps half of the sequence
            length, rounded half to even (the rounding rule of ``tf.round``).
        sorted: Forwarded to ``tf.nn.top_k``; if True the kept values are
            returned in descending order.
        **kwargs: Standard ``Layer`` keyword arguments such as ``name``.
    """

    def __init__(self,
                 k=None,
                 sorted=False,
                 **kwargs):
        super(KMaxPooling, self).__init__(**kwargs)
        self.k = k
        self.sorted = sorted

    @staticmethod
    def _half_length(length):
        """Half of a static sequence length; Python's round() is half-to-even like tf.round."""
        return int(round(length / 2))

    def compute_output_shape(self, input_shape):
        k = self.k
        if k is None and input_shape[1] is not None:
            # Mirror the default used in call() so the reported shape is correct.
            k = self._half_length(input_shape[1])
        return (input_shape[0], k, input_shape[2])

    def get_config(self):
        config = super(KMaxPooling, self).get_config()
        config.update({'k': self.k, 'sorted': self.sorted})
        return config

    def call(self,
             inputs):
        if self.k is not None:
            k = self.k
        elif inputs.shape[1] is not None:
            # Static length known: compute k in plain Python so it stays a constant
            # and the output keeps a fully defined static shape.
            k = self._half_length(inputs.shape[1])
        else:
            # Sequence length only known at run time: derive k from the dynamic shape.
            k = tf.cast(tf.round(tf.shape(inputs)[1] / 2), tf.int32)

        # Swap last two dimensions since top_k will be applied along the last dimension
        shifted_inputs = tf.transpose(inputs, [0, 2, 1])

        # Extract top_k, returns two tensors [values, indices]; only the values are used
        top_k = tf.nn.top_k(shifted_inputs, k=k, sorted=self.sorted)[0]

        # Swap back so the pooled axis is the 2nd dimension again
        return tf.transpose(top_k, [0, 2, 1])


class Pooling(layers.Layer):
    """Dispatches to one of the supported pooling layers.

    ``'max'`` wraps ``MaxPooling1D(pool_size=3, strides=2, padding='same')``;
    ``'k_max'`` wraps ``KMaxPooling()`` with its default arguments.

    Args:
        pool_type: Either ``'max'`` or ``'k_max'``.
        name: Optional layer name.
    """

    def __init__(self, pool_type='max', name=None):
        super(Pooling, self).__init__(name=name)
        assert pool_type in ['max', 'k_max']
        self.pool_type = pool_type

        if pool_type == 'k_max':
            self.pool = KMaxPooling()
        elif pool_type == 'max':
            self.pool = layers.MaxPooling1D(pool_size=3, strides=2, padding='same')

    def call(self, x):
        # Delegate to whichever pooling layer was selected in __init__.
        pooled = self.pool(x)
        return pooled


class ZeroPadding(layers.Layer):
    """Pads the last axis of a rank-3 tensor with zeros.

    Args:
        values: Indexable with two entries; ``values[0]`` zeros are added before
            and ``values[1]`` zeros after the existing entries of the last axis.
        name: Optional layer name.
    """

    def __init__(self, values, name=None):
        super(ZeroPadding, self).__init__(name=name)
        self.values = values

    def call(self, x):
        before, after = self.values[0], self.values[1]
        # The first two axes are left untouched; only the last axis grows.
        paddings = [[0, 0], [0, 0], [before, after]]
        return tf.pad(x, paddings, mode='CONSTANT', constant_values=0)


class Conv1D_BN(layers.Layer):
    """A Conv1D layer (he_normal initialised) followed by BatchNormalization.

    Args:
        filters: Number of output channels of the convolution.
        kernel_size: Width of the convolution kernel.
        strides: Stride of the convolution.
        padding: Padding mode passed to ``Conv1D``.
        use_bias: Whether the convolution has a bias term.
        name: Optional layer name.
    """

    def __init__(self, filters, kernel_size=3, strides=2, padding='same', use_bias=True, name=None):
        super(Conv1D_BN, self).__init__(name=name)
        self.filters = filters
        self.use_bias = use_bias
        self.conv = layers.Conv1D(
            filters,
            kernel_size,
            strides=strides,
            padding=padding,
            use_bias=use_bias,
            kernel_initializer='he_normal',
        )
        self.bn = layers.BatchNormalization()

    def call(self, x):
        # Convolution first, then normalisation of its output.
        return self.bn(self.conv(x))


class ConvBlock(layers.Layer):
    """Two Conv1D + BatchNorm layers with optional downsampling, shortcut and projection.

    Data flow in ``call``: conv1 -> bn1 -> relu -> conv2 -> bn2 -> [pool]
    -> [+ (downsampled) input] -> relu -> [proj].

    Args:
        filters: Number of channels produced by both convolutions.
        kernel_size: Kernel width of both convolutions.
        use_bias: Whether the convolutions use a bias term.
        shortcut: If True, the block input is added to the block output
            (after being downsampled when the block reduces the sequence length).
        pool_type: How the sequence length is reduced. ``None`` = no reduction,
            ``'conv'`` = first convolution uses stride 2, ``'max'`` / ``'k_max'`` =
            a ``Pooling`` layer after the second convolution.
        proj_type: How the channel count is increased after the block when
            ``shortcut`` is True. ``'conv'`` = kernel-size-1 Conv1D_BN to
            ``2 * filters`` channels, ``'identity'`` = zero padding of the channel
            axis by ``filters`` entries, ``None`` = no projection. Ignored when
            ``shortcut`` is False.
        name: Optional layer name.
    """

    def __init__(self,
                 filters,
                 kernel_size=3,
                 use_bias=True,
                 shortcut=True,
                 pool_type=None,
                 proj_type=None,
                 name=None,
                 ):
        super(ConvBlock, self).__init__(name=name)
        self.filters = filters
        self.kernel_size = kernel_size
        self.use_bias = use_bias
        self.shortcut = shortcut
        self.pool_type = pool_type
        self.proj_type = proj_type

        # Define every optional sub-layer up front so the attributes exist for all
        # configurations (e.g. shortcut=False combined with a pool_type).
        self.pool = None
        self.downsample = None
        self.proj = None

        # Deal with downsample and pooling
        assert pool_type in ['max', 'k_max', 'conv', None]
        if pool_type is None:
            strides = 1
        elif pool_type == 'conv':
            strides = 2  # Convolutional pooling with stride 2
        else:
            strides = 1
            self.pool = Pooling(pool_type)

        if shortcut and pool_type is not None:
            # The main path halves the sequence length, so the residual must too.
            self.downsample = Conv1D_BN(filters, 3, strides=2, padding='same', use_bias=use_bias)

        self.conv1 = layers.Conv1D(filters, kernel_size, strides=strides, padding='same', use_bias=use_bias,
                                   kernel_initializer='he_normal')
        self.bn1 = layers.BatchNormalization()

        self.conv2 = layers.Conv1D(filters, kernel_size, strides=1, padding='same', use_bias=use_bias,
                                   kernel_initializer='he_normal')
        self.bn2 = layers.BatchNormalization()

        assert proj_type in ['identity', 'conv', None]
        if shortcut:
            if proj_type == 'conv':
                # 1x1 conv for projection
                self.proj = Conv1D_BN(filters * 2, 1, strides=1, padding='same', use_bias=use_bias)

            elif proj_type == 'identity':
                # Identity using zero padding: adds `filters` zero channels in total
                self.proj = ZeroPadding([int(filters // 2), filters - int(filters // 2)])

    def call(self,
             x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = tf.nn.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.pool is not None:
            out = self.pool(out)

        if self.shortcut:
            if self.downsample is not None:
                residual = self.downsample(residual)
            out = out + residual

        out = tf.nn.relu(out)

        # proj is only created when shortcut is True and a proj_type was given.
        if self.proj is not None:
            out = self.proj(out)

        return out


class VDCNN(Model):
    """Model codebase for VDCNN with two single-unit regression heads.

    Args:
        depth: depth of VDCNN, one of [9, 17, 29, 49].
        vocab_size: Number of rows of the character embedding table.
        seqlen: Sequence length.
        embed_dim: dim for character embeddings.
        shortcut: Use skip connections.
        pool_type: Pooling operations to be used, one of ['max', 'k_max', 'conv'].
        proj_type: Operation to increase dim for dotted skip connection, one of ['identity', 'conv'].
        use_bias: Use bias for all layers or not.
        logits: Stored on the instance as ``self.logits``. ``call`` does not read
            it; the model always returns the raw outputs of the two heads.
    """

    def __init__(self,
                 depth=9,
                 vocab_size=69,
                 seqlen=None,
                 embed_dim=16,
                 shortcut=True,
                 pool_type='max',
                 proj_type='conv',
                 use_bias=True,
                 logits=True):
        super(VDCNN, self).__init__()
        self.depth = depth
        self.vocab_size = vocab_size
        self.seqlen = seqlen
        self.embed_dim = embed_dim
        self.shortcut = shortcut
        self.pool_type = pool_type
        self.proj_type = proj_type
        self.use_bias = use_bias
        # Keep the value the caller passed instead of a hard-coded True.
        self.logits = logits

        assert pool_type in ['max', 'k_max', 'conv']
        assert proj_type in ['conv', 'identity']
        self.n_blocks = N_BLOCKS[depth]

        self.embed_char = layers.Embedding(vocab_size, embed_dim, input_length=seqlen)
        self.conv = layers.Conv1D(64, 3, strides=1, padding='same', use_bias=use_bias,
                                  kernel_initializer='he_normal')

        # Four stages of ConvBlocks. The first three end with a block that pools and
        # projects to twice the channel count; the last stage does neither.
        self.conv_block_64 = self._make_stage(64, self.n_blocks[0], use_bias, shortcut, pool_type, proj_type)
        self.conv_block_128 = self._make_stage(128, self.n_blocks[1], use_bias, shortcut, pool_type, proj_type)
        self.conv_block_256 = self._make_stage(256, self.n_blocks[2], use_bias, shortcut, pool_type, proj_type)
        self.conv_block_512 = self._make_stage(512, self.n_blocks[3], use_bias, shortcut, None, None)

        self.k_maxpool = KMaxPooling(k=8)
        self.flatten = layers.Flatten()

        # Dense layers
        self.fc1 = layers.Dense(2048, activation='relu')
        self.fc2 = layers.Dense(2048, activation='relu')
        self.outTime = layers.Dense(1, name="outTime")
        self.outSize = layers.Dense(1, name="outSize")

    @staticmethod
    def _make_stage(filters, n_blocks, use_bias, shortcut, pool_type, proj_type):
        """Build one stage: ``n_blocks - 1`` plain ConvBlocks plus a final one that pools/projects."""
        stage = [ConvBlock(filters, 3, use_bias, shortcut) for _ in range(n_blocks - 1)]
        stage.append(ConvBlock(filters, 3, use_bias, shortcut, pool_type=pool_type, proj_type=proj_type))
        return stage

    def call(self,
             x):
        """Run the network and return the tuple ``(outTime, outSize)``."""
        x = self.embed_char(x)
        x = self.conv(x)

        # Stages are applied in order of increasing filter count.
        for stage in (self.conv_block_64, self.conv_block_128,
                      self.conv_block_256, self.conv_block_512):
            for block in stage:
                x = block(x)

        # Keep the 8 largest activations per channel, then flatten for the dense layers.
        x = self.k_maxpool(x)
        x = self.flatten(x)

        x = self.fc1(x)
        x = self.fc2(x)
        outTime = self.outTime(x)
        outSize = self.outSize(x)

        return outTime, outSize

### Tuning

In [None]:
from tensorboard.plugins.hparams import api as hp

# Search space of the VDCNN. The trailing comments list all candidate values that
# have been considered; only the ones inside hp.Discrete are searched.
HP_DEPTH = hp.HParam('depth', domain=hp.Discrete([49]))  # 9, 17, 29, 49
HP_EMBED_DIM = hp.HParam('embed_dim', domain=hp.Discrete([512, 256]))  # 512, 128, 256
HP_BATCH_SIZE = hp.HParam('batch_size', domain=hp.Discrete([256]))  # 16, 32, 64, 128
HP_SHORTCUT = hp.HParam('shortcut', domain=hp.Discrete([True, False]))  # True, False
HP_POOL_TYPE = hp.HParam('pool_type', domain=hp.Discrete(['conv', 'k_max']))  # 'conv', 'max', 'k_max'
HP_PROJ_TYPE = hp.HParam('proj_type', domain=hp.Discrete(['identity', 'conv']))  # 'identity', 'conv'
HP_OPTIMIZER = hp.HParam('optimizer', domain=hp.Discrete(['adamax', 'adam']))  # 'adamax', 'adam', 'sgd'
HP_BIAS = hp.HParam('bias', domain=hp.Discrete([False, True]))  # True, False
HP_EPOCHS = hp.HParam('epochs', domain=hp.Discrete([20]))
HP_LENGTH = hp.HParam('length', domain=hp.Discrete([512]))

# Size of the embedding table: one row per tokenizer entry plus one extra
# (presumably for the padding index 0 -- confirm against the tokenizer).
vocab_size = len(tokenizer.word_index) + 1

# NOTE(review): the directory is named "vdcnn29" although HP_DEPTH currently only
# contains 49 -- confirm this is the intended location.
run_dir = 'runs/hparam_tuning/vdcnn29/'

# Register the search space and the reported metric once, at the root of the VDCNN runs.
with tf.summary.create_file_writer(run_dir).as_default():
    hp.hparams_config(
        hparams=[
            HP_DEPTH,
            HP_EMBED_DIM,
            HP_BATCH_SIZE,
            HP_SHORTCUT,
            HP_POOL_TYPE,
            HP_PROJ_TYPE,
            HP_OPTIMIZER,
            HP_BIAS,
            HP_EPOCHS,
            HP_LENGTH,
        ],
        metrics=[hp.Metric('loss', display_name='Loss')],
    )

In [None]:
# Re-import logging so that basicConfig below takes effect even when the root
# logger was presumably already configured earlier in this kernel session.
reload(logging)
# basicConfig opens the log file right away and raises if its directory is missing.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename='logs/vdcnnHyper.log', level=logging.DEBUG, format="%(asctime)s    %(message)s",
                    datefmt="%H:%M")

def train_test_model(hparams):
    """Build, train and validate a VDCNN for one hyperparameter set.

    Reads the notebook globals ``vocab_size``, ``padded``, ``data_runtime`` and
    ``data_resultsize``.

    Args:
        hparams: Mapping with entries for HP_LENGTH, HP_EPOCHS, HP_BATCH_SIZE,
            HP_DEPTH, HP_EMBED_DIM, HP_SHORTCUT, HP_POOL_TYPE, HP_PROJ_TYPE,
            HP_BIAS and HP_OPTIMIZER.

    Returns:
        The total validation loss (``val_loss``) of the last epoch.
    """
    seq_len = hparams[HP_LENGTH]
    n_epochs = hparams[HP_EPOCHS]
    batch = hparams[HP_BATCH_SIZE]

    # Model
    vdcnn_kwargs = dict(
        depth=hparams[HP_DEPTH],
        vocab_size=vocab_size,
        seqlen=seq_len,
        embed_dim=hparams[HP_EMBED_DIM],
        shortcut=hparams[HP_SHORTCUT],
        pool_type=hparams[HP_POOL_TYPE],
        proj_type=hparams[HP_PROJ_TYPE],
        use_bias=hparams[HP_BIAS],
    )
    model = VDCNN(**vdcnn_kwargs)

    # output_1=time, output_2=card (presumably the names Keras assigns to the two
    # values returned by VDCNN.call -- confirm for the installed Keras version)
    head_losses = {"output_1": 'huber', "output_2": "huber"}
    model.compile(optimizer=hparams[HP_OPTIMIZER], loss=head_losses)

    history = model.fit(
        padded,
        [data_runtime, data_resultsize],
        epochs=n_epochs,
        batch_size=batch,
        verbose=0,
        shuffle=True,
        validation_split=0.2,
    )
    return history.history['val_loss'][-1]

In [None]:
session_num = 0

# Full grid over the VDCNN search space. Every hyperparameter appears exactly once
# (iterating HP_LENGTH twice would repeat each trial once per length value).
# The axes are listed slowest-varying first, so trials are visited and numbered in
# the same order as nested for-loops written in this order would visit them.
# NOTE: when shortcut is False, ConvBlock never creates or applies a projection,
# so the proj_type variants of such trials train identical architectures.
vdcnn_grid = itertools.product(
    HP_LENGTH.domain.values,
    HP_EMBED_DIM.domain.values,
    HP_SHORTCUT.domain.values,
    HP_BATCH_SIZE.domain.values,
    HP_POOL_TYPE.domain.values,
    HP_OPTIMIZER.domain.values,
    HP_PROJ_TYPE.domain.values,
    HP_BIAS.domain.values,
    HP_EPOCHS.domain.values,
    HP_DEPTH.domain.values,
)

# The sequence length is bound to `seq_len` (not `max_len`) so the loop does not
# overwrite the global `max_len` that the padding cell defines.
for (seq_len, embed_dim, shortcut, batch_size, pool_type,
     optimizer, proj_type, bias, epochs, depth) in vdcnn_grid:
    hparams = {
        HP_DEPTH: depth,
        HP_EMBED_DIM: embed_dim,
        HP_SHORTCUT: shortcut,
        HP_BATCH_SIZE: batch_size,
        HP_POOL_TYPE: pool_type,
        HP_OPTIMIZER: optimizer,
        HP_PROJ_TYPE: proj_type,
        HP_BIAS: bias,
        HP_LENGTH: seq_len,
        HP_EPOCHS: epochs,
    }
    run_name = "run-%d" % session_num
    loss = run(run_dir + run_name, hparams)
    # One log line with the configuration, one with the resulting loss.
    logging.info({h.name: hparams[h] for h in hparams})
    logging.info(loss)
    session_num += 1