In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
from sklearn.model_selection import train_test_split

# Short module-level handle for the Keras sequence-padding helper
from tensorflow.keras.preprocessing.sequence import pad_sequences

2021-10-01 22:23:11.088050: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# IMDB reviews, already encoded as lists of integer word ids.
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000,  # vocabulary cap; create_tunable_model's vocab_size default uses the same number
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [3]:
# Carve a 30% validation set out of the training reviews (shuffled, fixed seed).
# NOTE: train_data / train_labels are rebound here, so running this cell a second
# time without reloading the data splits the already-reduced training set again.
train_data, val_data, train_labels, val_labels = train_test_split(
    train_data,
    train_labels,
    test_size=0.30,
    shuffle=True,
    random_state=0,
)

In [4]:
# Word -> integer id lookup that ships with the dataset
word_index = imdb.get_word_index()

# Shift every id up by three so that ids 0-3 are free for the special tokens
word_index = {word: idx + 3 for word, idx in word_index.items()}
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})

# Inverse lookup (id -> word), used when turning encoded reviews back into text
reverse_word_index = {idx: word for word, idx in word_index.items()}

def decode_review(text):
    """Turn an iterable of word ids back into a space-separated string.

    Each id is looked up in the module-level ``reverse_word_index``; ids that
    are not present there are rendered as ``'?'``.
    """
    words = (reverse_word_index.get(word_id, '?') for word_id in text)
    return ' '.join(words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import SGD, Adam

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [6]:
pad_length = 256

# Bring every split to exactly pad_length token ids per review: shorter reviews
# get the <PAD> id appended at the end, longer ones are cut down by pad_sequences
# (using its default truncation side).
train_data, val_data, test_data = (
    pad_sequences(split,
                  value=word_index["<PAD>"],
                  padding='post',
                  maxlen=pad_length)
    for split in (train_data, val_data, test_data)
)

In [7]:
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Flatten, RepeatVector, dot, multiply, Permute, Lambda
from tensorflow.keras import backend as K  # backend ops used inside attention()

def attention(layer):
    """Collapse a sequence of feature vectors into one vector via soft weights.

    A single-unit ``tanh`` Dense layer scores every timestep, a softmax over the
    timesteps turns the scores into weights, and the result is the weighted sum
    of the timestep vectors.

    Args:
        layer: Rank-3 Keras tensor; its last dimension is read as ``units``
            (presumably laid out as (batch, timesteps, units) -- confirm against
            the caller).

    Returns:
        A tensor whose declared output shape is ``(units,)`` per sample.
    """
    _, _, units = layer.shape.as_list()

    # One scalar score per timestep, normalised across timesteps.
    scores = Dense(1, activation='tanh')(layer)
    scores = Flatten()(scores)
    weights = Activation('softmax')(scores)

    # Repeat each weight `units` times and swap the last two axes so the weights
    # line up element-wise with `layer`.
    weights = RepeatVector(units)(weights)
    weights = Permute([2, 1])(weights)

    # Weighted sum over the second-to-last axis.
    # NOTE(review): this Lambda wraps an inline Python lambda; models containing
    # such layers may be awkward to reload from a saved file -- confirm if the
    # saved model has to be portable.
    weighted = multiply([layer, weights])
    return Lambda(lambda x: K.sum(x, axis=-2),
                  output_shape=(units,))(weighted)

In [8]:
def get_optimizer(option=0, learning_rate=0.001):
    """Return a freshly constructed optimizer chosen by an integer code.

    Args:
        option: Selector for the optimizer:
            0 -- Adam
            1 -- SGD with momentum 0.9 and Nesterov updates
            2 -- RectifiedAdam (TensorFlow Addons)
            3 -- Lookahead wrapped around Adam, ``sync_period=3``
            4 -- SWA wrapped around Adam
            5 -- SWA wrapped around the Nesterov SGD of option 1
            Any other value falls back to Adam.
        learning_rate: Passed as the first positional argument of the base
            optimizer.

    Returns:
        The optimizer instance.

    NOTE(review): nothing here (or in a callback) copies SWA's averaged weights
    back into the model; per the TensorFlow Addons docs that is a separate step,
    so options 4/5 may train like options 0/1 -- confirm.
    """
    if option == 1:
        return tf.keras.optimizers.SGD(learning_rate, momentum=0.9, nesterov=True)
    if option == 2:
        return tfa.optimizers.RectifiedAdam(learning_rate)
    if option == 3:
        inner = tf.optimizers.Adam(learning_rate)
        return tfa.optimizers.Lookahead(inner, sync_period=3)
    if option == 4:
        inner = tf.optimizers.Adam(learning_rate)
        return tfa.optimizers.SWA(inner)
    if option == 5:
        inner = tf.keras.optimizers.SGD(learning_rate, momentum=0.9, nesterov=True)
        return tfa.optimizers.SWA(inner)

    # option == 0, and every unrecognised value, gets plain Adam.
    return tf.keras.optimizers.Adam(learning_rate)

In [9]:
# Module handles used by create_tunable_model below
from tensorflow.keras import layers, models
    
def create_tunable_model(hp, vocab_size=10000, pad_length=256):
    """Build and compile one candidate classifier for a tuner trial.

    The padded token ids are embedded and then used by two branches:

    * a stack of 1-D convolutions, summarised by global average pooling and
      global max pooling;
    * a stack of recurrent layers (GRU or LSTM, optionally bidirectional), each
      of whose sequence outputs is summarised by ``attention``.

    All summaries are concatenated and passed through a Dense/LeakyReLU/Dropout
    head that ends in a single sigmoid unit.

    Args:
        hp: Hyperparameter container providing ``Int``, ``Float`` and ``Choice``
            (the object the tuner hands to its hypermodel).
        vocab_size: Input dimension of the embedding layer.
        pad_length: Length of every input sequence.

    Returns:
        The compiled model (binary cross-entropy loss, ``acc`` metric).
    """
    # ---- global hyperparameters -------------------------------------------
    embedding_size = hp.Int('embedding_size', min_value=8, max_value=512, step=8)
    spatial_dropout = hp.Float('spatial_dropout', min_value=0, max_value=0.5, step=0.05)

    conv_layers = hp.Int('conv_layers', min_value=1, max_value=5, step=1)
    rnn_layers = hp.Int('rnn_layers', min_value=1, max_value=5, step=1)
    dense_layers = hp.Int('dense_layers', min_value=1, max_value=3, step=1)

    conv_filters = hp.Int('conv_filters', min_value=32, max_value=512, step=32)
    conv_kernel = hp.Int('conv_kernel', min_value=1, max_value=8, step=1)

    concat_dropout = hp.Float('concat_dropout', min_value=0, max_value=0.5, step=0.05)
    dense_dropout = hp.Float('dense_dropout', min_value=0, max_value=0.5, step=0.05)

    # ---- shared embedding ---------------------------------------------------
    inputs = layers.Input(name='inputs', shape=[pad_length])
    embedded = layers.Embedding(vocab_size, embedding_size, input_length=pad_length)(inputs)
    embedded = layers.SpatialDropout1D(spatial_dropout)(embedded)

    # ---- convolutional branch -------------------------------------------------
    # NOTE(review): the Conv1D layers are created without an `activation`, so
    # consecutive convolutions are applied back to back with nothing in between
    # -- confirm this is intended.
    conv = embedded
    for _ in range(conv_layers):
        conv = layers.Conv1D(filters=conv_filters, kernel_size=conv_kernel,
                             padding='valid', kernel_initializer='he_uniform')(conv)

    avg_pool_conv = layers.GlobalAveragePooling1D()(conv)
    max_pool_conv = layers.GlobalMaxPooling1D()(conv)

    # ---- recurrent branch -----------------------------------------------------
    # Each recurrent layer consumes the previous one's full sequence output and
    # contributes its own attention summary to the final feature vector.
    sequence = embedded
    representations = []
    for depth in range(rnn_layers):
        use_bidirectional = hp.Choice(f'use_bidirectional_{depth}', values=[0, 1])
        use_lstm = hp.Choice(f'use_lstm_{depth}', values=[0, 1])
        units = hp.Int(f'units_{depth}', min_value=8, max_value=512, step=8)

        recurrent_cls = layers.LSTM if use_lstm == 1 else layers.GRU
        recurrent = recurrent_cls(units, return_sequences=True)
        if use_bidirectional == 1:
            recurrent = layers.Bidirectional(recurrent)

        sequence = recurrent(sequence)
        representations.append(attention(sequence))

    # ---- dense head -----------------------------------------------------------
    merged = layers.concatenate(representations + [avg_pool_conv, max_pool_conv])
    head = layers.Dropout(concat_dropout)(merged)

    for depth in range(dense_layers):
        dense_units = hp.Int(f'dense_units_{depth}', min_value=8, max_value=512, step=8)
        head = layers.Dense(dense_units)(head)
        head = layers.LeakyReLU()(head)
        head = layers.Dropout(dense_dropout)(head)

    logits = layers.Dense(1, name='out_layer')(head)
    outputs = layers.Activation('sigmoid')(logits)

    model = models.Model(inputs=inputs, outputs=outputs)

    # ---- optimisation ---------------------------------------------------------
    hp_learning_rate = hp.Choice('learning_rate', values=[0.002, 0.001, 0.0005])
    optimizer_type = hp.Choice('optimizer', values=list(range(6)))

    model.compile(
        optimizer=get_optimizer(option=optimizer_type, learning_rate=hp_learning_rate),
        loss='binary_crossentropy',
        metrics=['acc'],
    )
    return model

In [10]:
import keras_tuner as kt

In [11]:
# Bayesian search over the space declared in create_tunable_model, with trials
# ranked by validation accuracy.
tuner = kt.BayesianOptimization(
    hypermodel=create_tunable_model,
    objective='val_acc',
    max_trials=100,
    num_initial_points=3,
    directory='storage',
    project_name='imdb',
    seed=42,
)

# Each trial trains for at most 30 epochs; training stops once val_acc has not
# improved for 3 epochs, and the best weights seen are restored.
tuner.search(
    train_data,
    train_labels,
    epochs=30,
    batch_size=64,
    validation_data=(val_data, val_labels),
    shuffle=True,
    verbose=2,
    callbacks=[EarlyStopping(monitor='val_acc', patience=3, restore_best_weights=True)],
)

Trial 100 Complete [00h 03m 42s]
val_acc: 0.876800000667572

Best val_acc So Far: 0.8925333619117737
Total elapsed time: 08h 16m 22s


In [13]:
# Hyperparameters of the best trial (kept for inspection below).
best_hps = tuner.get_best_hyperparameters()[0]

# Load the best trial's *trained* model from its saved checkpoint.
# `tuner.hypermodel.build(best_hps)` would only rebuild the architecture with
# freshly initialised weights, so summarising/saving it without retraining would
# persist an untrained network.
model = tuner.get_best_models(num_models=1)[0]

In [14]:
# Show the best trial's hyperparameter values as a plain dict
print(best_hps.values)

{'embedding_size': 264, 'spatial_dropout': 0.2, 'conv_layers': 1, 'rnn_layers': 2, 'dense_layers': 1, 'conv_filters': 192, 'conv_kernel': 3, 'concat_dropout': 0.4, 'dense_dropout': 0.15000000000000002, 'use_bidirectional_0': 0, 'use_lstm_0': 0, 'units_0': 464, 'dense_units_0': 384, 'learning_rate': 0.002, 'optimizer': 3, 'use_bidirectional_1': 0, 'use_lstm_1': 1, 'units_1': 512, 'dense_units_1': 136, 'dense_units_2': 360}


In [15]:
# Print the layer-by-layer structure and parameter counts of `model`
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputs (InputLayer)             [(None, 256)]        0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 256, 264)     2640000     inputs[0][0]                     
__________________________________________________________________________________________________
spatial_dropout1d (SpatialDropo (None, 256, 264)     0           embedding[0][0]                  
__________________________________________________________________________________________________
gru (GRU)                       (None, 256, 464)     1016160     spatial_dropout1d[0][0]          
______________________________________________________________________________________________

In [16]:
# Write `model` to a single file named best_model.h5 in the working directory.
# NOTE(review): the network contains a Lambda layer and may be compiled with a
# TensorFlow Addons optimizer, so reloading this file presumably needs the same
# code / custom objects available -- confirm before relying on it elsewhere.
model.save("best_model.h5")