In [14]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.regularizers import L2
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.initializers import GlorotNormal, GlorotUniform, RandomNormal, RandomUniform, HeNormal, HeUniform
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy as bce_loss
from tensorflow.keras.metrics import (BinaryAccuracy, 
    Precision,
    Recall,
    AUC,
    BinaryCrossentropy as bce_metric, 
)
from metrics.custom_metrics import f1_m

from utilities.data_preprocessors import (build_results,
    load_meta_data,
    read_item_index_to_entity_id_file, 
    convert_rating, 
    convert_kg
)

from utilities.data_visualizers import view_vars, train_cross_results_v2
from utilities.data_loaders import load_data_splits

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# load the training meta data and read the number of distinct users
# and items from it (used below to size the embedding tables)
meta_data = load_meta_data('./data/juris-600k/juris_600k_train_meta.json')
n_users = meta_data['n_users']
n_items = meta_data['n_items']

In [16]:
def model_builder(hp):
    """Build and compile the factorization + DNN recommender for one tuner trial.

    The model receives a user id and an item id, embeds both into 8-dimensional
    vectors and combines two branches before a sigmoid output:

    * factorization branch: dot product of the user and item embeddings plus
      a per-user bias and a per-item bias;
    * deep branch: the concatenated embeddings passed through three 16-unit
      dense layers (each followed by an activation and dropout) and a final
      single linear unit.

    The embedding tables are sized with the module-level ``n_users`` and
    ``n_items``, which must be defined before this function is called.

    Args:
        hp: hyperparameter container handed in by the tuner. The choices
            'activation', 'dropout', 'learning_rate' and 'lambda' are
            registered on it.

    Returns:
        The compiled ``tf.keras.Model`` with inputs ``[user_id, item_id]``
        and a single sigmoid output.
    """
    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])

    # the drop probability values, instead of keep probability
    hp_dropout = hp.Choice('dropout', values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

    # learning rate alpha
    # NOTE(review): 1.2 is far larger than every other candidate - confirm it is intentional
    hp_learning_rate = hp.Choice('learning_rate', values=[1.2, 0.03, 0.01, 0.0075, 0.003, 0.001, 0.0003, 0.0001])

    # regularization value lambda
    hp_lambda = hp.Choice('lambda', values=[10.0, 5.0, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.25, 0.125, 0.01,])

    # since length of user_id is only a scalar. Input would be None, 1 or m x 1
    user_id_input = tf.keras.Input(shape=(1,), dtype=tf.int64, name='user_id')
    item_id_input = tf.keras.Input(shape=(1,), dtype=tf.int64, name='item_id')

    # user and item embedding layer
    user_emb_layer = tf.keras.layers.Embedding(n_users, 8, embeddings_regularizer=L2(hp_lambda), name='user_embedding')
    item_emb_layer = tf.keras.layers.Embedding(n_items, 8, embeddings_regularizer=L2(hp_lambda), name='item_embedding')

    # bias vector embedding layer
    user_emb_bias_layer = tf.keras.layers.Embedding(n_users, 1, embeddings_initializer='zeros', name='user_embedding_bias')
    item_emb_bias_layer = tf.keras.layers.Embedding(n_items, 1, embeddings_initializer='zeros', name='item_embedding_bias')

    # dot product layer for the embedding vectors
    dot_layer = tf.keras.layers.Dot(axes=(2, 1))

    # a single add layer, used twice in the forward pass: once to add the
    # bias scalars to the dot product and once to add the DNN output to the
    # flattened factorization term
    add_layer = tf.keras.layers.Add()

    # initialize flatten layer to flatten sum of the dot product
    # of user_emb & item_emb, user_emb_bias, and item_emb_bias
    flatten_fact_matrix_layer = tf.keras.layers.Flatten()

    # initialize concat layer as input to DNN
    concat_layer = tf.keras.layers.Concatenate(axis=2)
    flatten_concat_emb_layer = tf.keras.layers.Flatten()

    # initialize dense, activation and dropout layers of DNN
    dense_layers = []
    act_layers = []
    dropout_layers = []

    layers_dims = [16, 16, 16]
    for layer_dim in layers_dims:
        dense_layers.append(tf.keras.layers.Dense(units=layer_dim, kernel_regularizer=L2(hp_lambda)))
        act_layers.append(tf.keras.layers.Activation(activation=hp_activation))

        # `rate` is the fraction of units that is DROPPED, e.g. a rate
        # of 0.2 drops 20% of the activations coming out of each
        # activation layer
        dropout_layers.append(tf.keras.layers.Dropout(rate=hp_dropout))

    # initialize last layer of DNN to dense with no activation
    last_dense_layer = tf.keras.layers.Dense(units=1, activation='linear', kernel_regularizer=L2(hp_lambda))

    # output layer will just be a sigmoid activation layer
    out_layer = tf.keras.layers.Activation(activation=tf.nn.sigmoid)

    # forward pass
    user_emb = user_emb_layer(user_id_input)
    item_emb = item_emb_layer(item_id_input)

    user_emb_bias = user_emb_bias_layer(user_id_input)
    item_emb_bias = item_emb_bias_layer(item_id_input)

    # calculate the dot product of the user_emb and item_emb vectors
    user_item_dot = dot_layer([user_emb, tf.transpose(item_emb, perm=[0, 2, 1])])
    fact_matrix = add_layer([user_item_dot, user_emb_bias, item_emb_bias])
    fact_matrix_flat = flatten_fact_matrix_layer(fact_matrix)

    # concatenate the user_emb and item_emb vectors
    # then feed to fully connected deep neural net
    A = concat_layer([user_emb, item_emb])
    flat_A = flatten_concat_emb_layer(A)

    # dense -> activation -> dropout for every hidden layer
    for dense_layer, act_layer, dropout_layer in zip(dense_layers, act_layers, dropout_layers):
        Z = dense_layer(flat_A)
        flat_A = dropout_layer(act_layer(Z))

    A_last = last_dense_layer(flat_A)

    # add the output to the flattened factorized matrix
    sum_ = add_layer([A_last, fact_matrix_flat])

    # pass the sum of last dense layer and the flattened
    # factorized matrix to a sigmoid activation function
    out = out_layer(sum_)

    model = tf.keras.Model(inputs=[user_id_input, item_id_input], outputs=out)

    # printed on every call, i.e. once per model the tuner builds
    model.summary()

    # single-output model, so the metrics are passed as one flat list
    model.compile(
        optimizer=Adam(learning_rate=hp_learning_rate),
        loss=bce_loss(),
        metrics=[bce_metric(), BinaryAccuracy(), Precision(), Recall(), AUC(), f1_m]
    )

    return model

    

In [17]:
# define tuner: Hyperband search over model_builder's hyperparameters that
# maximizes the validation F1 score (val_f1_m). Results are kept under
# tuned_models/model, and an existing tuner state found there is reloaded
# (see the "Reloading Tuner" log line in this cell's output).
tuner = kt.Hyperband(
    model_builder,
    objective=kt.Objective('val_f1_m', direction='max'),
    max_epochs=100,
    factor=3,
    directory='tuned_models',
    project_name='model',
)

# if the validation F1 score (val_f1_m) does not improve for 30
# consecutive epochs we stop training the model early
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_f1_m', mode='max', patience=30)

INFO:tensorflow:Reloading Tuner from tuned_models\model\tuner0.json


In [18]:

# load the train / cross / test splits, then show how many rows fall
# into each value of the `interaction` target in the training split
train_data, cross_data, test_data = load_data_splits('juris-600k', './data/juris-600k')
print(train_data['interaction'].value_counts())

0    185199
1    184625
Name: interaction, dtype: int64


In [19]:
# run the hyperparameter search: every candidate model is fit on the
# training split and scored on the cross split
train_inputs = [train_data['user_id'], train_data['item_id']]
cross_inputs = [cross_data['user_id'], cross_data['item_id']]

tuner.search(
    train_inputs,
    train_data['interaction'],
    validation_data=(cross_inputs, cross_data['interaction']),
    epochs=100,
    batch_size=8192,
    callbacks=[stop_early],
)

Trial 238 Complete [00h 00m 23s]
val_f1_m: 0.48639631271362305

Best val_f1_m So Far: 0.6564918756484985
Total elapsed time: 00h 38m 49s
INFO:tensorflow:Oracle triggered exit


In [21]:
# take the single best hyperparameter set found by the search
hyper_params = tuner.get_best_hyperparameters(num_trials=1)[0]

In [24]:
# collect the best value of every tuned hyperparameter into a plain dict
hp_names = ['activation', 'learning_rate', 'lambda', 'dropout']
best_hyper_params = {hp_name: hyper_params.get(hp_name) for hp_name in hp_names}

In [25]:
# display the hyperparameter values selected by the search
best_hyper_params

{'activation': 'relu', 'learning_rate': 0.0003, 'lambda': 0.9, 'dropout': 0.9}

In [27]:
import json
from pathlib import Path

# save the best hyperparameters as JSON; the results directory is created
# first so the write does not fail when the folder does not exist yet
out_path = Path('./results/best_hyper_params.json')
out_path.parent.mkdir(parents=True, exist_ok=True)

with open(out_path, 'w', encoding='utf-8') as out_file:
    json.dump(best_hyper_params, out_file)