In [1]:
%load_ext autoreload
%load_ext tensorboard
%autoreload 2

In [2]:
import sys
sys.path.append("..")

In [3]:
from typing import Optional,List, Dict
from tqdm.notebook import tqdm

import datetime
import os
import copy

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from recs.evaluator import metrics

import tensorflow as tf
from tensorflow import keras as tfk
from tensorboard.plugins.hparams import api as hp

In [4]:
class QNet(tfk.layers.Layer):
    def __init__(
        self,
        num_items:int,
        seq_len:Optional[int]=3,
        hidden_dim:Optional[int]=100,
        embed_dim:Optional[int]=100,
        dropout_rate:Optional[int]=0.5,
        name="QNet"
    ):
        super(QNet, self).__init__(name=name)
        
        self._embedding = tfk.layers.Embedding(num_items, embed_dim, mask_zero=True)
        self._gru = tfk.layers.GRU(
            hidden_dim, 
            dropout=dropout_rate)

        self._qvalue_dense = tfk.layers.Dense(num_items+1, activation=None)
        self._imt_dense = tfk.layers.Dense(num_items, activation="softmax")
        self._lambda = tfk.layers.Lambda(
            lambda x: tf.expand_dims(x[:, 0], axis=-1) + x[:, 1:] - tf.reduce_mean(x[:, 1:], axis=-1, keepdims=True),
            output_shape=(num_items, )
        )
    
    def call(
        self, 
        item_seqs:tf.Tensor, # (batch_size, seq_len)
        training:Optional[bool]=False,
    ):
        x = self._embedding(item_seqs)
        x = self._gru(x, training=training)
        
        i = self._imt_dense(x)
        x = self._qvalue_dense(x)
        qvalue = self._lambda(x)
        return qvalue, i

In [5]:
class BCQRec(tfk.Model):
    
    def __init__(
        self,
        num_items:int,
        seq_len:int,
        hidden_dim:Optional[int]=100,
        embed_dim:Optional[int]=100,
        threshold:Optional[float]=0.3,
        dropout_rate:Optional[int]=0.5,
        gamma:Optional[float]=1.,
        lam:Optional[float]=1e-2,
        tau:Optional[int]=1.,
        update_count:Optional[int]=20,
        k:Optional[int]=20,
        name="BCQRec"
    ):
        super(BCQRec, self).__init__(name=name)
        self._qmodel = QNet(num_items, seq_len, hidden_dim, embed_dim, dropout_rate, name="BCQ")
        self._target_qmodel = copy.deepcopy(self._qmodel)
        
        self._num_items = num_items
        
        self._topk = k
        self._threshold = threshold
        self._gamma = gamma
        self._lam = lam
        self._iterations = 0
        self._update_count = update_count
        self._loss_tracker = tfk.metrics.Mean(name="loss")
        self._recall_tracker = tfk.metrics.Recall(name="recall")
        
        dummy_state = tf.zeros((1, seq_len), dtype=tf.int32)
        self._qmodel(dummy_state)
        self._target_qmodel(dummy_state)
        
    
    def compile(self, optimizer):
        super(BCQRec, self).compile()
        self.q_loss = tfk.losses.Huber()
        self.imt_loss = tfk.losses.CategoricalCrossentropy()
        self.optimizer = optimizer
    
    def call(self, state):
        q, imt =  self._qmodel(state)
        q = imt*q + (1-imt)*-1e8
        return q
    
    def train_step(self, data):
        self._iterations += 1
        state, action, reward, n_state, done = data
        onehot_act = tf.one_hot(action-1, depth=self._num_items)
        
        with tf.GradientTape() as tape:
            # Compute Target Q-value
            q, imt = self._qmodel(n_state)
            imt = tf.cast(imt / tf.reduce_max(imt, axis=1, keepdims=True) > self._threshold, dtype=tf.float32)

            next_action_val = imt*q + (1-imt)*-1e8
            next_action = tf.argmax(next_action_val, axis=-1)
            onehot_next_act = tf.one_hot(next_action, depth=self._num_items)
            
            q, imt = self._target_qmodel(n_state)
            target_q = reward + (1-done)*self._gamma*tf.reduce_sum(q*onehot_next_act, axis=-1)
            target_q = tf.stop_gradient(target_q)
            
            # Get Current Qvalue estimate
            
            current_q, imt = self._qmodel(state)
            current_q = tf.reduce_sum(current_q*onehot_act, axis=-1)
            
            q_loss = self.q_loss(target_q, current_q)
            imt_loss = self.imt_loss(onehot_act, imt)
            
            loss = q_loss + imt_loss + self._lam * tf.reduce_mean(tf.math.pow(imt, 2))
        
        grads = tape.gradient(loss, self._qmodel.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self._qmodel.trainable_variables))
        
        self._loss_tracker.update_state(loss)
        
        if self._iterations % self._update_count == 0:
            for param, tarparam in zip(self._qmodel.trainable_variables, self._target_qmodel.trainable_variables):
                tar_param.assign(self._tau*param + (1-self._tau)*tarparam)
        
        return {"loss":self._loss_tracker.result()}
    
    def test_step(self, data):
        state, target, _, _, _ = data
        target = tf.one_hot(target-1, depth=self._num_items)
        target = tf.cast(target, dtype=tf.int32)

        qvalue = self(state)
        topkitem = tf.math.top_k(qvalue, k=self._topk)[1]
        topkitem = tf.reduce_sum(tf.one_hot(topkitem, depth=self._num_items), axis=1)
        topkitem = tf.cast(topkitem, dtype=tf.int32)
        
        self._recall_tracker.update_state(target, topkitem)
        
        return {"recall":self._recall_tracker.result()}
    
    @property
    def metrics(self):
        return [self._loss_tracker, self._recall_tracker]

In [6]:
dataname="diginetica"
modelname = "BCQRec"
default_logdir = "/home/inoue/work/recs/"
log_dir =  os.path.join(default_logdir, "logs/%s/%s/"%(dataname, modelname)+datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
train = pickle.load(open(
    "/home/inoue/work/dataset/%s/derived/mdp_train.df"%dataname, "rb"
))

split_ind = int(len(train[0])*0.9)
data = pd.read_pickle("~/work/dataset/%s/derived/train.df"%dataname)
testdata = pd.read_pickle("~/work/dataset/%s/derived/test.df"%dataname)

num_items = max(data.itemId.max(), testdata.itemId.max())+1
seq_len = train[1].shape[1]

batch_size=500
train_data = tf.data.Dataset.from_tensor_slices(
    (train[1][:split_ind, :],
     train[2][:split_ind],
     train[3][:split_ind],
     train[4][:split_ind, :], 
     train[5][:split_ind].astype(np.float32))
).shuffle(len(train[0][:split_ind])).batch(batch_size)

valid_data = tf.data.Dataset.from_tensor_slices(
    (train[1][split_ind:, :],
     train[2][split_ind:],
     train[3][split_ind:],
     train[4][split_ind:, :], 
     train[5][split_ind:].astype(np.float32))
).shuffle(len(train[0][split_ind:])).batch(batch_size)

In [7]:
HP_EMB_DIM = hp.HParam("embed_dim", hp.Discrete([100, 150, 200]))
HP_HIDDEN_DIM = hp.HParam("hidden_dim", hp.Discrete([100, 150, 200]))
HP_THRESHOLD = hp.HParam("threshold", hp.Discrete([0.1, 0.3, 0.5]))
METRIC_RECALL = "recall"

update_count = 200
learning_rate=0.001
dropout_rate = 0.5
lam = 0.01
gamma = 1.


with tf.summary.create_file_writer(os.path.join(log_dir, "hparam_tuning")).as_default():
    hp.hparams_config(
        hparams=[HP_EMB_DIM, HP_HIDDEN_DIM, HP_THRESHOLD],
        metrics=[hp.Metric(METRIC_RECALL, display_name='Recall')],
      )

In [8]:
run_count = 0
for embed_dim in HP_EMB_DIM.domain.values:
    for hidden_dim in HP_EMB_DIM.domain.values:
        for threshold in HP_THRESHOLD.domain.values:
            hparams = {
                "embed_dim":embed_dim,
                "hidden_dim":hidden_dim,
                "threshold":threshold
            }
            run_count += 1
            
            print("-----Starting trial%d"%run_count)
            print(hparams)
            model = BCQRec(num_items, seq_len, hidden_dim, embed_dim, update_count=update_count)
            model.compile(optimizer=tfk.optimizers.Adam(learning_rate=learning_rate))
            model.build(input_shape=(1,seq_len))

            hist = model.fit(
                train_data, 
                epochs=100, 
                validation_data=valid_data,
                validation_freq=1,
                callbacks=[
                    tfk.callbacks.TensorBoard(log_dir=log_dir), 
                    tfk.callbacks.ModelCheckpoint(
                        filepath=os.path.join(default_logdir, "params/BCQRec/checkpoint_%d_%d_%f"%(embed_dim, hidden_dim, threshold)),
                        save_weights_only=True,
                        monitor="val_recall",
                        mode="max",
                        save_best_only=True
                    ),
                    tfk.callbacks.EarlyStopping(
                        monitor="val_recall",
                        min_delta=1e-4,
                        patience=3,
                        mode="max",
                        verbose=1)])
            
            with tf.summary.create_file_writer(os.path.join(log_dir, "hparam_tuning")).as_default():
                hp.hparams(hparams)
                tf.summary.scalar(METRIC_RECALL, hist.history["val_recall"][-1], step=run_count)

-----Starting trial1
{'embed_dim': 100, 'hidden_dim': 100, 'threshold': 0.1}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 15: early stopping
-----Starting trial2
{'embed_dim': 100, 'hidden_dim': 100, 'threshold': 0.3}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 14: early stopping
-----Starting trial3
{'embed_dim': 100, 'hidden_dim': 100, 'threshold': 0.5}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 15: early stopping
-----Starting trial4
{'embed_dim': 100, 'hidden_dim': 150, 'threshold': 0.1}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 11/100
Epoch 12/100
Epoch 12: early stopping
-----Starting trial16
{'embed_dim': 150, 'hidden_dim': 200, 'threshold': 0.1}
Epoch 1/100
Epoch 2/100
Epoch 3/100
113/646 [====>.........................] - ETA: 41s - loss: 8.2034

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 10: early stopping
-----Starting trial17
{'embed_dim': 150, 'hidden_dim': 200, 'threshold': 0.3}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 10: early stopping
-----Starting trial18
{'embed_dim': 150, 'hidden_dim': 200, 'threshold': 0.5}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 10: early stopping
-----Starting trial19
{'embed_dim': 200, 'hidden_dim': 100, 'threshold': 0.1}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 14: early stopping
-----Starting trial20
{'embed_dim': 200, 'hidden_dim': 100, 'threshold': 0.3}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoc

# ベストスコアの時のテスト評価を行う
hidden_dim=150, embed_dim=200, threshold=0.3の時に最も良いスコアを出す

In [9]:
hidden_dim = 150
embed_dim = 200
threshold=0.3
update_count=200
learning_rate = 0.01

model = BCQRec(num_items, seq_len, hidden_dim, embed_dim, update_count=update_count)
model.compile(optimizer=tfk.optimizers.Adam(learning_rate=learning_rate))
model.build(input_shape=(1,seq_len))

hist = model.fit(
                train_data, 
                epochs=100, 
                validation_data=valid_data,
                validation_freq=1,
                callbacks=[
                    tfk.callbacks.TensorBoard(log_dir=log_dir), 
                    tfk.callbacks.ModelCheckpoint(
                        filepath=os.path.join(default_logdir, "params/BCQRec/checkpoint_%d_%d_%f"%(embed_dim, hidden_dim, threshold)),
                        save_weights_only=True,
                        monitor="val_recall",
                        mode="max",
                        save_best_only=True
                    ),
                    tfk.callbacks.EarlyStopping(
                        monitor="val_recall",
                        min_delta=1e-4,
                        patience=3,
                        mode="max",
                        verbose=1)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 6: early stopping


In [10]:
test = pickle.load(open(
    "/home/inoue/work/dataset/%s/derived/mdp_test.df"%dataname, "rb"))

test_data = tf.data.Dataset.from_tensor_slices(
    (test[0],test[1],test[2])).shuffle(len(test[0])).batch(batch_size)

In [11]:
k =20
df = pd.DataFrame(columns=["sessionId", "recIds", "choiceId"])
for batch in tqdm(test_data):
    sess, state, target = batch
    q = model(state)
    
    topkitem = tf.math.top_k(q, k=k)[1].numpy() + 1
    tmp = pd.DataFrame(
        [sess.numpy(), topkitem, target.numpy()]).T
    tmp.columns = ["sessionId", "recIds", "choiceId"]
    df = pd.concat([df, tmp], axis=0)

  0%|          | 0/251 [00:00<?, ?it/s]

In [12]:
for k_ in [5, 10, 15, 20]:
    df["NDCG@%d"%k_] = df[["recIds", "choiceId"]].apply(lambda x: metrics.ndcg_at_k(x[1], x[0], k=k_), axis=1)
    df["Hit@%d"%k_] = df[["recIds", "choiceId"]].apply(lambda x: metrics.hit_at_k(x[1], x[0], k=k_), axis=1)

In [15]:
df.groupby("sessionId").mean().mean()

NDCG@5     0.087843
Hit@5      0.094787
NDCG@10    0.113483
Hit@10     0.150045
NDCG@15    0.128672
Hit@15     0.189883
NDCG@20    0.139226
Hit@20     0.220834
dtype: float64

In [None]:
df.to_csv("~/work/recs/results/bcq_test_result.csv")