In [1]:
%load_ext autoreload
%load_ext tensorboard
%autoreload 2

In [2]:
import sys
sys.path.append("..")

In [3]:
from typing import Optional,List
from tqdm.notebook import tqdm
import datetime
import os
import copy
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from recs.dataset import session_parallel_dataset
from recs.evaluator import metrics

import tensorflow as tf
from tensorflow import keras as tfk
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd


In [4]:
class QNet(tfk.layers.Layer):
    def __init__(
        self,
        num_items:int,
        seq_len:Optional[int]=3,
        hidden_dim:Optional[int]=100,
        embed_dim:Optional[int]=100,
        dropout_rate:Optional[int]=0.5,
        name="QNet"
    ):
        super(QNet, self).__init__(name=name)
        
        self._embedding = tfk.layers.Embedding(num_items, embed_dim, mask_zero=True)
        self._gru = tfk.layers.GRU(
            hidden_dim, 
            dropout=dropout_rate)

        self._qvalue_dense = tfk.layers.Dense(num_items+1, activation=None) # アイテム数 + 状態の価値を定義
        self._lambda = tfk.layers.Lambda(
            lambda x: tf.expand_dims(x[:, 0], axis=-1) + x[:, 1:] - tf.reduce_mean(x[:, 1:], axis=-1, keepdims=True),
            output_shape=(num_items, )
        )
    
    def call(
        self, 
        item_seqs:tf.Tensor, # (batch_size, seq_len)
        training:Optional[bool]=False,
    ):
        x = self._embedding(item_seqs)
        x = self._gru(x, training=training)
        x = self._qvalue_dense(x)
        qvalue = self._lambda(x)
        return qvalue

In [5]:
class DQNRec(tfk.Model):
    def __init__(
        self,
        num_items:int,
        seq_len:Optional[int]=3,
        hidden_dim:Optional[int]=100,
        embed_dim:Optional[int]=100,
        dropout_rate:Optional[int]=0.5,
        gamma:Optional[float]=1.,
        tau:Optional[float]=1.,
        update_freq:Optional[int]=100,
        name="DQNRec"
    ):
        super(DQNRec, self).__init__(name=name)
        
        self._num_items = num_items
        self._tau = tau
        self._gamma = gamma
        self._update_freq = update_freq
        self._update_count = 0
        
        self._model = QNet(
            num_items,seq_len,hidden_dim,embed_dim, dropout_rate)
        self._target_model = QNet(
            num_items,seq_len,hidden_dim,embed_dim, dropout_rate)
        
        self._loss_tracker = tfk.metrics.Mean(name="loss")
        
        dummy_state = tf.zeros((1, seq_len), dtype=tf.int32)
        self._model(dummy_state)
        self._target_model(dummy_state)
    
    def soft_update(self):
        for param, target_param in zip(self._model.trainable_variables, self._target_model.trainable_variables):
            target_param.assign(param*self._tau + target_param*(1-self._tau))
    
    def call(self, states, training:Optional[bool]=False):
        return self._model(states, training)
    
    def train_step(self, data):
        state, action, reward, n_state, done = data
        onehot_act = tf.one_hot(action-1, depth=self._num_items)
        
        with tf.GradientTape() as tape:
            qvalue = self._model(state, training=True)
            n_qvalue = self._model(n_state, training=True)
            n_qvalue_ = self._target_model(n_state, training=True)
            
            greedy_a = tf.argmax(n_qvalue, axis=-1)
            onehot_greedy_a = tf.one_hot(greedy_a, depth=self._num_items)
            
            target = reward + (1.0 - done) * self._gamma * tf.reduce_sum(n_qvalue_*onehot_greedy_a, axis=-1)
            target = tf.stop_gradient(target)
            loss = self.loss(target, tf.reduce_sum(qvalue*onehot_act,axis=-1))
            
        grads = tape.gradient(loss, self._model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self._model.trainable_variables))
        self._loss_tracker.update_state(loss)
        
        self._update_count += 1
        if self._update_count % self._update_freq == 0:
            self.soft_update()
        
        return  {"loss": self._loss_tracker.result()}
    
    
    @property
    def metrics(self):
        return [self._loss_tracker]

In [6]:
dataname="diginetica"
modelname = "DQNRec"
default_logdir = "/home/inoue/work/recs/"
log_dir =  os.path.join(default_logdir, "logs/%s/%s/"%(dataname, modelname)+datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
train = pickle.load(open(
    "/home/inoue/work/dataset/%s/derived/mdp_train.df"%dataname, "rb"
))
data = pd.read_pickle("~/work/dataset/%s/derived/train.df"%dataname)
testdata = pd.read_pickle("~/work/dataset/%s/derived/test.df"%dataname)

num_items = max(data.itemId.max(), testdata.itemId.max())+1
emb_dim = 64
hidden_dim = 64
seq_len = train[1].shape[1]
batch_size=500

train_data = tf.data.Dataset.from_tensor_slices(
    (train[1],train[2],train[3],train[4], train[5].astype(np.float32))
).shuffle(len(train[0])).batch(batch_size)

In [7]:
model = DQNRec(num_items, seq_len, hidden_dim, emb_dim, dropout_rate=0.1, gamma=1., update_freq=718)
model.compile(loss=tfk.losses.Huber(), optimizer=tfk.optimizers.Adam(learning_rate=0.01))
model.build(input_shape=(1,seq_len))

In [8]:
model.fit(
    train_data, 
    epochs=20, 
    callbacks=[
        tfk.callbacks.TensorBoard(log_dir=log_dir), 
        tfk.callbacks.ModelCheckpoint(
            filepath=os.path.join(default_logdir, "params/%s/%s/checkpoint"%(dataname, modelname)),
            save_weights_only=True,
            monitor="loss",
            mode="min",
            save_best_only=True
        ),
        tfk.callbacks.EarlyStopping(
            monitor="loss",
            min_delta=1e-4,
            patience=2,
            mode="min",
            verbose=1
        )
    ]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: early stopping


<keras.callbacks.History at 0x7fb08817d280>

In [9]:
test = pickle.load(open(
    "/home/inoue/work/dataset/%s/derived/mdp_test.df"%dataname, "rb"))

test_data = tf.data.Dataset.from_tensor_slices(
    (test[0],test[1],test[2])).shuffle(len(test[0])).batch(batch_size)

In [10]:
k =20
df = pd.DataFrame(columns=["sessionId", "recIds", "choiceId"])
for batch in tqdm(test_data):
    sess, state, target = batch
    pred_score = model(state)
    topkitem = tf.math.top_k(pred_score, k=k)[1].numpy() + 1
    tmp = pd.DataFrame(
        [sess.numpy(), topkitem, target.numpy()]).T
    tmp.columns = ["sessionId", "recIds", "choiceId"]
    df = pd.concat([df, tmp], axis=0)

  0%|          | 0/251 [00:00<?, ?it/s]

In [11]:
for k_ in [5, 10, 15, 20]:
    df["NDCG@%d"%k_] = df[["recIds", "choiceId"]].apply(lambda x: metrics.ndcg_at_k(x[1], x[0], k=k_), axis=1)
    df["Hit@%d"%k_] = df[["recIds", "choiceId"]].apply(lambda x: metrics.hit_at_k(x[1], x[0], k=k_), axis=1)

In [12]:
df.groupby("sessionId").sum().mean()

NDCG@5     0.000119
Hit@5      0.000138
NDCG@10    0.000201
Hit@10     0.000322
NDCG@15    0.000227
Hit@15     0.000391
NDCG@20    0.000282
Hit@20     0.000551
dtype: float64