STAMP: Short-Term Attention/Memory Priority Model for Session-based Recommendation

参考：https://dl.acm.org/doi/10.1145/3219819.3219950


STAMP是一种新的短期注意/记忆优先级(Short-Term Attention/Memory Priority, STAMP)模型：该模型能够从会话上下文的长期记忆中捕获用户的通用兴趣，同时从最后一次点击的短期记忆中考虑用户当前的兴趣。

适用场景：session内的序列推荐召回。模型结合长期历史兴趣和当前兴趣进行联合推荐；简单的序列模型没有考虑用户当前兴趣对下一次行为的影响，而误点击等行为又会导致兴趣偏移。


具体地：给定t时刻的session行为集合，预估t+1时刻的item。利用长短期兴趣训练分类器，对候选集进行打分并排序，得到top-k个item。

注意：session为固定长度，而非变长；候选集大小为超参数。

In [1]:
import tensorflow as tf
from sequence_feature_layer import SequenceFeatures
from tensorflow import feature_column as fc
from tensorflow.keras.layers import Layer, Dense, LayerNormalization, Dropout, Embedding, Conv1D

## 0.准备工作

In [2]:
# Hashed categorical id columns for the clicked-item sequence ("seq") and the
# candidate items ("target"); each one gets its own 8-dimensional embedding.
seq = fc.sequence_categorical_column_with_hash_bucket(
    key='seq', hash_bucket_size=10, dtype=tf.int64)
target = fc.sequence_categorical_column_with_hash_bucket(
    key='target', hash_bucket_size=10, dtype=tf.int64)
seq_col = fc.embedding_column(seq, dimension=8)
target_col = fc.embedding_column(target, dimension=8)
columns = [seq_col, target_col]

# Toy inputs: a dense 3 x 2 grid of clicked item ids and a 6 x 1 column of
# candidate item ids, both stored as sparse tensors.
features = {}
features["seq"] = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1],
             [1, 0], [1, 1],
             [2, 0], [2, 1]],
    values=[1100, 1101, 1102, 1101, 1103, 1102],
    dense_shape=[3, 2],
)
features["target"] = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]],
    values=[1102, 1103, 1100, 1102, 1103, 1100],
    dense_shape=[6, 1],
)

# Show the clicked-item ids in dense form.
tf.sparse.to_dense(features['seq'])

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1100, 1101],
       [1102, 1101],
       [1103, 1102]], dtype=int32)>

In [3]:
# Run the raw sparse features through the project's SequenceFeatures layer.
# It yields two objects; the first is a dict of embeddings keyed
# '<column>_embedding'. The second presumably holds sequence lengths --
# confirm against sequence_feature_layer.py.
sequence_feature_layer = SequenceFeatures(
    columns, name='sequence_features_input_layer')
layer_outputs = sequence_feature_layer(features)
seq_emb_dict, seq_len_dict = layer_outputs
seq_emb_dict.keys()

dict_keys(['seq_embedding', 'target_embedding'])

## padding前补0

前补0是针对变长序列的处理方式；本例使用的是session内的行为序列，长度固定，因此不需要padding。

In [4]:
class attention(Layer):
  """STAMP attention: pool a session's item embeddings into one vector.

  Every position i of the sequence receives a scalar weight

      a_i = W0 * sigmoid(W1 * x_i + W2 * m_t + W3 * m_s)

  and the layer returns sum_i(a_i * x_i). The weights are NOT
  softmax-normalised; they are used as produced by the final Dense(1).

  Args:
    dims: output width of the three projection layers W1, W2 and W3.
    **kwargs: forwarded to the Keras ``Layer`` constructor (``name``,
      ``dtype``, ...).
  """

  def __init__(self, dims, **kwargs):
    # The base constructor has to run before any sub-layer is assigned so
    # that Keras' attribute tracking is initialised first.
    super(attention, self).__init__(**kwargs)
    self.dims = dims
    self.W1 = Dense(dims)   # projects every sequence position x_i
    self.W2 = Dense(dims)   # projects the last-click state m_t
    self.W3 = Dense(dims)   # projects the session summary m_s
    self.outputs = Dense(1)  # W0: collapses the hidden vector to a scalar

  def call(self, inputs):
    """Compute the attention-weighted sum of the sequence embeddings.

    Args:
      inputs: a 3-element sequence ``(seq_emb, mt, ms)``. ``seq_emb`` is
        reduced over axis 1, and ``mt`` / ``ms`` get an axis inserted at
        position 1 so they broadcast against it -- i.e. ``seq_emb`` is
        expected to be (batch, seq_len, emb) and ``mt`` / ``ms``
        (batch, emb).

    Returns:
      ``seq_emb`` summed over axis 1 after scaling each position by its
      attention weight.
    """
    seq_emb, mt, ms = inputs
    w1_out = self.W1(seq_emb)
    # Insert a length-1 axis so the per-session vectors broadcast over
    # every sequence position.
    w2_out = tf.expand_dims(self.W2(mt), axis=1)
    w3_out = tf.expand_dims(self.W3(ms), axis=1)
    att_weight = self.outputs(tf.nn.sigmoid(w1_out + w2_out + w3_out))
    # Debug output, retained so the notebook keeps showing the weights.
    print(att_weight)
    res = tf.reduce_sum(att_weight * seq_emb, axis=1)
    return res

  def get_config(self):
    """Return the layer config, including ``dims``, for re-creation."""
    config = super(attention, self).get_config()
    config.update({'dims': self.dims})
    return config

In [5]:
# Hyper-parameters: widths of the two tanh MLP cells and of the attention
# projections.
hidden_units = 8
cell_units = [16, 8]

# Two independent MLP stacks (cell A and cell B) with identical layer widths.
cell_a_dnn = [Dense(units=width, activation='tanh') for width in cell_units]
cell_b_dnn = [Dense(units=width, activation='tanh') for width in cell_units]

# Attention layer instance used to pool the session sequence.
att = attention(hidden_units)
att

<__main__.attention at 0x7f3effb4e910>

In [6]:
# Pull the two embedding tensors out of the feature-layer output dict.
seq_emb = seq_emb_dict['seq_embedding']
target_emb = seq_emb_dict['target_embedding']

In [7]:
# m_s: session summary, the mean of the item embeddings over the sequence axis.
ms = tf.math.reduce_mean(seq_emb, axis=1)
ms

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[-0.30325288, -0.47532415,  0.07815251,  0.16372712, -0.17046262,
         0.00407058,  0.390729  , -0.09066617],
       [ 0.09924899, -0.4402496 ,  0.31286696, -0.26232952,  0.13235652,
        -0.36271352,  0.36017677,  0.16390236],
       [ 0.09021419, -0.34764692, -0.01876904,  0.04579969,  0.1799705 ,
        -0.1926912 ,  0.04571386,  0.11098365]], dtype=float32)>

In [8]:
# m_t: embedding at the last position of axis 1. This assumes fixed-length
# sessions, so the last position is the most recent click.
mt = seq_emb[:, -1, :]
mt

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[ 0.06369966, -0.6984328 ,  0.23575838, -0.2982112 ,  0.02229101,
        -0.6095506 ,  0.54416585,  0.21739753],
       [ 0.06369966, -0.6984328 ,  0.23575838, -0.2982112 ,  0.02229101,
        -0.6095506 ,  0.54416585,  0.21739753],
       [ 0.13479832, -0.18206638,  0.38997555, -0.22644787,  0.24242204,
        -0.11587646,  0.17618768,  0.11040719]], dtype=float32)>

In [9]:
# Pool the sequence with the attention layer; the layer also prints the
# per-position attention weights.
attention_inputs = [seq_emb, mt, ms]
att_output = att(attention_inputs)
att_output

tf.Tensor(
[[[0.6364007 ]
  [0.36713803]]

 [[0.49476144]
  [0.43527827]]

 [[0.39180413]
  [0.37822703]]], shape=(3, 2, 1), dtype=float32)


<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[-0.40313262, -0.4169314 ,  0.0359917 ,  0.28868926, -0.22296719,
         0.16931026,  0.35079688, -0.17393707],
       [ 0.09442008, -0.39409205,  0.29556537, -0.24184252,  0.12964387,
        -0.32265532,  0.32403445,  0.14925364],
       [ 0.06886242, -0.26994705, -0.02000231,  0.03896353,  0.13773498,
        -0.14942116,  0.03342964,  0.08546869]], dtype=float32)>

In [10]:
# Inputs of the two MLP cells: the attention-pooled state (hs) and the
# last-click state (ht), used unchanged.
# NOTE: an earlier variant concatenated the squeezed target embedding onto
# both states along the last axis; it is disabled. With the toy batch in
# this notebook the target embedding has 6 rows versus 3 session rows.
hs, ht = att_output, mt
hs, ht

(<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
 array([[-0.40313262, -0.4169314 ,  0.0359917 ,  0.28868926, -0.22296719,
          0.16931026,  0.35079688, -0.17393707],
        [ 0.09442008, -0.39409205,  0.29556537, -0.24184252,  0.12964387,
         -0.32265532,  0.32403445,  0.14925364],
        [ 0.06886242, -0.26994705, -0.02000231,  0.03896353,  0.13773498,
         -0.14942116,  0.03342964,  0.08546869]], dtype=float32)>,
 <tf.Tensor: shape=(3, 8), dtype=float32, numpy=
 array([[ 0.06369966, -0.6984328 ,  0.23575838, -0.2982112 ,  0.02229101,
         -0.6095506 ,  0.54416585,  0.21739753],
        [ 0.06369966, -0.6984328 ,  0.23575838, -0.2982112 ,  0.02229101,
         -0.6095506 ,  0.54416585,  0.21739753],
        [ 0.13479832, -0.18206638,  0.38997555, -0.22644787,  0.24242204,
         -0.11587646,  0.17618768,  0.11040719]], dtype=float32)>)

In [11]:
# MLP cell A: feed hs through each tanh Dense layer in turn.
hs_output = hs
for dense_layer in cell_a_dnn:
  hs_output = dense_layer(hs_output)
hs_output

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[ 0.1446109 , -0.25917265,  0.18615827,  0.21205984,  0.03884844,
        -0.00970255,  0.01614881, -0.35668835],
       [-0.0792862 ,  0.20885804, -0.03938186, -0.31296742,  0.07794664,
        -0.17145018,  0.05863029, -0.5442319 ],
       [-0.15338051,  0.00790378, -0.16789445,  0.01062359,  0.01195348,
        -0.08180121, -0.13447244, -0.28689724]], dtype=float32)>

In [12]:
# MLP cell B: feed ht through each tanh Dense layer in turn.
ht_output = ht
for dense_layer in cell_b_dnn:
  ht_output = dense_layer(ht_output)
ht_output

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[-0.12037019,  0.3831112 ,  0.1769916 , -0.1798836 , -0.22181237,
         0.16533694, -0.17172574, -0.07363203],
       [-0.12037019,  0.3831112 ,  0.1769916 , -0.1798836 , -0.22181237,
         0.16533694, -0.17172574, -0.07363203],
       [-0.00865444,  0.14078727, -0.01030668, -0.30018646,  0.02941729,
         0.25266528, -0.03232251,  0.0050133 ]], dtype=float32)>

In [13]:
# Candidate item embeddings: drop the length-1 axis at position 1 so that
# each candidate is a single row.
candidate_emb = seq_emb_dict['target_embedding']
item_emb = tf.squeeze(candidate_emb, axis=1)
item_emb

<tf.Tensor: shape=(6, 8), dtype=float32, numpy=
array([[-0.21213177, -0.3134294 , -0.08899713,  0.13866314,  0.53298825,
        -0.0147664 ,  0.3018123 , -0.05883517],
       [ 0.26818684, -0.07572377,  0.23721373, -0.24587238, -0.12480663,
        -0.02375418, -0.260393  ,  0.15189636],
       [ 0.21987581, -0.1217793 ,  0.06570315, -0.07897852,  0.4146207 ,
        -0.10026625, -0.01605059,  0.17732258],
       [-0.21213177, -0.3134294 , -0.08899713,  0.13866314,  0.53298825,
        -0.0147664 ,  0.3018123 , -0.05883517],
       [ 0.26818684, -0.07572377,  0.23721373, -0.24587238, -0.12480663,
        -0.02375418, -0.260393  ,  0.15189636],
       [ 0.21987581, -0.1217793 ,  0.06570315, -0.07897852,  0.4146207 ,
        -0.10026625, -0.01605059,  0.17732258]], dtype=float32)>

In [14]:
# Trilinear score of every candidate against every session:
#   z[b, n] = sum_d ht_output[b, d] * item_emb[n, d] * hs_output[b, d]
# Broadcast ht_output over the candidates.
ht_times_item = tf.multiply(tf.expand_dims(ht_output, axis=1), item_emb)
# Turn hs_output into column vectors for the batched matmul.
hs_column = tf.expand_dims(hs_output, axis=-1)
z = tf.squeeze(tf.matmul(ht_times_item, hs_column), axis=-1)
z

<tf.Tensor: shape=(3, 6), dtype=float32, numpy=
array([[ 0.01964047,  0.02587041,  0.01473159,  0.01964047,  0.02587041,
         0.01473159],
       [-0.03287007, -0.00745524, -0.00960904, -0.03287007, -0.00745524,
        -0.00960904],
       [ 0.0006625 ,  0.00056312,  0.00241525,  0.0006625 ,  0.00056312,
         0.00241525]], dtype=float32)>

In [15]:
# Normalise the scores over the last axis into a probability distribution
# across the candidates.
outputs = tf.nn.softmax(z, axis=-1)
outputs

<tf.Tensor: shape=(3, 6), dtype=float32, numpy=
array([[0.16659157, 0.16763267, 0.16577579, 0.16659157, 0.16763267,
        0.16577579],
       [0.16397345, 0.1681942 , 0.16783233, 0.16397345, 0.1681942 ,
        0.16783233],
       [0.16657478, 0.16655822, 0.16686699, 0.16657478, 0.1665582 ,
        0.16686699]], dtype=float32)>