<a href="https://colab.research.google.com/github/Muzhi1920/awesome-models/blob/main/05-%E7%89%B9%E5%BE%81%E4%BA%A4%E4%BA%92/05_DIN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DIN
- 参考：Deep Interest Network for Click-Through Rate Prediction
- https://arxiv.org/pdf/1706.06978.pdf


将历史序列的 embedding、目标（target）的 embedding，以及二者的差与逐元素积拼接后送入一个小型 MLP 计算 attention 权重，然后用该权重对 seq_emb 做加权求和（pooling）。

In [None]:
import tensorflow as tf
from sequence_feature_layer import SequenceFeatures
from tensorflow import feature_column as fc
from tensorflow.keras.layers import Layer, Dense, LayerNormalization, Dropout, Embedding, Conv1D, BatchNormalization

## 0.准备工作

In [None]:
# Fix TensorFlow's global seed so the randomly initialised embeddings and Dense
# kernels created in this notebook are the same after every "Restart & Run All".
SEED = 42
tf.random.set_seed(SEED)

# History (behaviour sequence) features, hashed into fixed-size bucket spaces.
seq_tag = fc.sequence_categorical_column_with_hash_bucket('seq_tag', hash_bucket_size=24, dtype=tf.int64)
seq_id = fc.sequence_categorical_column_with_hash_bucket('seq_id', hash_bucket_size=10, dtype=tf.int64)

# Target (candidate) features. They are declared as sequence columns as well and
# are fed below as length-1 sequences (dense_shape [3, 1]).
target_tag = fc.sequence_categorical_column_with_hash_bucket('target_tag', hash_bucket_size=24, dtype=tf.int64)
target_id = fc.sequence_categorical_column_with_hash_bucket('target_id', hash_bucket_size=10, dtype=tf.int64)

# One 8-d embedding column per feature.
# NOTE(review): history and target use separate embedding columns here; DIN
# usually compares both in one shared embedding space -- confirm this is intended.
seq_tag_col = fc.embedding_column(seq_tag, dimension=8)
seq_id_col = fc.embedding_column(seq_id, dimension=8)
target_tag_col = fc.embedding_column(target_tag, dimension=8)
target_id_col = fc.embedding_column(target_id, dimension=8)

columns = [seq_tag_col, seq_id_col, target_tag_col, target_id_col]

# Toy batch of 3 examples. Rows 0 and 1 have two history items, row 2 has one
# (its second slot is padding); every row has exactly one target item.
features={
  "seq_tag": tf.sparse.SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0], [1, 1], [2, 0]],
      values=[1100, 1101, 1102, 1100, 1103],
      dense_shape=[3, 2]),
  "target_tag": tf.sparse.SparseTensor(
      indices=[[0, 0],[1,0],[2,0]],
      values=[1102,1103,1100],
      dense_shape=[3, 1]),

  "seq_id": tf.sparse.SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0], [1, 1], [2, 0]],
      values=[2200, 2201, 2202, 2200, 2203],
      dense_shape=[3, 2]),
  "target_id": tf.sparse.SparseTensor(
      indices=[[0, 0],[1,0],[2,0]],
      values=[2202, 2203, 2200],
      dense_shape=[3, 1]),
}
# Densify one feature to eyeball the padded layout.
tf.sparse.to_dense(features['seq_id'])

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[2200, 2201],
       [2202, 2200],
       [2203,    0]], dtype=int32)>

In [None]:
# Run the project-local SequenceFeatures layer over all embedding columns at once.
# It returns two dicts that later cells index with the same '<feature>_embedding'
# keys: one holding the embeddings, one holding the sequence lengths.
# NOTE(review): embeddings are assumed to be (batch, seq_len, dim) and lengths
# (batch,) -- confirm against sequence_feature_layer.py.
sequence_feature_layer = SequenceFeatures(columns, name='sequence_features_input_layer')
seq_emb_dict, seq_lengths_dict = sequence_feature_layer(features)
# Show the keys available to the model config.
seq_emb_dict.keys()

dict_keys(['seq_id_embedding', 'seq_tag_embedding', 'target_id_embedding', 'target_tag_embedding'])

# 1.DIN模型

## 1.1模型配置

In [None]:
# Hidden widths of the attention MLP that scores each history position.
ATT_HIDDEN_UNITS = [32, 16]

# One entry per (target, history) feature pair. Each pair owns its own attention
# MLP ('hidden_dnn') and a 1-unit head ('att_output') that emits one logit per
# history position. The 'target' / 'seq' values are keys into seq_emb_dict.
din_config = [
    {
      'target': target_key,
      'seq': seq_key,
      'hidden_dnn': [Dense(unit, activation='relu') for unit in ATT_HIDDEN_UNITS],
      'att_output': Dense(1),
    }
    for target_key, seq_key in [
        ('target_tag_embedding', 'seq_tag_embedding'),
        ('target_id_embedding', 'seq_id_embedding'),
    ]
]

bn_layer = BatchNormalization(trainable=True)
# Hidden layers need a non-linearity: two stacked Dense layers without an
# activation collapse into a single linear map, so the 16-unit layer would add
# no modelling capacity.
ffn = [Dense(16, activation='relu'), Dense(8, activation='relu')]
output_layer = Dense(1, activation='sigmoid')

## 1.2Attention网络

In [None]:
def attention_layer(inputs, conf):
  """DIN attention pooling of a history sequence with respect to one target.

  Each history position is scored by a small MLP fed with
  [q, k, q - k, q * k]; the scores are softmax-normalised over the sequence
  axis and used to take a weighted sum of `v`.

  Args:
    inputs: 4-element sequence `(q, k, v, mask)`:
      q: target embedding, (batch, 1, d). It is tiled along axis 1.
      k: history embeddings used for scoring, (batch, seq_len, d).
      v: history embeddings that get pooled, (batch, seq_len, d).
      mask: (batch, seq_len, 1), true at real (non-padding) positions.
    conf: dict with 'hidden_dnn' (iterable of layers applied in order) and
      'att_output' (layer producing one logit per position).

  Returns:
    Pooled history representation, (batch, d). Rows whose mask is all false
    pool to zeros.
  """
  q, k, v, mask = inputs

  # Use the dynamic length: k.shape[1] is None when seq_len is not statically
  # known (e.g. inside a traced graph), which would break tf.tile.
  seq_len = tf.shape(k)[1]
  q = tf.tile(q, multiples=[1, seq_len, 1])  # (batch, seq_len, d)
  info = tf.concat([q, k, q - k, q * k], axis=-1)  # (batch, seq_len, 4d)

  # att_network
  for dense in conf['hidden_dnn']:
    info = dense(info)
  logits = conf['att_output'](info)  # (batch, seq_len, 1)

  # mask processing: push padded positions to a very negative logit so the
  # softmax gives them zero weight. tf.where needs a boolean condition.
  mask = tf.cast(mask, tf.bool)
  paddings = tf.ones_like(logits) * (-2 ** 32 + 1)  # (batch, seq_len, 1)
  logits = tf.where(mask, logits, paddings)  # (batch, seq_len, 1)

  # softmax over the sequence axis
  weights = tf.nn.softmax(logits=logits, axis=1)  # (batch, seq_len, 1)
  # A row with no valid position gets uniform weights from the softmax; zero
  # them so an empty history never pools padding values. Rows with at least
  # one valid position are unchanged (their padded weights are already 0).
  weights = weights * tf.cast(mask, weights.dtype)

  return tf.reduce_sum(weights * v, axis=1)  # (batch, d)

## 1.3Attention计算

In [None]:
# Build one attention-pooled vector per configured (target, history) pair.
din_output = []
for conf in din_config:
  # 1. get seq_emb
  seq_emb = seq_emb_dict[conf['seq']]
  target_emb = seq_emb_dict[conf['target']]

  # 2. get mask
  # Pin the mask width to the padded length of seq_emb. Without maxlen,
  # tf.sequence_mask sizes the mask to the longest sequence in the batch, which
  # can be shorter than the padded length; the mask then mismatches seq_emb or,
  # when that longest length is 1, silently broadcasts over padding positions.
  seq_len = seq_lengths_dict[conf['seq']]
  seq_mask = tf.expand_dims(
      tf.sequence_mask(seq_len, maxlen=tf.shape(seq_emb)[1]), axis=2)

  # 3. din_att: the history serves as both key and value.
  din_emb = attention_layer([target_emb, seq_emb, seq_emb, seq_mask], conf)
  # Append the target embedding itself (length-1 axis dropped) to the pooled history.
  user_info = tf.concat([din_emb, tf.squeeze(target_emb, axis=1)], axis=-1)
  din_output.append(user_info)

In [None]:
# Inspect the result: one tensor per entry of din_config.
din_output

[<tf.Tensor: shape=(3, 16), dtype=float32, numpy=
 array([[-0.07797419, -0.08751298,  0.36610502, -0.10494256, -0.0212444 ,
          0.04166278, -0.13390894,  0.37290865, -0.50531864,  0.631155  ,
          0.08560061, -0.01462146, -0.38034615, -0.5496842 , -0.12611875,
          0.35165834],
        [-0.12444351,  0.20455462,  0.13561232,  0.05765025, -0.2207866 ,
         -0.11927465,  0.21581909,  0.33374444, -0.5082068 ,  0.10135546,
         -0.38784412,  0.00387459, -0.47680038,  0.28935573, -0.04011736,
         -0.1338875 ],
        [-0.27438214, -0.41840288, -0.16604301, -0.21779725, -0.0090143 ,
         -0.44751582, -0.19455771, -0.18151334,  0.17689045, -0.19528258,
         -0.11762542, -0.23027322,  0.09872594, -0.22766413,  0.31389937,
         -0.12220497]], dtype=float32)>,
 <tf.Tensor: shape=(3, 16), dtype=float32, numpy=
 array([[-0.21482648, -0.3612737 ,  0.00179894,  0.29539776,  0.00997971,
          0.05537786,  0.02404993, -0.03884788, -0.39602447,  0.4749365 ,

# 2.网络输出

In [None]:
# Join the per-pair vectors along the feature axis, then batch-normalise them.
net = bn_layer(tf.concat(din_output, axis=-1))
net

<tf.Tensor: shape=(3, 32), dtype=float32, numpy=
array([[-0.07793521, -0.08746924,  0.36592203, -0.10489011, -0.02123379,
         0.04164196, -0.13384202,  0.37272227, -0.5050661 ,  0.6308396 ,
         0.08555783, -0.01461416, -0.38015607, -0.5494095 , -0.12605572,
         0.3514826 , -0.21471912, -0.36109313,  0.00179804,  0.29525012,
         0.00997472,  0.05535019,  0.02403791, -0.03882846, -0.39582655,
         0.4746991 , -0.21825579,  0.10394645,  0.16825305, -0.15642639,
        -0.26023072, -0.15079734],
       [-0.12438131,  0.20445238,  0.13554455,  0.05762143, -0.22067626,
        -0.11921504,  0.21571122,  0.33357763, -0.5079528 ,  0.10130481,
        -0.38765028,  0.00387266, -0.47656208,  0.28921112, -0.04009731,
        -0.13382058,  0.21559097, -0.30968106, -0.28247604,  0.4891094 ,
        -0.09948716, -0.07734136, -0.05065245, -0.10123821, -0.5277855 ,
        -0.2972394 , -0.34372246,  0.39362493, -0.42481342,  0.24397811,
        -0.51903135, -0.16902508],
     

In [None]:
# Rebuild the FFN input from din_output instead of reading `net`: the loop below
# overwrites `net`, so re-running this cell would otherwise feed its own 8-d
# output back into layers that were built for the 32-d input.
net = bn_layer(tf.concat(din_output, axis=-1))
for layer in ffn:
  net = layer(net)
net

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[ 0.0298623 ,  0.37747207, -0.5407858 ,  0.07977922, -0.2199403 ,
        -0.05747664, -0.14641593,  0.08642846],
       [-0.08119683, -0.5162671 ,  0.15861546,  0.60475576, -0.27961898,
         0.30818018, -0.14244166,  0.5265868 ],
       [-0.08044617, -0.49068213, -0.01410597,  0.25885564,  0.58016807,
         0.21664813,  0.37478286, -0.40209168]], dtype=float32)>

In [None]:
# Final 1-unit sigmoid head: one score in (0, 1) per example.
output_layer(net)

<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[0.39760578],
       [0.79265916],
       [0.46889168]], dtype=float32)>