<a href="https://colab.research.google.com/github/Muzhi1920/awesome-models/blob/main/05-%E7%89%B9%E5%BE%81%E4%BA%A4%E4%BA%92/04_AFM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AFM

- 参考：Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks
- 论文：https://arxiv.org/pdf/1708.04617.pdf

在不同粒度（field 粒度与 id 粒度）上做两两特征交叉，再用 attention 网络学到的权重对交叉结果做加权 pooling。



In [None]:
import tensorflow as tf
from sequence_feature_layer import SequenceFeatures
from tensorflow import feature_column as fc
from tensorflow.keras.layers import Layer, Dense, LayerNormalization, Dropout, Embedding, Conv1D
import itertools

# 0.准备工作

In [None]:
# Three hashed sequence-id columns that share the same bucket count and dtype.
seq_3101, seq_3102, target = [
  fc.sequence_categorical_column_with_hash_bucket(key, hash_bucket_size=10, dtype=tf.int64)
  for key in ('3101', '3102', 'target')
]

# One 8-dimensional embedding column per categorical column, in declaration order.
columns = [fc.embedding_column(cat_col, dimension=8) for cat_col in (seq_3101, seq_3102, target)]
seq_3101_col, seq_3102_col, target_col = columns

# Toy batch of 3 examples. '3101' and '3102' use the same ragged layout
# (rows 0 and 1 hold two ids, row 2 holds one); 'target' holds one id per row.
history_indices = [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0]]
features = {
  "3101": tf.sparse.SparseTensor(
      indices=history_indices,
      values=[1100, 1101, 1102, 1101, 1103],
      dense_shape=[3, 2]),
  "3102": tf.sparse.SparseTensor(
      indices=history_indices,
      values=[1000, 1001, 1002, 1001, 1003],
      dense_shape=[3, 2]),
  "target": tf.sparse.SparseTensor(
      indices=[[0, 0], [1, 0], [2, 0]],
      values=[1102, 1103, 1100],
      dense_shape=[3, 1]),
}

# Display the dense (zero-padded) view of one input as a sanity check.
tf.sparse.to_dense(features['3101'])

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1100, 1101],
       [1102, 1101],
       [1103,    0]], dtype=int32)>

In [None]:
# Run the toy batch through the project-local SequenceFeatures layer. The call
# yields two mapping-like results, unpacked as embeddings and sequence lengths.
sequence_feature_layer = SequenceFeatures(columns, name='sequence_features_input_layer')
layer_outputs = sequence_feature_layer(features)
seq_emb_dict, seq_lengths = layer_outputs
(seq_emb_dict.keys(), seq_lengths.keys())

(dict_keys(['3101_embedding', '3102_embedding', 'target_embedding']),
 dict_keys(['3101_embedding', '3102_embedding', 'target_embedding']))

In [None]:
# Float padding mask per column: 1.0 on valid steps, 0.0 on padding, with a
# trailing singleton axis appended.
seq_mask_dict = {
  name: tf.expand_dims(tf.sequence_mask(length, dtype=tf.float32), axis=-1)
  for name, length in seq_lengths.items()
}
seq_mask_dict.keys()

dict_keys(['3101_embedding', '3102_embedding', 'target_embedding'])

# 1.field粒度交叉与Attention

## 1.1Pair-wise Interaction Layer

>对field粒度进行交互；长尾分布，对大部分稀疏id训练不好

In [None]:
def _masked_mean(emb, length):
  """Average `emb` over axis 1, counting only the valid (non-padded) steps.

  A plain `tf.reduce_mean(emb, axis=1)` divides by the padded length, which
  shrinks the pooled vector of every sequence shorter than the longest one.

  Args:
    emb: embedding tensor; assumed (batch, steps, dim) -- TODO confirm against
      SequenceFeatures.
    length: number of valid steps per example; assumed shape (batch,) -- TODO
      confirm against SequenceFeatures.

  Returns:
    Tensor with axis 1 reduced; (batch, dim) under the assumptions above.
  """
  # maxlen is pinned to the embedding's step axis so the mask always lines up
  # with `emb`, even when every sequence in the batch is shorter than the padding.
  mask = tf.sequence_mask(length, maxlen=tf.shape(emb)[1], dtype=emb.dtype)
  mask = tf.expand_dims(mask, axis=-1)
  # Clamp to 1 so an all-padding example yields zeros instead of NaN.
  valid_steps = tf.maximum(tf.reduce_sum(mask, axis=1), 1.0)
  return tf.reduce_sum(emb * mask, axis=1) / valid_steps

# Pool each field once (not once per pair); seq_lengths shares seq_emb_dict's keys.
pooled_fields = [_masked_mean(emb, seq_lengths[name]) for name, emb in seq_emb_dict.items()]

# Element-wise product for every unordered pair of fields, stacked on a new pair axis.
pwi_out = tf.stack([r * c for r, c in itertools.combinations(pooled_fields, 2)], axis=1)
pwi_out

<tf.Tensor: shape=(3, 3, 8), dtype=float32, numpy=
array([[[-1.00356073e-03,  9.98210069e-03,  4.71053598e-03,
          1.10375509e-03, -3.21150455e-03, -6.51679412e-02,
         -3.50311808e-02,  9.80597362e-03],
        [-1.01555162e-03, -3.21287941e-03, -1.16679929e-02,
         -4.28579078e-04, -8.31099600e-02,  5.17351553e-02,
          7.88098052e-02, -8.96497630e-03],
        [ 8.41867626e-02, -4.25591916e-02, -9.31518227e-02,
         -1.86585579e-02,  5.19844750e-03, -3.24412584e-02,
         -1.15362719e-01, -2.48533100e-01]],

       [[ 8.18544030e-02, -1.95705201e-02,  6.30064402e-04,
          2.78018179e-05, -2.13796999e-02, -1.27708195e-02,
         -2.20560934e-03,  6.71758726e-02],
        [ 4.82265372e-03, -6.96517751e-02,  3.43040749e-02,
         -1.06468317e-04, -6.71342239e-02, -1.78647608e-01,
          7.32634543e-03, -8.88278782e-02],
        [ 2.24170871e-02,  4.65865582e-02,  2.95169256e-03,
         -3.43731162e-03,  1.14842281e-01,  3.05271707e-02,
       

## 1.2Attention Net定义

In [None]:
# Pooling mode read by the aggregation cell in section 1.3: 'max' and 'avg'
# select plain pooling there; any other value (such as 'x') selects attention.
mode = 'x'
# Final projection to a single logit; reused by both AFM variants in this notebook.
out_layer = Dense(units=1, activation=None)

def attention_unit(inputs, att_units=8, dropout_rate=0.1, training=False):
  """Attention-weighted sum of pair-wise interaction vectors.

  Each interaction vector is scored by a small two-layer network, the scores
  are normalized with a softmax across axis 1, and the vectors are summed
  with those weights.

  Note: the Dense layers are constructed inside this function, so every call
  creates freshly initialized weights. This suits the demo cells but not
  training; hoist the layers out before fitting a model.

  Args:
    inputs: interaction tensor; in this notebook (batch, num_pairs, embed_dim),
      where num_pairs = n * (n - 1) / 2 for n crossed vectors.
    att_units: hidden width of the scoring network.
    dropout_rate: rate of the dropout applied to the pooled vector.
    training: when False (default) dropout is a no-op, so the result matches
      the plain weighted sum.

  Returns:
    Tensor with axis 1 reduced; (batch, embed_dim) for the inputs above.
  """
  att_w = Dense(units=att_units, activation='relu', use_bias=True)
  att = Dense(units=1, activation=None)
  # NOTE(review): the original built this layer but never applied it; applying it
  # to the pooled vector is an assumption about the intent -- confirm.
  dropout = Dropout(dropout_rate, trainable=True)
  a = att(att_w(inputs))  # (batch, num_pairs, 1)
  att_weight = tf.nn.softmax(a, axis=1)  # weights sum to 1 over the pair axis
  outputs = tf.reduce_sum(inputs * att_weight, axis=1)  # (batch, embed_dim)
  return dropout(outputs, training=training)

## 1.3Attention聚合输出

In [None]:
# Collapse the pair axis (axis 1) of the interaction tensor according to `mode`.
if mode == 'max':
  # Previously this branch summed the pairs; 'max' now performs max pooling.
  x = tf.reduce_max(pwi_out, axis=1)
elif mode == 'sum':
  # Keeps the earlier sum-pooling behavior available under an accurate name.
  x = tf.reduce_sum(pwi_out, axis=1)
elif mode == 'avg':
  x = tf.reduce_mean(pwi_out, axis=1)
else:
  # Any other mode value falls back to attention-weighted pooling.
  x = attention_unit(pwi_out)
x

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[ 0.02641316, -0.01140372, -0.03234252, -0.00577684, -0.02759554,
        -0.01499589, -0.02228822, -0.07971201],
       [ 0.0364208 , -0.01493863,  0.0127764 , -0.00114742,  0.00756033,
        -0.05475701, -0.00328075, -0.02454796],
       [ 0.01541533, -0.03976187, -0.02397325,  0.0186403 , -0.01870697,
         0.00087633, -0.00050281, -0.00234059]], dtype=float32)>

## 1.4AFM输出
>可输出logits或者得到原始向量

In [None]:
# Project the pooled vector to a single logit, then squash it to (0, 1).
logits = out_layer(x)
tf.nn.sigmoid(logits)

<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[0.51916593],
       [0.49950588],
       [0.5109924 ]], dtype=float32)>

# 2.id粒度交叉与Attention
>同一featureField下的id交互

In [None]:
# Join the selected embeddings along axis 1 so their steps sit side by side.
id_level_keys = ('3101_embedding', 'target_embedding')
feature_emb = tf.concat([seq_emb_dict[key] for key in id_level_keys], axis=1)
feature_emb

<tf.Tensor: shape=(3, 3, 8), dtype=float32, numpy=
array([[[ 0.10851995, -0.21175861, -0.19599624,  0.17972289,
         -0.20250568,  0.42037708,  0.23111723, -0.12334363],
        [-0.1015612 ,  0.1568562 ,  0.24457736, -0.1897932 ,
          0.65568906,  0.22437245,  0.07827882,  0.16095835],
        [-0.2918778 ,  0.11703963, -0.48035088,  0.08511736,
         -0.3667829 ,  0.16048141,  0.50944287, -0.47667384]],

       [[-0.16384096,  0.18525474, -0.07343411,  0.19164915,
         -0.4320993 ,  0.32238504, -0.01347423,  0.51451015],
        [-0.1015612 ,  0.1568562 ,  0.24457736, -0.1897932 ,
          0.65568906,  0.22437245,  0.07827882,  0.16095835],
        [-0.03634223, -0.40718824,  0.40088144, -0.1147316 ,
         -0.60051256, -0.6534803 ,  0.22610572, -0.26301116]],

       [[-0.0574158 ,  0.45883518, -0.61636484, -0.2710455 ,
          0.6149231 , -0.2895744 ,  0.13206907,  0.0456478 ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  

In [None]:
# Read the number of positions from the static shape: handing a scalar tensor
# (tf.shape(...)[1]) to Python's range() only works in eager execution.
num_ids = feature_emb.shape[1]

# Element-wise product for every unordered pair of positions along axis 1,
# stacked on a new pair axis.
pwi_out = tf.stack(
    [feature_emb[:, r] * feature_emb[:, c] for r, c in itertools.combinations(range(num_ids), 2)],
    axis=1)
pwi_out

<tf.Tensor: shape=(3, 3, 8), dtype=float32, numpy=
array([[[-0.01102142, -0.03321565, -0.04793624, -0.03411018,
         -0.13278076,  0.09432103,  0.01809159, -0.01985319],
        [-0.03167456, -0.02478415,  0.09414697,  0.01529754,
          0.07427562,  0.06746271,  0.11774103,  0.05879468],
        [ 0.02964346,  0.01835839, -0.11748295, -0.0161547 ,
         -0.24049553,  0.03600761,  0.03987859, -0.07672463]],

       [[ 0.01663989,  0.02905835, -0.01796032, -0.0363737 ,
         -0.28332278,  0.07233432, -0.00105475,  0.08281471],
        [ 0.00595435, -0.07543355, -0.02943837, -0.02198821,
          0.25948107, -0.21067227, -0.0030466 , -0.13532192],
        [ 0.00369096, -0.06387   ,  0.09804653,  0.02177528,
         -0.3937495 , -0.14662297,  0.01769929, -0.04233384]],

       [[-0.        ,  0.        , -0.        , -0.        ,
          0.        , -0.        ,  0.        ,  0.        ],
        [ 0.00972366, -0.1916537 ,  0.37867978,  0.0746325 ,
          0.08623874, -

## 2.1Attention计算与输出

In [None]:
# Attention-weighted pooling over the id-level interaction vectors.
x = attention_unit(inputs=pwi_out)
x

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[-0.00225289, -0.01180861, -0.03121676, -0.01285173, -0.11074847,
         0.06496987,  0.0556797 , -0.01733296],
       [ 0.00848957, -0.03857558,  0.01980671, -0.01082109, -0.14439084,
        -0.0991254 ,  0.00500599, -0.03372357],
       [ 0.00307262, -0.06056136,  0.11966042,  0.0235834 ,  0.0272509 ,
        -0.02765613, -0.00396368, -0.00759112]], dtype=float32)>

In [None]:
# Project the pooled vector to a single logit, then squash it to (0, 1).
logits = out_layer(x)
tf.nn.sigmoid(logits)

<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[0.50350255],
       [0.4885404 ],
       [0.49196237]], dtype=float32)>