| 名称      | 含义 |
| ----------- | ----------- |
| n_user      | 用户数       |
| n_item   | 交互项目数        |
| train_data   | 训练集        |
| eval_data   | 验证集        |
| test_data   | 测试集        |
| adj_item   | 行:用户 列:用户产生的交互编号 每个用户采样20个交互        |
| n_classes   | 待分类的结果数量 例如性别就是男、女2类        |
| ratings_np   | 从ratings文件中读取的原始数据 numpy数组 每个用户一行 列分别是(用户id, 交互id, 性别)        |
| adj_user   | adj_user[i][j]: 与用户i相邻的某个用户id (没有顺序, 是随机采样的) |
| n_entity   | 知识库中实体的个数(头尾做union)        |
| n_relation   | 知识库中关系的个数(头尾做union)        |
| adj_entity   | adj_entity[i]: 与实体i有关系的实体集合(采样)        |
| adj_relation   | adj_relation[i]: 与实体i有关系的关系(采样)        |

# 数据预处理

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# 加载并预处理数据
def read_file(filename):
  """Load the NumPy array stored in ``<filename>.npy``.

  Args:
    filename: Path of the array file, without the ``.npy`` extension.

  Returns:
    The object produced by ``np.load`` for that file.
  """
  with open('{}.npy'.format(filename), 'rb') as npy_file:
    return np.load(npy_file)

# 'n_file' stores the five dataset-level counts, unpacked in this order.
n_user, n_item, n_entity, n_relation, n_classes = read_file('n_file')

# Train / validation / test splits.
train_data = read_file('train_data')
eval_data = read_file('eval_data')
test_data = read_file('test_data')

# Adjacency tables (each one is loaded exactly once).
adj_entity = read_file('adj_entity')
adj_relation = read_file('adj_relation')
adj_item = read_file('adj_item')
adj_user = read_file('adj_user')

# Raw ratings array.
ratings_np = read_file('ratings_np')

In [3]:
# Stand-in for the command-line arguments.
arg = {
    'ratio': 1,
    'n_interact': 20,
    'n_neighbors': 20,
    'dataset': 'movie',
    'mission': 'gender',
    'neighbor_sample_size': 4,
    'dim': 32,  # embedding dimension
    'lr': 0.02,
    'l2_weight': 0.001,
    'n_iter': 2,  # number of neighbor-expansion iterations
    'batch_size': 128,
}

In [4]:
# Graph inputs: three rank-1 int64 placeholders of unspecified length.
user_indicies = tf.placeholder(tf.int64, [None], 'user_indicies')
item_indicies = tf.placeholder(tf.int64, [None], 'item_indicies')
labels = tf.placeholder(tf.int64, [None], 'labels')

In [5]:
def get_initializer():
  """Return a new Xavier initializer.

  This initializer is meant to keep the variance of every layer's output
  as equal as possible.
  """
  xavier = tf.contrib.layers.xavier_initializer()
  return xavier

# 构建模型

## 创建embedding矩阵作为用户特征输入 并构建训练参数矩阵

In [6]:
# Trainable parameters of the model.
dim = arg['dim']

# User feature embeddings, one row of size `dim` per user.
user_emb_matrix = tf.get_variable(
    name='user_emb_matrix', shape=[n_user, dim], initializer=get_initializer())

# Entity feature embeddings, one row per entity.
entity_emb_matrix = tf.get_variable(
    name='entity_emb_matrix', shape=[n_entity, dim], initializer=get_initializer())

# Relation embeddings, one row per relation.
relation_emb_matrix = tf.get_variable(
    name='relation_emb_matrix', shape=[n_relation, dim], initializer=get_initializer())

# Output layer weight w, mapping a `dim`-sized vector to `n_classes` values.
output_weights = tf.get_variable(
    name='output_weights1', shape=[dim, n_classes], initializer=get_initializer())

# Output layer bias, initialized to zeros.
output_bias = tf.get_variable(
    name='output_bias1', shape=[n_classes], initializer=tf.zeros_initializer())

## 获取近邻实体与关系

In [7]:
# Append a trailing axis: item_indicies [n] -> seeds [n, 1].
seeds = tf.expand_dims(item_indicies, axis=-1)
seeds.shape  # displayed by the notebook: the row vector became a column vector

TensorShape([Dimension(None), Dimension(1)])

### 收集算法
1. 对于用户u的交互实体集合$N_i(u)$中的每一个实体$e_i^0$,收集其近邻实体集合$N_i(e_i^0)$, 集合中的每一个实体直接与实体$e_i^0$相连。 则有关系$r_{e_i^0,e_j^1}$表示实体$e_i^0$和实体$e_j^1$之间的关系,其中$e^1_j\in N_i(e_i^0)$。 
2. 对于每一个$N_i(e_i^0)$中的实体$e_j^1$,进行近邻实体获取,得到$e_j^1$的近邻实体集合$N_i(e_j^1)$。 
3. 对$e_j^1$的近邻实体集合$N_i(e_j^1)$中的实体$e_k^2$,进行近邻实体获取,直到递归次数达到H次,递归收集过程终止。 


![迭代搜集近邻实体算法](assets/alg3.1_get_neighbor_entities.png)

In [8]:
# Display the shape of the entity adjacency table. The second dimension is
# presumably the number of sampled neighbors per entity
# (arg['neighbor_sample_size']) - TODO confirm.
adj_entity.shape

(37473, 4)

### 以下构建临近矩阵
(意义不明)
- entities
- interact_item
- neighbor_users

In [9]:
# entities[h] holds the entity ids reached after h hops; entities[0] is the
# seed column [n, 1]. relations[h] holds the relation ids looked up for the
# entities of hop h.
entities = [seeds]
relations = []
n_iter = arg['n_iter']  # the number of iterations is a hyper-parameter
batch_size = arg['batch_size']
# NOTE(review): the reshapes below hard-code batch_size, so feeds are
# presumably expected to contain exactly batch_size ids - confirm.
for i in range(n_iter):
    # Each iteration expands the result of the previous one.
    frontier = entities[i]
    # tf.gather(a, b) picks the entries of `a` at the indices in `b`, so every
    # id in the frontier is replaced by its row of the adjacency table
    # (e.g. [n, 1] -> [n, 1, 4] on the first iteration). The reshape then
    # flattens those rows into a single id matrix [batch_size, -1].
    neighbor_entities = tf.reshape(tf.gather(adj_entity, frontier), [batch_size, -1])
    neighbor_relations = tf.reshape(tf.gather(adj_relation, frontier), [batch_size, -1])
    entities.append(neighbor_entities)
    relations.append(neighbor_relations)

In [10]:
# Column of user ids: [n] -> [n, 1].
users = tf.expand_dims(user_indicies, axis=-1)
flat_ids_shape = [batch_size, -1]
# Ids of the items each user interacted with, flattened to one row per user
# (the number of ids per user is whatever adj_item stores per row).
interact_item = tf.reshape(tf.gather(adj_item, users), flat_ids_shape)
# Ids of each user's neighbor users, flattened the same way.
neighbor_users = tf.reshape(tf.gather(adj_user, users), flat_ids_shape)

In [11]:
# Fetch the rows of user_emb_matrix selected by the ids in user_indicies.
user_embedding = tf.nn.embedding_lookup(params=user_emb_matrix, ids=user_indicies)

## 聚集

获取该batch下的相应实体和关系的嵌入向量

entity_vectors和relation_vectors里面存的是[[seeds],[一跳邻居],[二跳邻居]]的embedding

其中每个元素的形状依次为 [batch_size, 1, dim](seeds), [batch_size, n_neighbor, dim], [batch_size, n_neighbor^2, dim]

In [12]:
# One embedding tensor per hop: the id matrices collected earlier are mapped
# to their embedding rows.
entity_vectors = [
    tf.nn.embedding_lookup(entity_emb_matrix, entity_ids) for entity_ids in entities
]
relation_vectors = [
    tf.nn.embedding_lookup(relation_emb_matrix, relation_ids) for relation_ids in relations
]

### Aggregator

In [13]:
# Utility that hands out sequential ids per layer name.
# LAYER_IDS maps a layer name to the last id handed out for it.
LAYER_IDS = {}


def get_layer_id(layer_name=''):
    """Return the next id for `layer_name`: 0 on first use, then 1, 2, ..."""
    next_id = LAYER_IDS.get(layer_name, -1) + 1
    LAYER_IDS[layer_name] = next_id
    return next_id

In [14]:
from abc import abstractmethod
class Aggregator(object):
  """Base class of the entity aggregators.

  Stores the layer settings and provides the user-aware mixing of neighbor
  vectors; subclasses implement `_call` to combine that mix with the
  entity's own vector.
  """

  def __init__(self, batch_size, dim, dropout, act, name) -> None:
    """Store the layer settings.

    Parameters:
      batch_size - first dimension used when reshaping tensors
      dim - embedding dimension
      dropout - dropout rate stored for subclasses
      act - activation callable stored for subclasses
      name - layer name; when falsy, one is generated from the lower-cased
             class name and a running id from get_layer_id
    """
    super().__init__()
    if not name:
      prefix = self.__class__.__name__.lower()
      name = '_'.join([prefix, str(get_layer_id(prefix))])
    self.batch_size = batch_size
    self.dim = dim
    self.dropout = dropout
    self.act = act
    self.name = name

  def __call__(self, self_vectors, neighbor_vectors, neighbor_relations, user_embeddings):
    """Apply the aggregator by forwarding every argument to `_call`."""
    aggregated = self._call(self_vectors, neighbor_vectors, neighbor_relations, user_embeddings)
    return aggregated

  @abstractmethod
  def _call(self, self_vectors, neighbor_vectors, neighbor_relations, user_embeddings):
    '''
    Hook implemented by subclasses; the base version returns None.

    Parameters:
      self_vectors - representation of the entities themselves (entity
                     embeddings at the current hop h), size [batch_size, -1, dim]
      neighbor_vectors - representation of the entities' neighbors (entity
                         embeddings at hop h+1), size [batch_size, -1, n_neighbor, dim]
      neighbor_relations - relation embeddings at the current hop h,
                           size [batch_size, -1, n_neighbor, dim]
      user_embeddings - user embeddings, size [batch_size, dim]
    '''

  def _mix_neighbor_vectors(self, neighbor_vectors, neighbor_relations, user_embeddings):
    """Combine neighbor vectors, weighted by user-relation scores.

    Returns a tensor in which the neighbor axis (axis 2) has been reduced,
    i.e. [batch_size, -1, dim] for inputs shaped as documented in `_call`.
    """
    # [batch_size, 1, 1, dim], so the user vector broadcasts over every relation.
    user_view = tf.reshape(user_embeddings, [self.batch_size, 1, 1, self.dim])
    # Score of each relation for the user: the element-wise product is averaged
    # over the embedding axis (-1), leaving one scalar per relation.
    scores = tf.reduce_mean(user_view * neighbor_relations, axis=-1)
    # Softmax over the last remaining axis (the neighbors), then restore a
    # trailing axis: [batch_size, -1, n_neighbor, 1].
    weights = tf.expand_dims(tf.nn.softmax(scores, dim=-1), axis=-1)
    # Linear combination of the neighbors: axis 2 indexes the neighbors of one
    # entity, so reducing over it leaves one vector per entity.
    return tf.reduce_mean(weights * neighbor_vectors, axis=2)

In [15]:
class ConcatAggregator(Aggregator):
  """Aggregator that concatenates an entity's own vector with its mixed
  neighbor vector and passes the result through one dense layer."""

  def __init__(self, batch_size, dim, dropout=0., act=tf.nn.relu, name=None):
    """Create the layer and its parameters.

    The weight matrix [dim * 2, dim] and the bias [dim] are created inside a
    variable scope named after the layer.
    """
    super().__init__(batch_size, dim, dropout, act, name)

    with tf.variable_scope(self.name):
      self.weights = tf.get_variable(
          name='weights', shape=[2 * self.dim, self.dim],
          initializer=tf.contrib.layers.xavier_initializer())
      self.bias = tf.get_variable(
          name='bias', shape=[self.dim], initializer=tf.zeros_initializer())

  def _call(self, self_vectors, neighbor_vectors, neighbor_relations, user_embeddings):
    """Return act(dropout(concat(self, mixed neighbors)) @ weights + bias),
    reshaped to [batch_size, -1, dim]."""
    # Mixed neighbor representation, [batch_size, -1, dim].
    mixed = self._mix_neighbor_vectors(neighbor_vectors, neighbor_relations, user_embeddings)

    # The paper combines the entity's own representation and the neighbor
    # representation by concatenation: [batch_size, -1, dim * 2], flattened
    # to [-1, dim * 2] for the matrix product.
    joined = tf.reshape(tf.concat([self_vectors, mixed], axis=-1), [-1, 2 * self.dim])
    # NOTE(review): dropout is applied unconditionally here; this block has no
    # train/eval switch.
    joined = tf.nn.dropout(joined, keep_prob=1 - self.dropout)

    # Dense projection back to [-1, dim].
    projected = tf.matmul(joined, self.weights) + self.bias

    # Restore the batch axis before the activation: [batch_size, -1, dim].
    return self.act(tf.reshape(projected, [self.batch_size, -1, self.dim]))

In [16]:
# Aggregators created while aggregating the entities.
aggregators = []
# Number of sampled neighbors per entity. The entity and relation vectors are
# reshaped to [batch_size, -1, n_neighbor, dim], so this has to equal the
# number of columns of adj_entity, i.e. arg['neighbor_sample_size'] (4), and
# not arg['n_neighbors'] (20), for which that reshape cannot succeed.
n_neighbor = arg['neighbor_sample_size']

#### aggregate entities

In [17]:
# Target shape that puts the neighbors of every entity on axis 2.
shape = [batch_size, -1, n_neighbor, dim]
for i in range(n_iter):
    # One aggregator per iteration (the process resembles a tower structure):
    # the last iteration uses tanh, every other one relu.
    is_last_iter = i == n_iter - 1
    aggregator = ConcatAggregator(
        batch_size, dim, dropout=0.4,
        act=tf.nn.tanh if is_last_iter else tf.nn.relu)
    aggregators.append(aggregator)

    # Corresponds to lines 5-8 of Algorithm 1 in the paper.
    entity_vectors_next_iter = []
    for hop in range(n_iter - i):
        # Arguments: the entities' own embeddings, the embeddings of their
        # neighbors, the embeddings of the relations linking them to those
        # neighbors, and the user embedding (used to compute g(u, r)).
        vector = aggregator(
            self_vectors=entity_vectors[hop],
            neighbor_vectors=tf.reshape(entity_vectors[hop + 1], shape),
            neighbor_relations=tf.reshape(relation_vectors[hop], shape),
            user_embeddings=user_embedding)
        entity_vectors_next_iter.append(vector)
    # The aggregated vectors become the entity embeddings of the next iteration.
    entity_vectors = entity_vectors_next_iter

# Hop 0 of the final iteration is reshaped to [batch_size, dim].
item_embeddings = tf.reshape(entity_vectors[0], [batch_size, dim])

#### aggregate users
和上面聚集实体几乎是一样的

In [18]:
class UserAggregator(object):
    """Base class of the user-side aggregators.

    Stores the layer settings and provides the score-weighted mixing of
    neighbor vectors; subclasses implement `_call`.
    """

    def __init__(self, batch_size, dim, dropout, act, name):
        """Store the layer settings.

        When `name` is falsy, a name is generated from the lower-cased class
        name and a running id from get_layer_id.
        """
        if not name:
            prefix = self.__class__.__name__.lower()
            name = '_'.join([prefix, str(get_layer_id(prefix))])
        self.batch_size = batch_size
        self.dim = dim
        self.dropout = dropout
        self.act = act
        self.name = name

    def __call__(self, self_vectors, neighbor_vectors, neighbor_relations, user_embeddings, neighbors_embeddings):
        """Apply the aggregator by forwarding every argument to `_call`."""
        return self._call(self_vectors, neighbor_vectors, neighbor_relations, user_embeddings, neighbors_embeddings)

    @abstractmethod
    def _call(self, self_vectors, neighbor_vectors, neighbor_relations, user_embeddings, neighbors_embeddings):
        """Hook implemented by subclasses; the base version returns None.

        Dimensions noted by the original author:
            self_vectors: [batch_size, -1, dim]
            neighbor_vectors: [batch_size, -1, n_neighbor, dim]
            neighbor_relations: [batch_size, -1, n_neighbor, dim]
            user_embeddings: [batch_size, dim]
        """

    def _mix_neighbor_vectors(self, neighbor_vectors, neighbor_relations, user_embeddings):
        """Combine neighbor vectors, weighted by user-relation scores.

        Axis 1 (the neighbor axis) is reduced, giving [batch_size, dim].
        """
        # [batch_size, 1, dim], so the user vector broadcasts over the neighbors.
        user_view = tf.reshape(user_embeddings, [self.batch_size, 1, self.dim])

        # [batch_size, n_neighbor]: the user-relation score is the mean of the
        # element-wise product over the last axis (dim values); softmax then
        # normalizes the scores so that they sum to 1.
        scores = tf.reduce_mean(user_view * neighbor_relations, axis=-1)
        weights = tf.nn.softmax(scores, dim=-1)

        # [batch_size, n_neighbor, 1]
        weights = tf.expand_dims(weights, axis=-1)

        # [batch_size, dim]
        return tf.reduce_mean(weights * neighbor_vectors, axis=1)

In [19]:
class SumAggregator(UserAggregator):
    """User aggregator that concatenates the mixed neighbor-user vector with
    the mixed interacted-item vector, projects the result with a dense layer
    and adds the user's own vector before the activation."""

    def __init__(self, batch_size, dim, dropout=0., act=tf.nn.relu, name=None):
        """Create the layer and its parameters inside a scope named after it."""
        super().__init__(batch_size, dim, dropout, act, name)

        with tf.variable_scope(self.name):
            self.weights = tf.get_variable(
                name='user_agg_weights', shape=[2 * self.dim, self.dim],
                initializer=tf.contrib.layers.xavier_initializer())
            self.bias = tf.get_variable(
                name='user_agg_bias', shape=[self.dim], initializer=tf.zeros_initializer())

    def _call(self, self_vectors, neighbor_vectors, neighbor_relations, user_embeddings, neighbors_embeddings):
        """Return act(dropout(concat(user mix, item mix)) @ weights + bias + self_vectors)."""
        # Mix of the vectors passed as neighbor_vectors / neighbor_relations.
        items_mix = self._mix_neighbor_vectors(neighbor_vectors, neighbor_relations, user_embeddings)
        # Mix of the neighbor users; their embeddings are passed both as the
        # vectors and as the "relations" that are scored against the user.
        users_mix = self._mix_neighbor_vectors(neighbors_embeddings, neighbors_embeddings, user_embeddings)
        merged = tf.reshape(tf.concat([users_mix, items_mix], axis=-1), [-1, 2 * self.dim])
        # The original notebook kept two disabled variants here, marked as a
        # test for HKGCN without neighbor users: reshaping only the first mix,
        # or self_vectors plus that mix, to [-1, dim].

        merged = tf.nn.dropout(merged, keep_prob=1 - self.dropout)
        projected = tf.matmul(merged, self.weights) + self.bias

        # The user's own vector is added before the activation.
        return self.act(projected + self_vectors)

In [20]:
# Embedding rows of the users in the batch.
user_embeddings = tf.nn.embedding_lookup(params=user_emb_matrix, ids=user_indicies)

# Embeddings of the interacted items, [batch_size, n_interact, dim], and of
# the neighbor users.
interactItemEmb = tf.nn.embedding_lookup(params=entity_emb_matrix, ids=interact_item)
neighbor_usersEmb = tf.nn.embedding_lookup(params=user_emb_matrix, ids=neighbor_users)

aggragator = SumAggregator(batch_size, dim, act=tf.nn.tanh)
# The item embeddings are passed both as neighbor vectors and as relations;
# the user embeddings are passed both as self vectors and as the scoring user.
user_embeddings = aggragator(
    self_vectors=user_embeddings,
    neighbor_vectors=interactItemEmb,
    neighbor_relations=interactItemEmb,
    user_embeddings=user_embeddings,
    neighbors_embeddings=neighbor_usersEmb)

In [21]:
# Output layer: project the aggregated user embeddings with output_weights
# ([dim, n_classes]) and add the bias.
output = tf.matmul(a=user_embeddings, b=output_weights) + output_bias