In [None]:
1. 获取转移矩阵
2. 随机游走

In [1]:
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd


# 数据处理示例
data_path="../data/ml-100k/u.data"
data_fields = ['user_id', 'item_id', 'rating', 'timestamp']
data_df = pd.read_table(data_path, names=data_fields)

# get user number
n_users = max(data_df['user_id'].values)
# get item number
n_items = max(data_df['item_id'].values)


data = np.ones((data_df.shape[0]))
data=data_df.rating.values
row = data_df.user_id-1
col = data_df.item_id-1
UI = coo_matrix((data, (row, col)), shape=(n_users, n_items))
UIUI = UI.dot(UI.transpose()).dot(UI)

# 将UI转换成邻接矩阵
UI=UI.toarray()
UI=(UI>0).astype(int)

In [2]:
# X: 输入的转移矩阵，如UI矩阵
# alpha: 重启系数
def randomWalk(X, alpha=0.9, num_walk=2):
    num_s,num_e=X.shape
    X_norm=X.sum(1,keepdims=True)
    
    # 初始化所有节点的状态
    Z=np.zeros((num_s,num_s+num_e))
    for i in range(num_s):
        Z[i,i]=1
    Z_0=Z.copy()
    
    # 初始化转移矩阵P，P是(num_s+num_e)*(num_s+num_e)的矩阵
    P=np.concatenate((np.zeros((num_s,num_s)),X),axis=1)
    P=np.concatenate((P,np.zeros((num_e,P.shape[1]))),axis=0)
    P_norm=P.sum(1,keepdims=True) # 归一化，等概率随机游走
    P=P/P_norm
    for i in range(P.shape[0]): # 对于全零行进行特殊处理
        if np.isnan(P[i][0]):
            P[i,:num_s]=1/num_s
            P[i,num_s:]=1/num_e
            
    Z=np.mat(Z)
    P=np.mat(P)
    for i in range(num_walk):
        Z=alpha*Z*P+(1-alpha)*Z_0
    return Z

In [3]:
res=randomWalk(UI, alpha=0.9, num_walk=2)
res



matrix([[0.10085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,
         0.00048157],
        [0.00085896, 0.10085896, 0.00085896, ..., 0.00048157, 0.00048157,
         0.00048157],
        [0.00085896, 0.00085896, 0.10085896, ..., 0.00048157, 0.00048157,
         0.00048157],
        ...,
        [0.00085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,
         0.00048157],
        [0.00085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,
         0.00048157],
        [0.00085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,
         0.00048157]])