In [4]:
from scipy.sparse import coo_matrix
import numpy as np
import sys
# 引入process
sys.path.append(r"F:\\FictionRecommend\\")
import Ipynb_importer
import process

importing Jupyter notebook from process.ipynb


In [5]:
# 测试文件
log_file = "F:/badou/tmp/data/recommender/personalRank/log.txt"

In [14]:
def graph_to_m(graph):
    '''
    :param graph:
        从用户评分文件中得到的 二分图
    :return:
        一个稀疏矩阵 sparse mat M 矩阵
        list 所有的顶点
        dict 顶点的位置
    '''
    # graph.keys() 返回值是dict_keys类型，需要转为list
    vertex = list(graph.keys())
    
    # 记录每一个顶点位置的数据结构
    address_dict = {}
    # 指定行和列
    total_len = len(vertex)
    
    for index in range(len(vertex)):
        # 构造M矩阵，构建顶点，通过矩阵可以得到每一行对应的是哪一个顶点
        address_dict[vertex[index]] = index
    
    # 存储行索引，列索引以及对应的数值
    row = []
    col = []
    data = []
    for element_i in graph:
        # M矩阵第i行z列的数值，是顶点i到顶点z有没有路径连通，如果有数值那么就是顶点i出度的倒数
        weight = round(1 / len(graph[element_i]), 3)
        # 行索引
        row_index = address_dict[element_i]
        # 列索引
        for element_j in graph[element_i]:
            col_index = address_dict[element_j]
            row.append(row_index)
            col.append(col_index)
            data.append(weight)
    
    # 初始化矩阵
    row = np.array(row)
    col = np.array(col)
    data = np.array(data)
    
    # 同时指定行和列
    m = coo_matrix((data, (row, col)), shape=(total_len, total_len))
    
    # 矩阵M，所有的顶点，顶点的位置    
    return m, vertex, address_dict

In [7]:
graph = process.get_graph_from_data(log_file)

In [12]:
graph

{'A': {'item_a': 1, 'item_b': 1, 'item_d': 1},
 'B': {'item_a': 1, 'item_c': 1},
 'C': {'item_b': 1, 'item_e': 1},
 'D': {'item_c': 1, 'item_d': 1},
 'item_a': {'A': 1, 'B': 1},
 'item_b': {'A': 1, 'C': 1},
 'item_c': {'B': 1, 'D': 1},
 'item_d': {'A': 1, 'D': 1},
 'item_e': {'C': 1}}

In [15]:
m, vertex, address_dict = graph_to_m(graph)

In [17]:
# A 的出度是 3，所以A对应的这一行中的值都应该是1/3
m.todense()

matrix([[0.   , 0.333, 0.333, 0.333, 0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.5  , 0.   , 0.   , 0.   , 0.5  , 0.   , 0.   , 0.   , 0.   ],
        [0.5  , 0.   , 0.   , 0.   , 0.   , 0.   , 0.5  , 0.   , 0.   ],
        [0.5  , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.5  ],
        [0.   , 0.5  , 0.   , 0.   , 0.   , 0.5  , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.5  , 0.   , 0.   , 0.   , 0.5  ],
        [0.   , 0.   , 0.5  , 0.   , 0.   , 0.   , 0.   , 0.5  , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.5  , 0.   , 0.5  , 0.   , 0.   , 0.   ]])

In [18]:
vertex

['A', 'item_a', 'item_b', 'item_d', 'B', 'item_c', 'C', 'item_e', 'D']

In [19]:
address_dict

{'A': 0,
 'B': 4,
 'C': 6,
 'D': 8,
 'item_a': 1,
 'item_b': 2,
 'item_c': 5,
 'item_d': 3,
 'item_e': 7}

In [20]:
def mat_all_point(m_mat, vertex, alpha):
    """
        求单位矩阵E - alpha * m矩阵的转置
        :param m_mat:
            m矩阵
        :param vertex:
            所有的顶点
        :param alpha:
            随机游走的概率
        :return:
            稀疏矩阵 sparse mat
    """
    # 初始化单位阵(只在对角线上有值，且值为1)(使用coo的方式)[coo_matrix]
    total_len = len(vertex)
    row = []
    col = []
    data = []
    
    for index in range(total_len):
        row.append(index)
        col.append(index)
        # 值为1
        data.append(1)
    
    row = np.array(row)
    col = np.array(col)
    data = np.array(data)
    eye_t = coo_matrix((data, (row, col)), shape=(total_len, total_len))
    
    print(eye_t.todense())
    
    # # tocsr, 转为csr格式，可以使运算更快
    return eye_t.tocsr() - alpha * m_mat.tocsr().transpose()

In [22]:
mat_all_point(m, vertex, 0.8).todense()

[[1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 1]]


matrix([[ 1.    , -0.4   , -0.4   , -0.4   ,  0.    ,  0.    ,  0.    ,
          0.    ,  0.    ],
        [-0.2664,  1.    ,  0.    ,  0.    , -0.4   ,  0.    ,  0.    ,
          0.    ,  0.    ],
        [-0.2664,  0.    ,  1.    ,  0.    ,  0.    ,  0.    , -0.4   ,
          0.    ,  0.    ],
        [-0.2664,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,
          0.    , -0.4   ],
        [ 0.    , -0.4   ,  0.    ,  0.    ,  1.    , -0.4   ,  0.    ,
          0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    ,  0.    , -0.4   ,  1.    ,  0.    ,
          0.    , -0.4   ],
        [ 0.    ,  0.    , -0.4   ,  0.    ,  0.    ,  0.    ,  1.    ,
         -0.8   ,  0.    ],
        [ 0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    , -0.4   ,
          1.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , -0.4   ,  0.    , -0.4   ,  0.    ,
          0.    ,  1.    ]])