In [1]:
# -*- coding: utf-8 -*-
import numpy as np
from itertools import islice
from sklearn.model_selection import train_test_split
np.set_printoptions(threshold=np.inf)

## 构造结点类，存储电影或者用户结点

In [2]:
class Node(object):
    """A vertex of the bipartite user-movie graph.

    Attributes:
        name: Identifier of the user or movie this node stands for
            (``None`` until one is assigned).
        neighbours: Nodes directly linked to this one; each instance gets
            its own list.
    """

    def __init__(self, name=None):
        # Declare ``name`` up front so the attribute always exists, even for
        # callers that only assign it after construction.
        self.name = name
        self.neighbours = []

In [3]:
class ItemRank(object):
    """ItemRank scoring over a bipartite user-movie graph.

    ``np_data`` is a 2-D array holding one rating per row. Column 0 is read
    as the user id, column 1 as the movie id and column 2 as the rating;
    the values only need to be convertible with ``astype(int)`` /
    ``astype(float)``.

    Typical use: ``generate_graph()`` -> ``generate_coef_from_graph()`` ->
    ``generate_d(user)`` -> repeated calls to ``item_rank(...)``.
    """

    def __init__(self, np_data):
        self.movie_names = []
        self.user_names = []
        self.movie_nodes = {}
        self.user_nodes = {}
        self.data = np_data

    # Build the graph model
    def generate_graph(self):
        """Create one node per user and per movie and link every rated pair.

        Fills ``movie_names``, ``user_names``, ``movie_nodes`` and
        ``user_nodes``. A (user, movie) row appends the movie node to the
        user's ``neighbours`` and the user node to the movie's ``neighbours``.
        """
        # Convert the id columns once instead of once per row.
        movie_ids = self.data[:, 1].astype(int)
        user_ids = self.data[:, 0].astype(int)
        self.movie_names = list(set(movie_ids))
        self.user_names = list(set(user_ids))
        self.movie_nodes = {}
        self.user_nodes = {}
        for movie in self.movie_names:
            node = Node()
            node.name = movie
            self.movie_nodes[movie] = node
        for user in self.user_names:
            node = Node()
            node.name = user
            self.user_nodes[user] = node
        # If a user rated a movie, the movie becomes a neighbour of the user
        # and the user becomes a neighbour of the movie.
        for user_id, movie_id in zip(user_ids, movie_ids):
            user_node = self.user_nodes[user_id]
            movie_node = self.movie_nodes[movie_id]
            user_node.neighbours.append(movie_node)
            movie_node.neighbours.append(user_node)

    # Derive the correlation matrix from the graph model
    def generate_coef_from_graph(self):
        """Compute the column-normalised movie co-occurrence matrix.

        Entry ``[i, j]`` counts, before normalisation, how often movie ``j``
        appears among the neighbours of the users linked to movie ``i``
        (``i != j``). Each column is then divided by its sum. Columns whose
        sum is zero are left as all zeros instead of being turned into NaN.
        Rows and columns follow the order of ``movie_names``. The result is
        stored in ``self.correlation_matrix``.
        """
        print("******此刻正在计算相关系数矩阵......")
        movie_count = len(self.movie_names)
        # A dict lookup replaces the linear ``list.index`` scan that used to
        # run inside the innermost loop.
        index_of = {name: position for position, name in enumerate(self.movie_names)}
        correlation_matrix = np.zeros((movie_count, movie_count))
        for movie_name, movie_node in self.movie_nodes.items():
            row = index_of[movie_name]
            for user in movie_node.neighbours:
                for movie in user.neighbours:
                    if movie != movie_node:
                        correlation_matrix[row, index_of[movie.name]] += 1
        # Normalise column by column; skipping empty columns avoids 0/0 and
        # also works when there are fewer than two movies.
        column_sums = correlation_matrix.sum(axis=0)
        has_links = column_sums != 0
        correlation_matrix[:, has_links] /= column_sums[has_links]
        self.correlation_matrix = correlation_matrix

    # ItemRank formula
    def item_rank(self, alpha, ir, d):
        """Apply one ItemRank update step.

        Returns ``alpha * C @ ir + (1 - alpha) * d`` where ``C`` is
        ``self.correlation_matrix``; ``generate_coef_from_graph`` must have
        been called first.
        """
        print("******此刻正在计算IR......")
        return alpha * np.dot(self.correlation_matrix, ir) + (1 - alpha) * d

    # Build the rating vector
    def generate_d(self, user_name):
        """Return the rating vector of ``user_name``.

        The vector has one slot per entry of ``movie_names`` (same order);
        movies the user has no row for stay at 0. If the user has several
        rows for one movie, the last row wins.
        """
        d = np.zeros(len(self.movie_names))
        index_of = {name: position for position, name in enumerate(self.movie_names)}
        # Select this user's rows in one vectorised comparison.
        user_rows = self.data[:, 0].astype(int) == user_name
        rated_movies = self.data[user_rows, 1].astype(int)
        ratings = self.data[user_rows, 2].astype(float)
        for movie_id, rating in zip(rated_movies, ratings):
            d[index_of[movie_id]] = rating
        return d

## 加载评分数据并创建 ItemRank 对象

In [4]:
# Read ratings.csv, skip its header row and flatten every comma-separated
# field of the remaining lines into one list of strings.
with open("ratings.csv") as file:
    data = [
        field
        for line in islice(file, 1, None)
        for field in line.rstrip("\n").split(",")
    ]
# Regroup the flat field list into rows of four columns.
np_data = np.array(data).reshape(-1, 4)
# The train/test split is disabled, so the complete data set is used:
# train_data, test_data = train_test_split(np_data, train_size=0.8)
train_data = np_data
item_rank = ItemRank(train_data)

## 生成图模型

In [5]:
# Build the user and movie nodes and link every rated (user, movie) pair.
item_rank.generate_graph()
# Show the movie ids; their list order is the index order used by the
# correlation matrix and by the rating vector.
item_rank.movie_names

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 65567,
 34,
 35,
 36,
 37,
 32797,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 65585,
 50,
 49,
 52,
 53,
 54,
 55,
 57,
 58,
 32825,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 98369,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 32840,
 76,
 32844,
 78,
 79,
 80,
 81,
 82,
 83,
 77,
 85,
 86,
 32853,
 88,
 89,
 84,
 87,
 92,
 93,
 94,
 95,
 131168,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 65642,
 107,
 65638,
 163949,
 110,
 111,
 112,
 113,
 32882,
 114,
 116,
 117,
 118,
 119,
 121,
 122,
 123,
 32892,
 125,
 126,
 124,
 65665,
 130,
 32898,
 132,
 131,
 129,
 135,
 98441,
 137,
 140,
 141,
 144,
 145,
 146,
 147,
 65682,
 149,
 150,
 151,
 152,
 153,
 154,
 65685,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 32943,
 183,


## 计算系数矩阵

In [6]:
%%time
# Fill item_rank.correlation_matrix from the graph. The recorded run of this
# cell took about 1h 26min of wall time.
item_rank.generate_coef_from_graph()

******此刻正在计算相关系数矩阵......
Wall time: 1h 26min 10s


## 生成评分向量

In [7]:
# Use user 665 for the computation: build that user's rating vector, which
# has one slot per movie in item_rank.movie_names (0 where no rating exists).
d = item_rank.generate_d(user_name=665)
d

array([ 0.,  3.,  3.,  0.,  3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  4.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  4.,  0.,  2.,  0.,  0.,  0.,  0.,  2.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  4.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  2.,  0.,  3.,  0.,  2.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  2.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  4.,  1.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  4.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        3.,  0.,  0.,  0.,  3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  4.,  0.,  0.,  0.,  0.,  0.,
        0.,  3.,  0.,  0.,  5.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  3.,  0.,  0.,  0.,  0.,  0.,  3.,  0.,  0

## 迭代计算IR

In [8]:
# Parameters of the ItemRank iteration.
ALPHA = 0.85            # weight of the propagated scores versus the rating vector
TOLERANCE = 0.0001      # largest per-movie change still counted as converged
MAX_ITERATIONS = 1000   # safety cap so a non-converging (e.g. NaN) run still stops

# Start from the user's rating vector; copy it so IR never aliases d.
IR = d.copy()
converged = False
counter = 0
while not converged and counter < MAX_ITERATIONS:
    counter += 1
    old_IR = IR
    IR = item_rank.item_rank(ALPHA, IR, d)
    # Compare absolute changes: a signed difference would treat any increase,
    # however large, as "converged".
    converged = bool((np.abs(IR - old_IR) < TOLERANCE).all())
print("after", counter, "counts")

******此刻正在计算IR......
******此刻正在计算IR......
******此刻正在计算IR......
******此刻正在计算IR......
******此刻正在计算IR......
******此刻正在计算IR......
******此刻正在计算IR......
******此刻正在计算IR......
******此刻正在计算IR......
after 9 counts


## IR值

In [9]:
# Display the ItemRank score vector left by the iteration loop (one score per
# movie, in item_rank.movie_names order).
IR

array([  1.53479941e+00,   1.27681471e+00,   8.84612781e-01,
         8.98629343e-02,   9.23445276e-01,   8.16723614e-01,
         4.67195254e-01,   4.23792368e-02,   7.79119907e-02,
         8.00316944e-01,   7.69658577e-01,   1.96950295e-01,
         1.15095242e-01,   3.11379403e-01,   1.68293059e-01,
         1.48885928e+00,   6.75740167e-01,   2.13324005e-01,
         7.84589819e-01,   1.36941262e-01,   7.98543366e-01,
         3.70602067e-01,   1.73337569e-01,   3.10942287e-01,
         7.60707106e-01,   4.93776517e-02,   8.05579651e-02,
         1.73076677e-01,   3.09227703e-01,   9.36909388e-02,
         2.95613403e-01,   1.80636768e+00,   2.20770794e-02,
         1.43501624e+00,   1.13830794e-01,   7.20065245e-01,
         3.29640105e-02,   7.55007054e-03,   1.30117957e+00,
         3.66445159e-02,   1.59203439e-01,   1.46417760e-01,
         1.39201861e-01,   3.66303921e-01,   4.51647620e-01,
         8.78272464e-02,   1.39636322e+00,   1.16726024e+00,
         8.71377724e-02,