In [86]:
import math

import pandas as pd
import numpy as np
import random

# Load the train ("base") and test splits; the files are tab-separated and
# have no header row, so the column names are supplied here.
u1_base = pd.read_csv(
    'ml-100k/u1.base',
    sep='\t',
    names=['uid', 'iid', 'rate', 'timestamp'],
)
u1_test = pd.read_csv(
    'ml-100k/u1.test',
    sep='\t',
    names=['uid', 'iid', 'rate', 'timestamp'],
)

# Initialisation: the matrix dimensions are the largest user id and item id
# found in the training split (assumes ids are 1-based and contiguous enough
# for a dense matrix -- TODO confirm against the dataset).
user_num, item_num = (u1_base[column].max() for column in ('uid', 'iid'))

# Dense user x item matrices, zero-filled: float ratings and integer 0/1 flags.
rating_matrix = np.zeros((user_num, item_num), dtype=float)
y_ui = np.zeros((user_num, item_num), dtype=int)

# Positive-feedback lookups built from the training split:
#   I_u: 0-based user index -> set of 0-based item indices the user rated >= 4
#   U_i: 0-based item index -> set of 0-based user indices who rated it >= 4
I_u = {}
U_i = {}

# Convert the training records into matrix form. Only ratings >= 4 are stored;
# every other cell of rating_matrix / y_ui keeps its initial 0.
# The rating filter is applied once as a boolean mask and the remaining rows
# are walked with itertuples(), which avoids building a Series per record the
# way iterrows() does.
liked_base = u1_base[u1_base['rate'] >= 4]
for record in liked_base.itertuples(index=False):
    user_id = record.uid - 1  # ids in the file are shifted to 0-based indices
    item_id = record.iid - 1
    rating_matrix[user_id, item_id] = record.rate
    y_ui[user_id, item_id] = 1
    I_u.setdefault(user_id, set()).add(item_id)
    U_i.setdefault(item_id, set()).add(user_id)
        
# Ground truth for evaluation, taken from the test split:
#   I_u_preferred: 0-based user index -> set of 0-based item indices rated >= 4
#   U_te: the users that have at least one such item in the test split
I_u_preferred = {}
U_te = set()
# Filter on the rating once, then walk the remaining rows with itertuples()
# instead of the much slower per-row iterrows().
for record in u1_test[u1_test['rate'] >= 4].itertuples(index=False):
    user_id = record.uid - 1
    item_id = record.iid - 1
    I_u_preferred.setdefault(user_id, set()).add(item_id)
    U_te.add(user_id)


# Global average: share of all (user, item) cells that are flagged 1 in y_ui.
GlobalAverage = y_ui.sum() / user_num / item_num

# Number of flagged users per item (column sums of y_ui), computed in one
# NumPy reduction and kept as a plain list indexed by item.
y_sum_col = y_ui.sum(axis=0).tolist()

# Item bias: the item's flagged-user rate minus GlobalAverage. Items that no
# user flagged get a bias of exactly 0 rather than -GlobalAverage. The column
# sum above is reused, so no second pass over the users is needed.
item_bias = [
    liked_count / user_num - GlobalAverage if liked_count else 0
    for liked_count in y_sum_col
]


In [87]:
def R_HAT(u, i):
    """Return the predicted score of item ``i`` for user ``u``.

    The score is the item's entry in the module-level ``item_bias``; ``u`` is
    accepted for interface compatibility but not used, so every user gets the
    same score for a given item.
    """
    score = item_bias[i]
    return score
# Candidate items for user u: every item whose y_ui flag is 0 for that user.
def I_unob_u(user_id):
    """Return the item indices whose entry in ``y_ui[user_id]`` equals 0.

    Note that this is "no positive flag", not "never rated": an item the user
    rated below the positive threshold also has a 0 flag and is returned.
    """
    user_flags = y_ui[user_id]
    return np.nonzero(user_flags == 0)[0]

In [88]:
# Items recommended to user u.
def get_rec_items(u, R_HAT, k):
    """Return up to ``k`` candidate items for user ``u``, best score first.

    Candidates come from ``I_unob_u(u)`` and are scored with ``R_HAT(u, item)``.
    The sort is stable, so items with equal scores keep the order in which
    ``I_unob_u`` returned them.
    """
    scored = [(item, R_HAT(u, item)) for item in I_unob_u(u)]
    # Rank by predicted score, highest first.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    # Keep only the item ids of the first k entries.
    return [item for item, _score in scored[:k]]

In [89]:
def Pre_at_k(R_HAT, k):
    """Return Precision@k averaged over the test users in ``U_te``.

    For each user the precision is the number of recommended items that appear
    in ``I_u_preferred[user]`` divided by ``k`` (not by the number of items
    actually returned).
    """
    per_user_precision = (
        len(set(get_rec_items(user, R_HAT, k)) & set(I_u_preferred[user])) / k
        for user in U_te
    )
    return sum(per_user_precision) / len(U_te)


def Rec_at_k(R_HAT, k):
    """Return Recall@k averaged over the test users in ``U_te``.

    For each user the recall is the number of recommended items that appear in
    ``I_u_preferred[user]`` divided by the size of that preferred set.
    """
    recall_total = 0
    for user in U_te:
        recommended = set(get_rec_items(user, R_HAT, k))
        preferred = I_u_preferred[user]
        hits = recommended & set(preferred)
        recall_total += len(hits) / len(preferred)
    return recall_total / len(U_te)


def NDCG_at_k(R_HAT, k):
    """Return NDCG@k averaged over the test users in ``U_te``.

    For each user, DCG adds ``1 / log(pos + 2)`` for every 0-based position
    ``pos`` in the top-k list whose item is in ``I_u_preferred[user]``. The
    ideal DCG places ``min(len(preferred), k)`` hits at the top positions.
    Natural logarithms are used; the base cancels in the DCG / IDCG ratio.

    The recommendation list may be shorter than ``k`` (fewer candidates than
    requested); the missing positions simply contribute nothing.

    Raises:
        ZeroDivisionError: if the ideal DCG is 0 (``k`` is 0 or a user's
            preferred set is empty) or ``U_te`` is empty.
    """
    NDCG = 0
    for u in U_te:
        rec_items = get_rec_items(u, R_HAT, k)
        pre_items = I_u_preferred[u]
        # Walk the list that was actually returned instead of indexing
        # positions 0..k-1, which fails when fewer than k items come back.
        DCG_u = sum(
            1 / math.log(pos + 2)
            for pos, item in enumerate(rec_items[:k])
            if item in pre_items
        )
        # Normaliser Z_u: DCG of a ranking with every possible hit at the top.
        IDCG_u = sum(1.0 / math.log(pos + 2) for pos in range(min(len(pre_items), k)))
        NDCG += DCG_u / IDCG_u
    return NDCG / len(U_te)


In [90]:
# Precision@5, Recall@5 and NDCG@5 of the item-bias scorer, shown as one tuple.
(Pre_at_k(R_HAT, 5), Rec_at_k(R_HAT, 5), NDCG_at_k(R_HAT, 5))

(0.23377192982456205, 0.05712433087638161, 0.25676161522376395)

In [91]:
# Report the three top-5 metrics rounded to four decimals.
precision_at_5 = Pre_at_k(R_HAT, 5)
recall_at_5 = Rec_at_k(R_HAT, 5)
ndcg_at_5 = NDCG_at_k(R_HAT, 5)
print(f"Pre: {precision_at_5:.4f}")
print(f"Rec: {recall_at_5:.4f}")
print(f"NDCG: {ndcg_at_5:.4f}")

Pre: 0.2338
Rec: 0.0571
NDCG: 0.2568
