In [48]:
import math

import pandas as pd
import numpy as np
import random

from tqdm import tqdm

#读入数据集
u1_base = pd.read_csv('ml-100k/u1.base', sep='\t', names=['uid', 'iid', 'rate', 'timestamp'])
u1_test = pd.read_csv('ml-100k/u1.test', sep='\t', names=['uid', 'iid', 'rate', 'timestamp'])

#初始化
user_num = u1_base['uid'].max()
item_num = u1_base['iid'].max()

rating_matrix = np.zeros((user_num, item_num), float)
y_ui = np.zeros((user_num, item_num), int)

ui_set = set()
#base记录转化为matrix
for index, row in u1_base.iterrows():
    user_id = row['uid'] - 1
    item_id = row['iid'] - 1
    rating = row['rate']
    if rating>=4:
        ui_set.add((user_id,item_id))
        rating_matrix[user_id, item_id] = rating
        y_ui[user_id, item_id] = 1

R = y_ui.sum()
#全局平均
GlobalAverage = R / user_num/item_num

#计算四个参数 user_means item_means user_bias item_bias
rating_sum_row = [sum(row) for row in rating_matrix]
y_sum_row = [sum(row) for row in y_ui]

rating_sum_col = [sum(column) for column in zip(*rating_matrix)]
y_sum_col = [sum(column) for column in zip(*y_ui)]

item_means = []
for i in range(item_num):
    if y_sum_col[i] == 0:
        item_means.append(GlobalAverage)
    else:
        item_means.append(rating_sum_col[i] / y_sum_col[i])

item_bias = []
for i in range(item_num):
    if y_sum_col[i] == 0:
        item_bias.append(0)
    else:
        sum_bias = 0
        for j in range(user_num):
            sum_bias += rating_matrix[j][i]
        item_bias.append(sum_bias / user_num - GlobalAverage)


In [49]:
#用户u未评价过的物品集
def I_unob_u(user_id):
    return np.where(y_ui[user_id] == 0)[0]
def sample_item_unob(user_id):
    unob_items = I_unob_u(user_id)
    if len(unob_items)>0:
        return np.random.choice(unob_items)
    else:
        return -1

In [50]:
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

In [51]:
def BPR(u,i):
    return np.dot(U[u], V[i]) + item_bias[i]

In [52]:
T = 500
lr = 0.01
lmda = 0.01

d = 20
U = np.random.rand(user_num, d)
V = np.random.rand(item_num, d)
U = (U - 0.5) * 0.01
V = (V - 0.5) * 0.01

In [53]:
S_list = list(ui_set)
for t in range(T):
    for i in tqdm(range(R),desc=f"Epoch {t+1}/{T}",leave=False):
        sampled_ui = random.sample(S_list, 1)[0]
        user_id = sampled_ui[0]
        item_id = sampled_ui[1]
        unlike_item = sample_item_unob(user_id)
        
        r_uij = BPR(user_id,item_id)-BPR(user_id,unlike_item)
        e = -sigmoid(-r_uij)
        
        delta_Uu = e * (V[item_id]-V[unlike_item]) + lmda * U[user_id]
        delta_Vi = e * U[user_id] + lmda * V[item_id]
        delta_Vj = e * (-U[user_id]) + lmda * V[unlike_item]
        delta_bi = e + lmda * item_bias[user_id]
        delta_bj = -e + lmda * item_bias[unlike_item]
        #update 

        U[user_id] -= lr * delta_Uu
        V[item_id] -= lr * delta_Vi
        V[unlike_item] -= lr * delta_Vj
        item_bias[item_id] -= lr * delta_bi
        item_bias[unlike_item] -= lr * delta_bj

                                                                       

In [54]:
#test
I_u_preferred = {}
U_te = set()
for index, row in u1_test.iterrows():
    user_id = row['uid'] - 1
    item_id = row['iid'] - 1
    rating = row['rate']
    if rating >= 4:
        if user_id not in I_u_preferred:
            I_u_preferred[user_id] = set()
        I_u_preferred[user_id].add(item_id)
        U_te.add(user_id)

In [55]:
#为用户u推荐的物品
def get_rec_items(u,R_HAT,k):
    unob_items = I_unob_u(u)
    items_pred = {i:R_HAT(u,i) for i in unob_items }
    # 根据预测评分降序排序
    sorted_items = sorted(items_pred.items(), key=lambda x: x[1], reverse=True)
    # 输出前 k 个物品及其评分
    top_k_items = sorted_items[:k]
    return [k for k,v in top_k_items]


In [56]:
def Pre_at_k(R_HAT, k):
    Pre = 0
    for u in U_te:
        rec_items = get_rec_items(u, R_HAT, k)
        pre_items = I_u_preferred[u]  #用户实际喜欢项目 
        Pre_u = len(set(rec_items).intersection(set(pre_items))) / k
        Pre += Pre_u
    return Pre / len(U_te)


def Rec_at_k(R_HAT, k):
    Rec = 0
    for u in U_te:
        rec_items = get_rec_items(u, R_HAT, k)
        pre_items = I_u_preferred[u]
        Rec_u = len(set(rec_items).intersection(set(pre_items))) / len(pre_items)
        Rec += Rec_u
    return Rec / len(U_te)


def NDCG_at_k(R_HAT, k):
    NDCG = 0
    for u in U_te:
        rec_items = get_rec_items(u, R_HAT, k)
        pre_items = I_u_preferred[u]
        DCG_u = 0
        for pos in range(k):
            if rec_items[pos] in pre_items:
                DCG_u += 1 / math.log(pos + 2)
        IDCG_u = sum(1.0 / math.log(pos + 2) for pos in range(min(len(pre_items), k)))  #Zu
        NDCG += DCG_u / IDCG_u
    return NDCG / len(U_te)


In [58]:
print(f" Pre: {Pre_at_k(BPR, 5):.4f}")
print(f" Rec: {Rec_at_k(BPR, 5):.4f}")
print(f"NDCG: {NDCG_at_k(BPR, 5):.4f}")

 Pre: 0.4022
 Rec: 0.1281
NDCG: 0.4353
