In [7]:
import math

import pandas as pd
import numpy as np
import random

from tqdm import tqdm

# Load the training and test splits (tab-separated: user id, item id, rating, timestamp).
u1_base = pd.read_csv('ml-100k/u1.base', sep='\t', names=['uid', 'iid', 'rate', 'timestamp'])
u1_test = pd.read_csv('ml-100k/u1.test', sep='\t', names=['uid', 'iid', 'rate', 'timestamp'])

# Matrix dimensions. Ids are 1-based in the files, so the largest id is the count.
# Both splits are consulted so that every id appearing in the test split is a
# valid row/column index; sizing from the training split alone would make a
# test-only id fall outside y_ui.
user_num = int(max(u1_base['uid'].max(), u1_test['uid'].max()))
item_num = int(max(u1_base['iid'].max(), u1_test['iid'].max()))

# Binary feedback matrix: y_ui[u, i] == 1 iff the training rating is >= 4.
y_ui = np.zeros((user_num, item_num), int)

I_u = {}   # user index -> set of item indices the user rated >= 4
U_i = {}   # item index -> set of user indices that rated it >= 4
P = set()  # all (user index, item index) pairs rated >= 4

# Keep only the ratings >= 4 and convert the 1-based ids to 0-based indices.
liked = u1_base[u1_base['rate'] >= 4]
liked_users = liked['uid'].to_numpy() - 1
liked_items = liked['iid'].to_numpy() - 1

y_ui[liked_users, liked_items] = 1
for user_id, item_id in zip(liked_users, liked_items):
    P.add((user_id, item_id))
    I_u.setdefault(user_id, set()).add(item_id)
    U_i.setdefault(item_id, set()).add(user_id)

# Every user/item gets an entry, even when it has no rating >= 4, so later
# lookups such as I_u[u] never raise KeyError.
for u in range(user_num):
    I_u.setdefault(u, set())
for i in range(item_num):
    U_i.setdefault(i, set())

R = y_ui.sum()
# Global average: fraction of matrix cells that are 1.
GlobalAverage = R / user_num / item_num

# Per-user and per-item counts of 1-entries, then the two bias vectors.
y_sum_row = list(y_ui.sum(axis=1))

y_sum_col = list(y_ui.sum(axis=0))

# A user's bias is their row mean minus the global average; rows without any
# 1-entry get a bias of 0. Kept as a plain list because training updates
# individual entries in place.
user_bias = [0 if s == 0 else s / item_num - GlobalAverage for s in y_sum_row]

# Same for items, using column means.
item_bias = [0 if s == 0 else s / user_num - GlobalAverage for s in y_sum_col]

# All (user index, item index) cells that are 0 in y_ui; negatives are sampled from here.
ui_set_unob = set(zip(*np.where(y_ui == 0)))

In [8]:
# Seed both random number generators used by this notebook (`random` for the
# negative sampling, `np.random` for the factor initialisation) so that a
# Restart & Run All reproduces the same numbers.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

P_size = len(P)          # number of (user, item) pairs rated >= 4
rho = 3                  # sampled negatives per positive pair
A_size = rho * P_size    # number of unobserved pairs sampled each epoch
alpha = 0.5              # exponent applied to the neighbour count when normalising
lr = 0.01                # SGD step size
d = 20                   # number of latent dimensions
lmda = 0.001             # L2 regularisation weight
T = 100                  # number of training epochs

# Two item-factor matrices, initialised uniformly in [-0.005, 0.005).
W = (np.random.rand(item_num, d) - 0.5) * 0.01
V = (np.random.rand(item_num, d) - 0.5) * 0.01


In [9]:
def I_u_except_i(u, i):
    """Return a new set holding the items in I_u[u] with item i left out.

    The set stored in I_u is not modified; if i is not in it, the result is
    simply a copy of I_u[u].
    """
    return I_u[u].difference({i})

In [10]:
def U_u_except_i(u, i):
    """Aggregate the W rows of user u's items other than i.

    Returns the sum of W[j] over every j in I_u_except_i(u, i), divided by
    (number of such items) ** alpha. When there is no such item the result is
    a zero vector of length d.
    """
    # Build the neighbour set once; it is needed for the emptiness check,
    # the sum and the normaliser.
    neighbours = I_u_except_i(u, i)
    if not neighbours:
        return np.zeros(d)
    # Gather all neighbour rows in one indexing step and reduce over them.
    neighbour_sum = W[list(neighbours)].sum(axis=0)
    return neighbour_sum / len(neighbours) ** alpha

In [11]:
def FISM(u, i):
    """Predicted score of item i for user u.

    The score is user_bias[u] + item_bias[i] plus the dot product of
    U_u_except_i(u, i) with V[i].
    """
    aggregated = U_u_except_i(u, i)
    interaction = np.dot(aggregated, V[i])
    return user_bias[u] + item_bias[i] + interaction

In [14]:
# SGD training. Each epoch visits every pair in P (target 1) together with a
# freshly sampled set A of unobserved pairs (target 0).
S_list = list(ui_set_unob)
for t in range(T):
    A = random.sample(S_list, A_size)
    P_union_A = set(P).union(A)
    for pair in tqdm(P_union_A, desc=f"Epoch {t + 1}/{T}", leave=False):
        user_id, item_id = pair

        # The neighbour set and its aggregated W vector are needed several
        # times below; W is only modified at the very end of the iteration,
        # so computing them once here yields the same values.
        neighbours = I_u_except_i(user_id, item_id)
        U_vec = U_u_except_i(user_id, item_id)

        # Same expression as FISM(user_id, item_id), written out so that the
        # aggregated vector is not computed a second time.
        r_hat = user_bias[user_id] + item_bias[item_id] + np.dot(U_vec, V[item_id])
        e_ui = 1 - r_hat if pair in P else -r_hat

        # Gradients are taken before any parameter is changed.
        delta_bu = -e_ui + lmda * user_bias[user_id]
        delta_bi = -e_ui + lmda * item_bias[item_id]
        delta_Vi = -e_ui * U_vec + lmda * V[item_id]

        user_bias[user_id] -= lr * delta_bu
        item_bias[item_id] -= lr * delta_bi
        V[item_id] -= lr * delta_Vi

        if neighbours:
            fm = len(neighbours) ** alpha
            i_pie_list = list(neighbours)
            # Integer-array indexing updates all neighbour rows of W in one
            # step, which is faster than looping over them in Python. Note
            # that this uses V[item_id] as already updated above.
            W[i_pie_list] -= lr * ((-e_ui / fm) * V[item_id] + lmda * W[i_pie_list])

                                                                   

KeyboardInterrupt: 

In [51]:
# Test split: for each user, collect the items they rated >= 4.
I_u_preferred = {}   # user index -> set of item indices rated >= 4 in the test split
U_te = set()         # user indices having at least one such item
for _, test_row in u1_test.iterrows():
    # Ids in the file are 1-based; shift to 0-based indices.
    user_id = test_row['uid'] - 1
    item_id = test_row['iid'] - 1
    if not test_row['rate'] >= 4:
        continue
    I_u_preferred.setdefault(user_id, set()).add(item_id)
    U_te.add(user_id)

In [52]:
# Items whose entry in the user's row of y_ui is 0.
def I_unob_u(user_id):
    """Return the indices of the columns where y_ui[user_id] equals 0, in ascending order."""
    is_unobserved = y_ui[user_id] == 0
    return np.flatnonzero(is_unobserved)

In [53]:
# Items recommended to user u.
def get_rec_items(u,R_HAT,k):
    """Return the k best-scoring items among those given by I_unob_u(u).

    R_HAT(u, item) supplies the score of each candidate. The result is a list
    of items ordered by descending score; candidates with equal scores keep
    the order in which I_unob_u returned them. Fewer than k items are
    returned when there are fewer than k candidates.
    """
    scored = [(item, R_HAT(u, item)) for item in I_unob_u(u)]
    # list.sort is stable, so ties retain candidate order even with reverse=True.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [item for item, _ in scored[:k]]


In [54]:
def Pre_at_k(R_HAT, k):
    """Mean precision@k over the users in U_te.

    For each user, precision is the number of recommended items (from
    get_rec_items(u, R_HAT, k)) that also appear in I_u_preferred[u],
    divided by k.
    """
    total = 0
    for user in U_te:
        recommended = set(get_rec_items(user, R_HAT, k))
        liked = set(I_u_preferred[user])  # items the user actually liked
        hits = recommended & liked
        total += len(hits) / k
    return total / len(U_te)


def Rec_at_k(R_HAT, k):
    """Mean recall@k over the users in U_te.

    For each user, recall is the number of recommended items (from
    get_rec_items(u, R_HAT, k)) that also appear in I_u_preferred[u],
    divided by the size of I_u_preferred[u].
    """
    total = 0
    for user in U_te:
        liked = I_u_preferred[user]
        recommended = set(get_rec_items(user, R_HAT, k))
        hits = recommended & set(liked)
        total += len(hits) / len(liked)
    return total / len(U_te)


def NDCG_at_k(R_HAT, k):
    """Mean NDCG@k over the users in U_te.

    For each user, DCG adds 1 / log(pos + 2) for every 0-based position pos
    in the recommendation list (from get_rec_items(u, R_HAT, k)) whose item
    is in I_u_preferred[u]. It is divided by the ideal DCG, i.e. the same sum
    over the first min(len(I_u_preferred[u]), k) positions. Natural logarithms
    are used in both sums, so the base cancels in the ratio.
    """
    NDCG = 0
    for u in U_te:
        rec_items = get_rec_items(u, R_HAT, k)
        pre_items = I_u_preferred[u]
        # Walk the positions actually returned rather than range(k): the list
        # can be shorter than k when the user has fewer than k candidates,
        # and indexing past its end would raise IndexError.
        DCG_u = sum(
            1 / math.log(pos + 2)
            for pos, item in enumerate(rec_items[:k])
            if item in pre_items
        )
        # Normaliser: the best attainable DCG for this user.
        IDCG_u = sum(1.0 / math.log(pos + 2) for pos in range(min(len(pre_items), k)))
        NDCG += DCG_u / IDCG_u
    return NDCG / len(U_te)


In [55]:
# Evaluate the FISM scorer with a cutoff of 5 and print each metric as soon as
# it has been computed.
pre_at_5 = Pre_at_k(FISM, 5)
print(f"Pre: {pre_at_5:.4f}")
rec_at_5 = Rec_at_k(FISM, 5)
print(f"Rec: {rec_at_5:.4f}")
ndcg_at_5 = NDCG_at_k(FISM, 5)
print(f"NDCG: {ndcg_at_5:.4f}")

Pre: 0.3851
Rec: 0.1255
NDCG: 0.4153
