In [4]:
import math

import pandas as pd
import numpy as np
import random

# Load the u1 train / test splits from the ml-100k directory.
# Both files are tab-separated with no header row, so column names are supplied here.
u1_base = pd.read_csv('ml-100k/u1.base', sep='\t', names=['uid', 'iid', 'rate', 'timestamp'])
u1_test = pd.read_csv('ml-100k/u1.test', sep='\t', names=['uid', 'iid', 'rate', 'timestamp'])

# Matrix dimensions are taken from the largest ids seen in the training split.
# NOTE(review): an item id that appears only in u1_test and exceeds this maximum
# would fall outside the matrices below — confirm against the evaluation code.
user_num = u1_base['uid'].max()
item_num = u1_base['iid'].max()

# rating_matrix holds the observed rating (0.0 where unobserved);
# y_ui is the 0/1 indicator of which (user, item) cells were observed.
rating_matrix = np.zeros((user_num, item_num), float)
y_ui = np.zeros((user_num, item_num), int)

# Scatter every training record into the matrices in one vectorised assignment.
# Ids in the files are 1-based, matrix indices are 0-based. If a (uid, iid) pair
# were repeated, the last record wins, exactly as with a row-by-row loop.
row_idx = u1_base['uid'].to_numpy() - 1
col_idx = u1_base['iid'].to_numpy() - 1
rating_matrix[row_idx, col_idx] = u1_base['rate'].to_numpy()
y_ui[row_idx, col_idx] = 1

# Number of observed ratings
R = y_ui.sum()
# Global average rating over the observed cells
GlobalAverage = rating_matrix.sum() / R

# Per-user and per-item statistics: user_means, item_means, user_bias, item_bias.
# Rows of rating_matrix / y_ui are users, columns are items. The reductions are done
# by numpy along an axis instead of Python-level loops over every matrix cell.
rating_sum_row = list(rating_matrix.sum(axis=1))
y_sum_row = list(y_ui.sum(axis=1))

rating_sum_col = list(rating_matrix.sum(axis=0))
y_sum_col = list(y_ui.sum(axis=0))

# Mean observed rating per user / item; fall back to the global average when
# the user / item has no observed rating at all.
user_means = [
    total / count if count != 0 else GlobalAverage
    for total, count in zip(rating_sum_row, y_sum_row)
]
item_means = [
    total / count if count != 0 else GlobalAverage
    for total, count in zip(rating_sum_col, y_sum_col)
]

# Deviation of each observed rating from the global average (0 where unobserved).
centered = y_ui * (rating_matrix - GlobalAverage)

# Bias = average deviation from the global average over the observed ratings;
# 0 when there is nothing observed. Kept as plain lists because later cells
# update individual entries in place.
user_bias = [
    dev / count if count != 0 else 0
    for dev, count in zip(centered.sum(axis=1), y_sum_row)
]
item_bias = [
    dev / count if count != 0 else 0
    for dev, count in zip(centered.sum(axis=0), y_sum_col)
]


In [8]:
def I_u(user_id):
    """Return the indices of the items that the given user has rated.

    Args:
        user_id: row index into the global indicator matrix ``y_ui``.

    Returns:
        1-D integer array of the column indices where ``y_ui[user_id]`` equals 1.
    """
    observed_mask = y_ui[user_id] == 1
    return np.flatnonzero(observed_mask)
def I_unob_u(user_id):
    """Return the indices of the items that the given user has NOT rated.

    Args:
        user_id: row index into the global indicator matrix ``y_ui``.

    Returns:
        1-D integer array of the column indices where ``y_ui[user_id]`` equals 0.
    """
    unobserved_mask = y_ui[user_id] == 0
    return np.flatnonzero(unobserved_mask)

In [9]:
# Quick look: rated vs. unrated item indices for the user stored in row 0
I_u(0),I_unob_u(0)

(array([  0,   1,   2,   3,   4,   6,   7,   8,  10,  12,  14,  15,  17,
         18,  20,  21,  24,  25,  27,  28,  29,  31,  33,  34,  36,  37,
         39,  40,  41,  42,  44,  45,  47,  49,  51,  54,  56,  57,  58,
         62,  65,  67,  70,  74,  76,  78,  82,  86,  87,  88,  92,  93,
         94,  98, 100, 104, 105, 108, 109, 110, 114, 115, 118, 121, 122,
        123, 125, 126, 130, 132, 134, 135, 136, 137, 138, 140, 141, 143,
        145, 146, 148, 151, 152, 155, 157, 161, 164, 165, 166, 167, 168,
        171, 172, 175, 177, 178, 180, 181, 186, 190, 191, 193, 194, 196,
        197, 198, 202, 203, 204, 206, 210, 215, 216, 219, 222, 230, 233,
        236, 237, 238, 239, 243, 244, 245, 246, 248, 250, 255, 256, 260,
        262, 267, 268, 269, 270]),
 array([   5,    9,   11, ..., 1679, 1680, 1681]))

In [None]:
def get_rec_items(u, R_HAT, k):
    """Recommend items for a user from the items the user has not rated.

    Args:
        u: user index, passed to ``I_unob_u`` and to ``R_HAT``.
        R_HAT: scoring callable invoked as ``R_HAT(u, item)``; it must accept
            exactly these two positional arguments.
        k: slice bound applied to the ranked list (the first ``k`` entries are kept).

    Returns:
        List of item indices ordered by descending predicted score; items with
        equal scores keep the order in which ``I_unob_u`` returned them.
    """
    # Score every unrated candidate once
    scored = [(item, R_HAT(u, item)) for item in I_unob_u(u)]
    # Stable sort, highest predicted score first
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [item for item, _score in scored[:k]]

In [60]:
# Positive / negative training triples built from the observed ratings.
# A rating at or above that user's own mean rating becomes (user, item, 1),
# anything below it becomes (user, item, -1). Ids are shifted to 0-based indices.
S_Pos = set()
S_Neg = set()
# Iterate the three columns in lock-step instead of materialising a Series per row.
for uid, iid, rating in zip(u1_base['uid'], u1_base['iid'], u1_base['rate']):
    user_id = uid - 1
    item_id = iid - 1
    if rating >= user_means[user_id]:
        S_Pos.add((user_id, item_id, 1))
    else:
        S_Neg.add((user_id, item_id, -1))
        

In [61]:
# Length of the latent vectors (rows of U and V, slices of M)
d = 20


def items_u_r(u, item, r):
    """Return the items user ``u`` rated exactly ``r``, leaving out ``item``.

    Args:
        u: row index into the global ``rating_matrix``.
        item: item index to exclude from the result.
        r: rating value to match.

    Returns:
        1-D integer array of column indices whose entry in ``rating_matrix[u]``
        equals ``r`` and which differ from ``item``.
    """
    same_rating = np.flatnonzero(rating_matrix[u] == r)
    keep = same_rating != item
    return same_rating[keep]


In [62]:
def U_MPC(u, i, M):
    """Build the rating-class context vector of user ``u`` with respect to item ``i``.

    For each rating value 1..5, the vectors ``M[class, i']`` of the items ``i'``
    that ``u`` gave this rating (excluding ``i`` itself) are summed and divided by
    the square root of their count; the five contributions are added together.
    Rating classes with no such item contribute nothing.

    Args:
        u: user index.
        i: item index excluded from every rating class.
        M: array indexed as ``M[rating_class, item, :]``.

    Returns:
        Float array of length ``d`` (the global latent dimension).
    """
    context = np.zeros(d, float)
    for class_idx in range(5):
        rated = items_u_r(u, i, class_idx + 1)
        count = len(rated)
        if count != 0:
            context += np.sum(M[class_idx, rated, :], axis=0) / count ** 0.5
    return context

def R_HAT(u, i, miu, U, V, user_bias, item_bias, M):
    """Predict the score of user ``u`` for item ``i``.

    The prediction is ``miu + user_bias[u] + item_bias[i] + U[u]·V[i]
    + U_MPC(u, i, M)·V[i]``.

    Args:
        u: user index.
        i: item index.
        miu: global offset term.
        U: user latent vectors, indexed by user.
        V: item latent vectors, indexed by item.
        user_bias: per-user bias values, indexed by user.
        item_bias: per-item bias values, indexed by item.
        M: array handed to ``U_MPC`` to build the rating-class context vector.

    Returns:
        The predicted score as a scalar.
    """
    item_vec = V[i]
    baseline = miu + user_bias[u] + item_bias[i]
    return baseline + np.dot(U[u], item_vec) + np.dot(U_MPC(u, i, M), item_vec)


In [63]:
# Random initialisation of the model parameters: uniform draws from [0, 1) are
# shifted and scaled to [-0.005, 0.005). The draw order (U, then V, then M) is kept.
# NOTE(review): U and V have one more row than there are users / items, while M
# has exactly item_num rows per rating class — confirm the extra row is intended.
U = (np.random.rand(user_num + 1, d) - 0.5) * 0.01
V = (np.random.rand(item_num + 1, d) - 0.5) * 0.01
M = (np.random.rand(5, item_num, d) - 0.5) * 0.01


In [64]:
T = 100  # number of training epochs (outer loop count of the SGD cell)
p = 3  # unobserved (user, item) pairs sampled per epoch, as a multiple of R

In [65]:
def sample_unobserved_positions(matrix, n):
    """Draw ``n`` distinct zero-valued cells of ``matrix`` as negative triples.

    Args:
        matrix: array whose zero entries mark the candidate positions.
        n: number of positions to draw, without replacement.

    Returns:
        Set of ``(row, column, -1)`` tuples, one per sampled position.

    Raises:
        ValueError: if ``n`` is larger than the number of zero entries.
    """
    # Coordinates of every zero entry, one row per position
    unobserved_positions = np.argwhere(matrix == 0)

    # Refuse to sample more positions than exist
    if len(unobserved_positions) < n:
        raise ValueError(f"n={n} 超过了矩阵中元素0的数量：{len(unobserved_positions)}")

    # Pick n distinct row numbers of the coordinate table
    chosen = np.random.choice(len(unobserved_positions), n, replace=False)
    picked = unobserved_positions[chosen]

    # Label every sampled position with -1
    return {(pos[0], pos[1], -1) for pos in picked}

In [66]:
# Scratch check: build one training pool (observed positives and negatives plus
# sampled unobserved pairs) and draw a single triple from it to inspect its shape.
S_Unob = sample_unobserved_positions(y_ui, 3 * R)
S_New = S_Neg | S_Unob | S_Pos
S_list = list(S_New)
sampled_triplet = random.sample(S_list, 1)[0]
sampled_triplet[0],sampled_triplet

(86, (86, 1178, -1))

In [71]:
# SGD training loop over (user, item, label) triples with label in {+1, -1},
# minimising the logistic loss log(1 + exp(-label * r_pred)) plus L2 regularisation.
# NOTE(review): miu, user_bias and item_bias start from rating-scale statistics
# while the labels are +1 / -1 — confirm this initialisation is intended.
miu = GlobalAverage
lr = 0.01    # learning rate
lmda = 0.01  # L2 regularisation weight
for t in range(T):
    # Report progress once per epoch instead of once per sampled triple.
    print(f'\r(epoch:{t}', end='')

    # Every epoch draws a fresh set of unobserved pairs (labelled -1) and merges
    # it with the fixed positive / negative triples of the observed ratings.
    S_Unob = sample_unobserved_positions(y_ui, p * R)
    S_New = S_Neg | S_Unob | S_Pos
    S_Num = len(S_New)
    S_list = list(S_New)
    for _ in range(S_Num):
        sampled_triplet = random.choice(S_list)
        user_id = sampled_triplet[0]
        item_id = sampled_triplet[1]
        yui = sampled_triplet[2]
        U_mpc = U_MPC(user_id, item_id, M)
        # Same value as R_HAT(user_id, item_id, miu, U, V, user_bias, item_bias, M),
        # but reuses U_mpc instead of computing it a second time.
        r_pred = (miu + user_bias[user_id] + item_bias[item_id]
                  + np.dot(U[user_id], V[item_id]) + np.dot(U_mpc, V[item_id]))

        # Derivative of the logistic loss with respect to r_pred:
        #     e = -yui / (1 + exp(yui * r_pred))
        # evaluated in the branch that keeps the exponent non-positive so that
        # math.exp cannot overflow for large |r_pred|.
        margin = yui * r_pred
        if margin >= 0:
            exp_neg = math.exp(-margin)
            e = -yui * exp_neg / (1.0 + exp_neg)
        else:
            e = -yui / (1.0 + math.exp(margin))

        # Gradients (all computed from the parameter values before this step)
        delta_Uu = e * V[item_id] + lmda * U[user_id]
        delta_Vi = e * (U[user_id] + U_mpc) + lmda * V[item_id]
        delta_miu = e
        delta_bu = e + lmda * user_bias[user_id]
        delta_bi = e + lmda * item_bias[item_id]

        # Update the rating-class vectors of the items that fed U_mpc. The item
        # indices in i_pie are distinct, so one vectorised update per rating
        # class is equivalent to updating the items one by one.
        for r in range(5):
            i_pie = items_u_r(user_id, item_id, r + 1)
            i_num = len(i_pie)
            if i_num == 0:
                continue
            delta_M = e * V[item_id] / i_num ** 0.5 + lmda * M[r, i_pie]
            M[r, i_pie] -= lr * delta_M

        # Update the remaining parameters
        miu -= lr * delta_miu
        user_bias[user_id] -= lr * delta_bu
        item_bias[item_id] -= lr * delta_bi
        U[user_id] -= lr * delta_Uu
        V[item_id] -= lr * delta_Vi

(epoch:17

KeyboardInterrupt: 

In [5]:
import random

# A small set holding several example triplets
triplet_set = {
    (1, 'apple', 3.14),
    (2, 'banana', 2.71),
    (3, 'cherry', 1.61),
    (4, 'date', 1.41),
    (5, 'elderberry', 2.0),
}

# random.sample needs a sequence, so unpack the set into a list first
triplet_list = [*triplet_set]

# Draw 2 distinct triplets at random
sampled_triplets = random.sample(triplet_list, k=2)

print(sampled_triplets)

[(2, 'banana', 2.71), (3, 'cherry', 1.61)]
