In [1]:
import numpy as np
import torch

In [2]:
# Load the training set
# Matrix dimensions and hyper-parameters used by the cells below.
user_num, item_num = 943, 1682
d = 20
alpha_u = alpha_v = beta_u = beta_v = 0.01
lr = 0.01
epochs = 100

# Both matrices carry an extra row 0 / column 0 so the ids read from the file
# can be used directly as indices; y_ui is the 0/1 "was rated" indicator.
ratings = np.zeros((user_num + 1, item_num + 1), dtype=int)
y_ui = np.zeros_like(ratings)

with open('../data/ml-100k/u1.base', 'r') as file:
    for line in file:
        # Each record is: user <TAB> item <TAB> rating <TAB> timestamp
        user, item, rating, timestamp = line.split('\t')
        ratings[int(user), int(item)] = int(rating)
        y_ui[int(user), int(item)] = 1

# Global statistics: number of observed ratings, matrix density, global mean.
p = y_ui.sum()
n = y_ui.sum()
density = p / (user_num * item_num)
r_ = ratings.sum() / p

# Rating totals and rating counts along each axis
# (axis=0 -> one value per column/item, axis=1 -> one value per row/user).
r_1, r_2 = ratings.sum(axis=0), ratings.sum(axis=1)
r_3, r_4 = y_ui.sum(axis=0), y_ui.sum(axis=1)

# Compute the four statistics: user means, item means, user biases, item biases.
# Index 0 of every vector is padding and stays 0.

# Mean rating per user; users with no ratings fall back to the global mean r_.
r_u = np.zeros(user_num + 1, dtype=float)
for i in range(1, user_num + 1):
    r_u[i] = r_2[i] / r_4[i] if r_4[i] != 0 else r_

# Mean rating per item; items with no ratings fall back to the global mean r_.
r_i = np.zeros(item_num + 1, dtype=float)
for i in range(1, item_num + 1):
    r_i[i] = r_1[i] / r_3[i] if r_3[i] != 0 else r_

# User bias: average of (rating - item mean) over the items the user rated.
b_u = np.zeros(user_num + 1, dtype=float)
for i in range(1, user_num + 1):
    if r_4[i] == 0:
        continue  # no ratings for this user: bias stays 0
    b_u[i] = np.sum(y_ui[i] * (ratings[i] - r_i)) / r_4[i]

# Item bias: average of (rating - user mean) over the users who rated the item.
b_i = np.zeros(item_num + 1, dtype=float)
for i in range(1, item_num + 1):
    if r_3[i] == 0:
        continue  # no ratings for this item: bias stays 0
    b_i[i] = np.sum(y_ui[:, i] * (ratings[:, i] - r_u)) / r_3[i]

In [3]:
# Error metrics
def MAE(ratings_hat, ratings, y_ui, n):
    """Return the mean absolute error of the masked predictions.

    Args:
        ratings_hat: predicted ratings.
        ratings: reference ratings. They are subtracted as given (no mask is
            applied to them), so callers presumably pass a matrix that is zero
            wherever ``y_ui`` is zero.
        y_ui: 0/1 indicator multiplied into ``ratings_hat``.
        n: divisor for the summed absolute error (the number of scored cells
            in this notebook's usage).
    """
    masked_error = ratings_hat * y_ui - ratings
    return abs(masked_error).sum() / n

def RMSE(ratings_hat, ratings, y_ui, n):
    """Return the root-mean-square error of the masked predictions.

    Args:
        ratings_hat: predicted ratings.
        ratings: reference ratings. They are subtracted as given (no mask is
            applied to them), so callers presumably pass a matrix that is zero
            wherever ``y_ui`` is zero.
        y_ui: 0/1 indicator multiplied into ``ratings_hat``.
        n: divisor for the summed squared error (the number of scored cells
            in this notebook's usage).
    """
    masked_error = ratings_hat * y_ui - ratings
    mean_squared = (masked_error ** 2).sum() / n
    return mean_squared ** 0.5

In [4]:
def PURE_SVD(train_ratings=None, train_mask=None, user_means=None, rank=None):
    """Predict ratings with a truncated SVD of the mean-centred rating matrix.

    Unobserved cells are filled with the user's mean rating, every row is
    centred on that mean, the matrix is factorised with SVD and rebuilt from
    its ``rank`` largest singular values; finally the user means are added
    back. Row 0 and column 0 (index padding) are neither filled nor centred.

    Every parameter is optional and falls back to the notebook-level global
    of the same role, so the existing call ``PURE_SVD()`` is unchanged.

    Args:
        train_ratings: 2-D rating matrix (defaults to the global ``ratings``).
        train_mask: same-shape indicator, non-zero where a rating is observed
            (defaults to the global ``y_ui``).
        user_means: 1-D per-row mean rating (defaults to the global ``r_u``).
        rank: number of singular values to keep (defaults to the global ``d``).

    Returns:
        A float ndarray of predicted ratings with the shape of
        ``train_ratings``. The inputs are not modified.
    """
    if train_ratings is None:
        train_ratings = ratings
    if train_mask is None:
        train_mask = y_ui
    if user_means is None:
        user_means = r_u
    if rank is None:
        rank = d

    # Work in float: the rating matrix is integer-typed, and writing the
    # fractional user means into an integer array silently truncates them,
    # which corrupts the centring before the SVD.
    values = np.asarray(train_ratings, dtype=float)
    observed = np.asarray(train_mask) != 0
    row_mean = np.asarray(user_means, dtype=float)[:, None]

    # Fill unobserved cells with the user mean, then subtract the user mean
    # from every cell (so filled cells become exactly 0).
    centred = values.copy()
    centred[1:, 1:] = (
        np.where(observed[1:, 1:], values[1:, 1:], row_mean[1:]) - row_mean[1:]
    )

    # The reduced SVD is sufficient because only the leading `rank`
    # components are used in the reconstruction.
    left, singular, right_t = np.linalg.svd(centred, full_matrices=False)
    ratings_hat = (left[:, :rank] * singular[:rank]) @ right_t[:rank, :]

    # Undo the centring for the real (non-padding) rows.
    ratings_hat[1:] += row_mean[1:]
    return ratings_hat

def RSVD(lr, train_ratings=None, train_mask=None, rank=None, n_epochs=None,
         reg_u=None, reg_v=None):
    """Fit a regularised matrix factorisation by full-batch gradient descent.

    The rating matrix is approximated by ``u @ v``. For every observed cell
    the loss is half the squared error plus an L2 penalty on that cell's user
    factor row and item factor column. The step size is multiplied by 0.9
    after every epoch. Predictions for the non-padding cells (row and column
    index >= 1) are clipped to the range [1, 5].

    The factors are initialised from N(0, 1) using torch's global random
    generator, so results vary between runs unless the caller seeds torch.

    Every keyword parameter is optional and falls back to the notebook-level
    global of the same role, so the existing call ``RSVD(lr)`` is unchanged.

    Args:
        lr: initial learning rate.
        train_ratings: 2-D rating matrix (defaults to the global ``ratings``).
        train_mask: same-shape 0/1 indicator of observed ratings (defaults to
            the global ``y_ui``).
        rank: number of latent factors (defaults to the global ``d``).
        n_epochs: number of gradient steps (defaults to the global ``epochs``).
        reg_u: L2 weight on user factors (defaults to the global ``alpha_u``).
        reg_v: L2 weight on item factors (defaults to the global ``alpha_v``).

    Returns:
        A float32 ndarray of predicted ratings with the shape of
        ``train_ratings``.
    """
    if train_ratings is None:
        train_ratings = ratings
    if train_mask is None:
        train_mask = y_ui
    if rank is None:
        rank = d
    if n_epochs is None:
        n_epochs = epochs
    if reg_u is None:
        reg_u = alpha_u
    if reg_v is None:
        reg_v = alpha_v

    # Convert once, outside the training loop.
    targets = torch.as_tensor(np.asarray(train_ratings), dtype=torch.float32)
    observed = torch.as_tensor(np.asarray(train_mask), dtype=torch.float32)
    n_rows, n_cols = targets.shape

    # Initialise the factor matrices
    u = torch.normal(0, 1, (n_rows, rank), requires_grad=True)
    v = torch.normal(0, 1, (rank, n_cols), requires_grad=True)

    # Train
    for _ in range(n_epochs):
        residual = targets - u @ v
        # Per-cell penalty: squared norm of the cell's own user row and item
        # column (broadcast to the full matrix), not the norm of the whole
        # factor matrices.
        penalty = (
            (reg_u / 2) * (u ** 2).sum(dim=1, keepdim=True)
            + (reg_v / 2) * (v ** 2).sum(dim=0, keepdim=True)
        )
        loss = ((residual ** 2) / 2 + penalty) * observed
        # NOTE(review): the mean runs over every cell, including unobserved
        # ones, so gradients are scaled by 1 / (rows * cols). Kept as in the
        # original; dividing by observed.sum() may be what is wanted.
        loss.mean().backward()
        with torch.no_grad():
            u -= lr * u.grad
            v -= lr * v.grad
            u.grad.zero_()
            v.grad.zero_()
        # Decay the learning rate
        lr *= 0.9

    # Build the prediction without autograd tracking and clip it in one
    # vectorised call instead of a per-element Python loop.
    with torch.no_grad():
        ratings_hat = u @ v
        ratings_hat[1:, 1:].clamp_(min=1.0, max=5.0)
    return ratings_hat.numpy()

In [None]:
# Prediction: load the held-out ratings and score both models on them.
ratings_test = np.zeros((user_num + 1, item_num + 1), dtype=int)
y_ui_test = np.zeros_like(ratings_test)

with open('../data/ml-100k/u1.test', 'r') as file:
    for line in file:
        # Each record is: user <TAB> item <TAB> rating <TAB> timestamp
        user, item, rating, timestamp = line.split('\t')
        ratings_test[int(user), int(item)] = int(rating)
        y_ui_test[int(user), int(item)] = 1

# Number of test ratings; used as the divisor in both metrics.
test_num = y_ui_test.sum()

# Scoring: one prediction matrix per model.
ratings_hat1 = PURE_SVD()
ratings_hat2 = RSVD(lr)

# One line per model, formatted as "RMSE, MAE".
print(f"{RMSE(ratings_hat1, ratings_test, y_ui_test, test_num):.4f}, {MAE(ratings_hat1, ratings_test, y_ui_test, test_num):.4f}")
print(f"{RMSE(ratings_hat2, ratings_test, y_ui_test, test_num):.4f}, {MAE(ratings_hat2, ratings_test, y_ui_test, test_num):.4f}")