In [1]:
%run ./MovieLens.ipynb

In [2]:
# Hyperparameters.
# k: maximum number of neighbors kept by get_user_neighbors / get_item_neighbors.
k = 20
# lambda_: weight of the user-based prediction in HCF; the item-based
# prediction receives (1 - lambda_).
lambda_ = 0.5

In [3]:
# Similarity measures used to find neighbors
def PCC_user(u, w):
    """Correlation-style similarity between users u and w.

    Only the items returned by I_u_w(u, w) are considered. Each rating is
    centered on the corresponding r_u entry (presumably the user's mean
    rating, defined in MovieLens.ipynb) before the normalized dot product
    is taken.

    Returns 0 when the two users share no items, or when the numerator or
    either squared-deviation sum is exactly zero; otherwise the quotient
    numerator / (sqrt(sum_u) * sqrt(sum_w)).
    """
    common_items = I_u_w(u, w)
    if len(common_items) == 0:
        return 0

    numerator = 0.0
    sq_dev_u = 0.0
    sq_dev_w = 0.0
    for item in common_items:
        dev_u = ratings[u][item] - r_u[u]
        dev_w = ratings[w][item] - r_u[w]
        numerator += dev_u * dev_w
        sq_dev_u += dev_u ** 2
        sq_dev_w += dev_w ** 2

    # A zero numerator or zero spread gives no usable correlation.
    if numerator == 0 or sq_dev_u == 0 or sq_dev_w == 0:
        return 0
    return numerator / ((sq_dev_u ** 0.5) * (sq_dev_w ** 0.5))

def PCC_item(k, j):
    """Correlation-style similarity between items k and j.

    Only the users returned by U_k_j(k, j) are considered. Both ratings of
    a user are centered on that user's r_u entry (presumably the user's
    mean rating, defined in MovieLens.ipynb), not on an item mean.

    Returns 0 when no user is shared, 0.0 when the numerator or either
    squared-deviation sum is exactly zero, and otherwise
    numerator / (sqrt(sum_k) * sqrt(sum_j)).
    """
    common_users = U_k_j(k, j)
    if len(common_users) == 0:
        return 0

    numerator = 0.0
    sq_dev_k = 0.0
    sq_dev_j = 0.0
    for user in common_users:
        dev_k = ratings[user][k] - r_u[user]
        dev_j = ratings[user][j] - r_u[user]
        numerator += dev_k * dev_j
        sq_dev_k += dev_k ** 2
        sq_dev_j += dev_j ** 2

    # A zero numerator or zero spread gives no usable correlation.
    if numerator == 0.0 or sq_dev_k == 0.0 or sq_dev_j == 0.0:
        return 0.0
    return numerator / ((sq_dev_k ** 0.5) * (sq_dev_j ** 0.5))

In [4]:
# Build the user-user similarity matrix s_uw.
# Indices run from 1 to user_num; row 0 and column 0 are left at zero.
# The matrix is symmetric, so each pair is evaluated once with PCC_user and
# the value is mirrored, instead of calling PCC_user twice per pair.
s_uw = np.zeros((user_num + 1, user_num + 1), float)
for i in range(1, user_num + 1):
    s_uw[i][i] = 1.0  # every user is perfectly similar to itself
    for j in range(1, i):
        similarity = PCC_user(i, j)
        s_uw[i][j] = similarity
        s_uw[j][i] = similarity
            
# Build the item-item similarity matrix s_kj.
# Indices run from 1 to item_num; row 0 and column 0 are left at zero.
# The matrix is symmetric, so each pair is evaluated once with PCC_item and
# the value is mirrored, instead of calling PCC_item twice per pair.
s_kj = np.zeros((item_num + 1, item_num + 1), float)
for i in range(1, item_num + 1):
    s_kj[i][i] = 1.0  # every item is perfectly similar to itself
    for j in range(1, i):
        similarity = PCC_item(i, j)
        s_kj[i][j] = similarity
        s_kj[j][i] = similarity

In [5]:
# Select up to k neighbors
def get_user_neighbors(u, j):
    """Return up to k neighbor users of u to use when predicting item j.

    Candidates are the users with a non-zero s_uw[u] entry, excluding u
    itself, that also appear in U_j(j). They are ranked by their signed
    similarity to u in descending order (not by absolute value) and the
    first k (the module-level hyperparameter) are returned as an index array.
    """
    similarities = s_uw[u]
    candidates = np.where(similarities != 0)[0]
    candidates = candidates[candidates != u]
    candidates = np.intersect1d(candidates, U_j(j))
    # argsort is ascending, so reverse it to put the largest values first.
    ranking = np.argsort(similarities[candidates])[::-1]
    return candidates[ranking[:k]]

def get_item_neighbors(u, j):
    """Return up to k neighbor items of j to use when predicting for user u.

    Candidates are the items with a non-zero s_kj[j] entry, excluding j
    itself, that also appear in I_u(u). They are ranked by their signed
    similarity to j in descending order (not by absolute value) and the
    first k (the module-level hyperparameter) are returned as an index array.
    """
    similarities = s_kj[j]
    candidates = np.where(similarities != 0)[0]
    candidates = candidates[candidates != j]
    candidates = np.intersect1d(candidates, I_u(u))
    # argsort is ascending, so reverse it to put the largest values first.
    ranking = np.argsort(similarities[candidates])[::-1]
    return candidates[ranking[:k]]

In [6]:
# Prediction rules
def UCF(u, j):
    """User-based collaborative-filtering prediction of user u's rating of item j.

    The prediction is r_u[u] plus the similarity-weighted average of the
    neighbors' deviations (ratings[w][j] - r_u[w]), with neighbors chosen by
    get_user_neighbors(u, j).

    Falls back to r_u[u] when there are no neighbors, or when the neighbor
    similarities sum to exactly zero. The second case can occur because
    neighbors are selected on s_uw != 0, so positive and negative
    similarities may cancel; without the guard the division would raise
    ZeroDivisionError or yield NaN/inf. ICF applies the same fallback.
    """
    neighbors = get_user_neighbors(u, j)
    if len(neighbors) == 0:
        return r_u[u]
    weighted_deviation = 0.0
    similarity_sum = 0.0
    for w in neighbors:
        weighted_deviation += s_uw[u][w] * (ratings[w][j] - r_u[w])
        similarity_sum += s_uw[u][w]
    # Signed similarities can cancel out; avoid dividing by zero.
    if similarity_sum == 0:
        return r_u[u]
    return r_u[u] + (weighted_deviation / similarity_sum)

def ICF(u, j):
    """Item-based collaborative-filtering prediction of user u's rating of item j.

    The prediction is the similarity-weighted average of user u's ratings of
    the neighbor items chosen by get_item_neighbors(u, j), weighted by
    s_kj[neighbor][j].

    Falls back to r_u[u] when there are no neighbors or when the similarity
    weights sum to exactly zero.
    """
    neighbor_items = get_item_neighbors(u, j)
    if len(neighbor_items) == 0:
        return r_u[u]

    weighted_ratings = 0.0
    similarity_sum = 0.0
    for item in neighbor_items:
        weighted_ratings += s_kj[item][j] * ratings[u][item]
        similarity_sum += s_kj[item][j]

    # Signed similarities can cancel out; avoid dividing by zero.
    if similarity_sum == 0:
        return r_u[u]
    return weighted_ratings / similarity_sum
    
def HCF(u, j):
    """Hybrid prediction: blend of the user-based and item-based predictions.

    UCF(u, j) is weighted by the module-level lambda_ and ICF(u, j) by
    (1 - lambda_).
    """
    user_based = UCF(u, j)
    item_based = ICF(u, j)
    return lambda_ * user_based + (1 - lambda_) * item_based

In [7]:
# Print the results: pass the three prediction rules to predict
# (not defined in this notebook; presumably provided by MovieLens.ipynb).
predict(UCF, ICF, HCF)

RMSE: 0.9822, MAE: 0.7643
RMSE: 1.0754, MAE: 0.8322
RMSE: 1.0184, MAE: 0.7890
