In [23]:
import pandas as pd
import numpy as np

# Load the base and test rating files (tab-separated; column names are supplied here).
u1_base = pd.read_csv(
    'ml-100k/u1.base',
    sep='\t',
    names=['uid', 'iid', 'rate', 'timestamp'],
)
u1_test = pd.read_csv(
    'ml-100k/u1.test',
    sep='\t',
    names=['uid', 'iid', 'rate', 'timestamp'],
)


# Define the error metrics
def ERR(rating_matrix, test):
    """Compute the MAE and RMSE of predicted ratings against a test set.

    Parameters
    ----------
    rating_matrix : 2-D array-like
        Predicted ratings; the prediction for a test row is read from
        position ``[uid - 1][iid - 1]``.
    test : pandas.DataFrame
        Must provide the columns ``'uid'``, ``'iid'`` and ``'rate'``.

    Returns
    -------
    tuple of float
        ``(mae, rmse)`` over all rows of ``test``.

    Raises
    ------
    ZeroDivisionError
        If ``test`` has no rows (the averages are undefined).
    """
    cnt = len(test)
    if cnt == 0:
        raise ZeroDivisionError('cannot compute MAE/RMSE on an empty test set')

    predictions = np.asarray(rating_matrix, dtype=float)

    # Read each column on its own instead of using iterrows(): iterrows()
    # upcasts a whole row to one dtype, so a float 'rate' column would turn
    # the ids into floats and break the matrix lookup.
    user_idx = test['uid'].to_numpy() - 1
    item_idx = test['iid'].to_numpy() - 1
    true_ratings = test['rate'].to_numpy(dtype=float)

    errors = predictions[user_idx, item_idx] - true_ratings

    # Mean absolute error / root of the mean squared error
    mae = float(np.abs(errors).sum() / cnt)
    rmse = float((np.square(errors).sum() / cnt) ** 0.5)

    return mae, rmse


# Initialisation
# Size the matrices from both splits so that every test id is a valid index,
# even when a user or item never appears in the base split.
user_num = int(max(u1_base['uid'].max(), u1_test['uid'].max()))
item_num = int(max(u1_base['iid'].max(), u1_test['iid'].max()))

rating_matrix = np.zeros((user_num, item_num), float)
y_ui = np.zeros((user_num, item_num), int)  # 1 where a base rating exists, else 0

# Convert the base records into the matrices; ids are shifted by one to get
# 0-based positions. A single fancy-index assignment replaces the per-row
# iterrows() loop (for a repeated (uid, iid) pair the last record still wins).
base_users = u1_base['uid'].to_numpy() - 1
base_items = u1_base['iid'].to_numpy() - 1
rating_matrix[base_users, base_items] = u1_base['rate'].to_numpy()
y_ui[base_users, base_items] = 1

# Global average over the observed ratings
GlobalAverage = rating_matrix.sum() / y_ui.sum()

# Compute the four statistics: user_means, item_means, user_bias, item_bias.
# Axis reductions and broadcasting replace the pure-Python loops over the
# NumPy arrays; the results are NumPy arrays indexed exactly like the lists
# they replace.
rating_sum_row = rating_matrix.sum(axis=1)
y_sum_row = y_ui.sum(axis=1)

rating_sum_col = rating_matrix.sum(axis=0)
y_sum_col = y_ui.sum(axis=0)

# np.maximum(count, 1) only guards the division; rows/columns with no
# observed rating take the fallback value selected by np.where.
user_means = np.where(
    y_sum_row == 0,
    GlobalAverage,
    rating_sum_row / np.maximum(y_sum_row, 1),
)

item_means = np.where(
    y_sum_col == 0,
    GlobalAverage,
    rating_sum_col / np.maximum(y_sum_col, 1),
)

# User bias: average of (rating - item mean) over the items the user rated;
# 0 for users without any rating.
user_bias = np.where(
    y_sum_row == 0,
    0.0,
    (y_ui * (rating_matrix - item_means[np.newaxis, :])).sum(axis=1)
    / np.maximum(y_sum_row, 1),
)

# Item bias: average of (rating - user mean) over the users who rated the
# item; 0 for items without any rating.
item_bias = np.where(
    y_sum_col == 0,
    0.0,
    (y_ui * (rating_matrix - user_means[:, np.newaxis])).sum(axis=0)
    / np.maximum(y_sum_col, 1),
)

In [24]:
# Predictor: user mean.
# 0-based positions of every test record.
test_users = u1_test['uid'].to_numpy() - 1
test_items = u1_test['iid'].to_numpy() - 1

filled_matrix_u = np.copy(rating_matrix)
# One vectorised write replaces the per-row iterrows() loop.
filled_matrix_u[test_users, test_items] = np.asarray(user_means)[test_users]
mae1, rmse1 = ERR(filled_matrix_u, u1_test)

In [25]:
# Predictor: item mean.
# 0-based positions of every test record.
test_users = u1_test['uid'].to_numpy() - 1
test_items = u1_test['iid'].to_numpy() - 1

filled_matrix_i = np.copy(rating_matrix)
# One vectorised write replaces the per-row iterrows() loop.
filled_matrix_i[test_users, test_items] = np.asarray(item_means)[test_items]
mae2, rmse2 = ERR(filled_matrix_i, u1_test)

In [26]:
# Predictor: half the user mean plus half the item mean.
# 0-based positions of every test record.
test_users = u1_test['uid'].to_numpy() - 1
test_items = u1_test['iid'].to_numpy() - 1

filled_matrix_ui = np.copy(rating_matrix)
# One vectorised write replaces the per-row iterrows() loop.
filled_matrix_ui[test_users, test_items] = (
    np.asarray(user_means)[test_users] / 2 + np.asarray(item_means)[test_items] / 2
)
mae3, rmse3 = ERR(filled_matrix_ui, u1_test)

In [27]:
# Predictor: user bias plus item mean.
# 0-based positions of every test record.
test_users = u1_test['uid'].to_numpy() - 1
test_items = u1_test['iid'].to_numpy() - 1

filled_matrix_BuRi = np.copy(rating_matrix)
# One vectorised write replaces the per-row iterrows() loop.
filled_matrix_BuRi[test_users, test_items] = (
    np.asarray(user_bias)[test_users] + np.asarray(item_means)[test_items]
)
mae4, rmse4 = ERR(filled_matrix_BuRi, u1_test)

In [28]:
# Predictor: user mean plus item bias.
# 0-based positions of every test record.
test_users = u1_test['uid'].to_numpy() - 1
test_items = u1_test['iid'].to_numpy() - 1

filled_matrix_RuBi = np.copy(rating_matrix)
# One vectorised write replaces the per-row iterrows() loop and uses the same
# [rows, cols] indexing form as the other predictor cells.
filled_matrix_RuBi[test_users, test_items] = (
    np.asarray(user_means)[test_users] + np.asarray(item_bias)[test_items]
)
mae5, rmse5 = ERR(filled_matrix_RuBi, u1_test)

In [29]:
# Predictor: global average plus user bias plus item bias.
# 0-based positions of every test record.
test_users = u1_test['uid'].to_numpy() - 1
test_items = u1_test['iid'].to_numpy() - 1

filled_matrix_GAB = np.copy(rating_matrix)
# One vectorised write replaces the per-row iterrows() loop and uses the same
# [rows, cols] indexing form as the other predictor cells.
filled_matrix_GAB[test_users, test_items] = (
    GlobalAverage + np.asarray(user_bias)[test_users] + np.asarray(item_bias)[test_items]
)
mae6, rmse6 = ERR(filled_matrix_GAB, u1_test)

In [30]:
# Report the metrics of the six predictors, one line each, in cell order.
for rmse, mae in (
    (rmse1, mae1),
    (rmse2, mae2),
    (rmse3, mae3),
    (rmse4, mae4),
    (rmse5, mae5),
    (rmse6, mae6),
):
    print(f'(RMSE:{rmse:.4f},MAE:{mae:.4f})')

(RMSE:1.0630,MAE:0.8502)
(RMSE:1.0334,MAE:0.8276)
(RMSE:0.9985,MAE:0.8085)
(RMSE:0.9602,MAE:0.7574)
(RMSE:0.9758,MAE:0.7696)
(RMSE:0.9623,MAE:0.7613)
