In [1]:
from surprise import Dataset
from surprise import Reader
from surprise import BaselineOnly, KNNBasic, NormalPredictor
from surprise import accuracy
from surprise.model_selection import KFold

In [2]:
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
train_set = data.build_full_trainset()

In [3]:
# ALS优化
bsl_options = {'method': 'als','n_epochs': 5,'reg_u': 12,'reg_i': 5}

In [4]:
algo = BaselineOnly(bsl_options=bsl_options)

In [5]:
kf = KFold(n_splits=3)
for trainset, testset in kf.split(data):
    # 训练并预测
    algo.fit(trainset)
    predictions = algo.test(testset)
    # 计算RMSE
    accuracy.rmse(predictions, verbose=True)

Estimating biases using als...
RMSE: 0.8650
Estimating biases using als...
RMSE: 0.8640
Estimating biases using als...
RMSE: 0.8627


In [8]:
#查看预测集
print(predictions[0])

user: 2374       item: 111        r_ui = 3.00   est = 3.72   {'was_impossible': False}


In [9]:
uid = str(196)
iid = str(302)
# 输出uid对iid的预测结果
pred = algo.predict(uid, iid, r_ui=4, verbose=True)


user: 196        item: 302        r_ui = 4.00   est = 4.07   {'was_impossible': False}


In [10]:
'''
Baseline算法：基于统计的基准预测线打分
bui = mu + bu + bi
其中mu 为所有已知投票数据中投票的均值，
bu为用户的打分相对于平均值的偏差
bi为该item被打分时，相对于平均值得偏差，可反映电影受欢迎程度。 
bui则为基线模型对用户u给物品i打分的预估值。

'''

'\nBaseline算法：基于统计的基准预测线打分\nbui = mu + bu + bi\n其中mu 为所有已知投票数据中投票的均值，\nbu为用户的打分相对于平均值的偏差\nbi为该item被打分时，相对于平均值得偏差，可反映电影受欢迎程度。 \nbui则为基线模型对用户u给物品i打分的预估值。\n\n'