In [8]:
import time

import pandas as pd
from surprise import SVDpp
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import BaselineOnly, KNNBasic, NormalPredictor
from surprise import accuracy
from surprise.model_selection import KFold, cross_validate

In [9]:
# Load the ratings: a comma-separated file whose first line is skipped
# (skip_lines=1) and whose rows are laid out as user, item, rating, timestamp.
reader = Reader(
    sep=',',
    skip_lines=1,
    line_format='user item rating timestamp',
)
data = Dataset.load_from_file('./ratings.csv', reader=reader)

# Trainset built from every rating in `data`, with nothing held out.
train_set = data.build_full_trainset()

In [10]:
# Candidate models to compare: funkSVD, BiasSVD and SVD++.
# `algo_names` and `algos` are paired by position, so keep them in the same order.
algo_names = ['funkSVD', 'BiasSVD', 'SVD++']
algos = [
    SVD(biased=False),  # funkSVD: SVD with bias terms switched off
    SVD(biased=True),   # BiasSVD: SVD with bias terms switched on
    SVDpp(),            # SVD++
]

In [11]:
def search_model(algos, algo_names, k):
    """Select the algorithm with the lowest mean RMSE under k-fold cross-validation.

    For every algorithm, the module-level ``data`` is split with
    ``KFold(n_splits=k)``; the algorithm is fitted on each training fold and
    scored on the matching test fold. The per-fold RMSE values are averaged,
    and the algorithm with the smallest average wins.

    Args:
        algos: Algorithm objects exposing ``fit(trainset)`` and ``test(testset)``.
        algo_names: Display names, paired positionally with ``algos``.
        k: Number of cross-validation folds; must be greater than 2.

    Returns:
        A tuple ``(best_algo, best_name, best_score)`` where ``best_score`` is
        the winner's mean RMSE over the folds. ``best_algo`` is left fitted on
        the last training fold it saw, so refit it before final use. If no
        algorithm was evaluated the result is ``(None, None, 1e10)``.

    Raises:
        AssertionError: If ``k`` is not greater than 2.
    """
    assert k > 2
    best_score = 1e10
    best_algo = None
    best_name = None
    for algo, algo_name in zip(algos, algo_names):
        print('training model {}'.format(algo_name))
        # Build the K-fold cross-validation iterator for this algorithm.
        kf = KFold(n_splits=k)
        fold_scores = []
        for trainset, testset in kf.split(data):
            # Fit on the training fold, then predict the held-out fold.
            algo.fit(trainset)
            predictions = algo.test(testset)
            # Record this fold's RMSE.
            fold_scores.append(accuracy.rmse(predictions, verbose=True))
        if not fold_scores:
            # No folds were produced, so there is nothing to rank this algorithm by.
            continue
        # Compare algorithms on the mean over ALL folds. Comparing a running
        # sum inside the fold loop would rank them by the first fold only.
        mean_score = sum(fold_scores) / len(fold_scores)
        if mean_score < best_score:
            best_score = mean_score
            best_algo = algo
            best_name = algo_name
    return best_algo, best_name, best_score

In [None]:
# Start the wall-clock timer for the whole search / retrain / predict run.
time1 = time.time()

best_algo, best_name, best_score = search_model(algos, algo_names, 6)
print('The best algorithm is {} with average k_fold rmse {}'.format(best_name, best_score))

# Retrain the winning model on the full training set. Uses `fit`, the same
# training call search_model makes, instead of the legacy `train` name.
best_algo.fit(train_set)

uid = str(196)
iid = str(302)
# Print the predicted rating of user `uid` for item `iid`. This must go through
# `best_algo`: the name `algo` only exists inside search_model.
pred = best_algo.predict(uid, iid, r_ui=4, verbose=True)
time2 = time.time()
# Elapsed seconds for the whole cell.
print(time2 - time1)

training model funkSVD
RMSE: 0.8508
RMSE: 0.8536
RMSE: 0.8520
RMSE: 0.8509
RMSE: 0.8534
RMSE: 0.8531
training model BiasSVD
RMSE: 0.8345
RMSE: 0.8280
RMSE: 0.8318
RMSE: 0.8308
RMSE: 0.8325
RMSE: 0.8275
training model SVD++
