In [1]:
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline
from surprise import Dataset, Reader
from surprise import accuracy
from surprise.model_selection import KFold

In [2]:
# read data
# Each line of data/ratings.dat is parsed as user::item::rating::timestamp.
reader = Reader(
    line_format='user item rating timestamp',
    sep='::',
    skip_lines=0,
)
data = Dataset.load_from_file('data/ratings.dat', reader=reader)
# Trainset built from the whole dataset (no hold-out split).
train_set = data.build_full_trainset()

In [7]:
# initialize models
# One user-based KNN variant per entry, keyed by its class name; all four
# share the same neighbourhood settings (k=50, min_k=3). The sim_options
# dict literal is evaluated per iteration, so each model gets its own dict.
models = {
    algo_cls.__name__: algo_cls(
        k=50,
        min_k=3,
        sim_options={'user_based': True},
        verbose=True,
    )
    for algo_cls in (KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline)
}

In [8]:
# k fold
# 3-fold cross-validation. A fixed integer random_state makes every call to
# kf.split() yield the same shuffled folds, so each model is scored on
# identical train/test splits and a re-run reproduces the same numbers.
kf = KFold(n_splits=3, random_state=42)
# Running bests: name of the best model and its average error, per metric.
# Start at +inf so the first evaluated model always becomes the current best.
best_model_rmse = ''
best_model_mse = ''
best_rmse = float('inf')
best_mse = float('inf')

In [9]:
# search best
# For every candidate model: fit and test on each fold produced by `kf`,
# average RMSE and MSE across the folds, and remember the model with the
# lowest average for each metric.
for key in models:
    fold_rmse = []
    fold_mse = []
    for trainset, testset in kf.split(data):
        models[key].fit(trainset)
        predictions = models[key].test(testset)
        fold_rmse.append(accuracy.rmse(predictions, verbose = True))
        fold_mse.append(accuracy.mse(predictions, verbose = True))
    # Divide by the number of folds actually evaluated rather than a
    # hard-coded 3, so the averages stay correct if n_splits is changed.
    avg_rmse = sum(fold_rmse) / len(fold_rmse)
    avg_mse = sum(fold_mse) / len(fold_mse)
    if best_rmse > avg_rmse:
        best_model_rmse = key
        best_rmse = avg_rmse
    if best_mse > avg_mse:
        best_model_mse = key
        best_mse = avg_mse

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9285
MSE: 0.8621
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9306
MSE: 0.8661
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9294
MSE: 0.8637
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9299
MSE: 0.8648
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9309
MSE: 0.8666
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9291
MSE: 0.8631
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9311
MSE: 0.8670
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9300
MSE: 0.8649
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9300
MSE: 0.8649
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.8966
M

In [10]:
# Report the winning model and its cross-validated average for each metric.
print(
    "Best model in RMSE is {0}, with RMSE {1}".format(best_model_rmse, best_rmse),
    "Best model in MSE is {0}, with MSE {1}".format(best_model_mse, best_mse),
    sep="\n",
)

Best model in RMSE is KNNBaseline, with RMSE 0.8975108129944771
Best model in MSE is KNNBaseline, with MSE 0.8055262275371557
