"""
This module descibes how to test the performances of an algorithm on the
trainset.
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from surprise import Dataset
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import KFold
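
# Load the movielens-100k dataset (it is downloaded on first use).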
data = Dataset.load_builtin('ml-100k')
algo = SVD()
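
# Use the whole dataset for training: build_full_trainset() performs no split.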
trainset = data.build_full_trainset()
algo.fit(trainset)
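
# build_testset() turns the trainset's known ratings into a list of
# (user, item, rating) tuples, so we evaluate on the very data the
# algorithm was trained on.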
testset = trainset.build_testset()
predictions = algo.test(testset)
# RMSE should be low as we are biased
accuracy.rmse(predictions, verbose=True)  # ~ 0.68 (which is low)

# We can also do this during a cross-validation procedure!
print('CV procedure:')

kf = KFold(n_splits=3)
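# kf.split(data) yields a (trainset, testset) pair for each of the 3 folds.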
for i, (trainset_cv, testset_cv) in enumerate(kf.split(data)):
    print('fold number', i + 1)
    algo.fit(trainset_cv)

    print('On testset,', end=' ')
    predictions = algo.test(testset_cv)
    accuracy.rmse(predictions, verbose=True)

    print('On trainset,', end=' ')
    predictions = algo.test(trainset_cv.build_testset())
    accuracy.rmse(predictions, verbose=True)
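
# Optional sketch (not part of the original example): any other metric from
# the accuracy module can be reported the same way, e.g. MAE on the last
# fold's trainset predictions computed above.
print('On trainset (MAE),', end=' ')
accuracy.mae(predictions, verbose=True)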