In [1]:
pip install scikit-surprise

Note: you may need to restart the kernel to use updated packages.


In [14]:
from surprise import accuracy, Dataset, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from surprise.model_selection import KFold

In [3]:
# Load Surprise's built-in 'ml-100k' ratings dataset; `data` is reused by the
# cells that follow.
data = Dataset.load_builtin('ml-100k')

# Baseline: SVD with the library's default hyperparameters, scored with
# 5-fold cross-validation on RMSE and MAE.
# Both the fold splitter and the algorithm get a fixed random_state so the
# reported scores are repeatable after a kernel restart (with cv=5 and a bare
# SVD() the folds and the factor initialisation change on every run).
algoSVD = SVD(random_state=42)
cross_validate(
    algoSVD,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42, shuffle=True),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9280  0.9260  0.9367  0.9357  0.9464  0.9346  0.0073  
MAE (testset)     0.7350  0.7308  0.7368  0.7367  0.7444  0.7368  0.0044  
Fit time          0.31    0.32    0.37    0.35    0.65    0.40    0.13    
Test time         0.06    0.05    0.09    0.06    0.12    0.08    0.03    


{'test_rmse': array([0.92799239, 0.92596437, 0.93667967, 0.93574877, 0.94641509]),
 'test_mae': array([0.73501096, 0.73081802, 0.73684276, 0.73670805, 0.74439869]),
 'fit_time': (0.31156110763549805,
  0.3186066150665283,
  0.3726935386657715,
  0.354877233505249,
  0.6529507637023926),
 'test_time': (0.06041312217712402,
  0.049077749252319336,
  0.0913848876953125,
  0.06402897834777832,
  0.12099933624267578)}

In [4]:
# SVD++ with explicitly chosen hyperparameters (100 factors, 30 epochs,
# learning rate 0.005, regularisation 0.02), scored with 5-fold
# cross-validation on RMSE and MAE.
# random_state is fixed on the algorithm and on the fold splitter so the
# scores are repeatable; without it every run draws new folds and a new
# random initialisation.
algoSVDpp = SVDpp(
    n_factors=100,
    n_epochs=30,
    lr_all=0.005,
    reg_all=0.02,
    random_state=42,
)
cross_validate(
    algoSVDpp,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42, shuffle=True),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9175  0.9194  0.9250  0.9180  0.9145  0.9189  0.0034  
MAE (testset)     0.7204  0.7181  0.7256  0.7222  0.7162  0.7205  0.0033  
Fit time          24.36   26.07   27.07   25.43   25.64   25.72   0.88    
Test time         4.08    4.51    5.02    3.77    3.71    4.22    0.49    


{'test_rmse': array([0.9174835 , 0.91942004, 0.92497512, 0.91800784, 0.91448093]),
 'test_mae': array([0.72035652, 0.71810875, 0.72558935, 0.72217386, 0.7162001 ]),
 'fit_time': (24.35696029663086,
  26.0717716217041,
  27.07059144973755,
  25.432604551315308,
  25.643888235092163),
 'test_time': (4.078376293182373,
  4.5094239711761475,
  5.0183045864105225,
  3.7749390602111816,
  3.7086665630340576)}

In [5]:
# NMF with the library's default hyperparameters, scored with 5-fold
# cross-validation on RMSE and MAE.
# random_state is fixed on the algorithm and on the fold splitter so the
# scores are repeatable instead of varying from run to run.
algoNMF = NMF(random_state=42)
cross_validate(
    algoNMF,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42, shuffle=True),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9535  0.9655  0.9677  0.9629  0.9679  0.9635  0.0053  
MAE (testset)     0.7482  0.7615  0.7605  0.7569  0.7582  0.7570  0.0047  
Fit time          1.16    1.21    1.27    1.19    1.15    1.19    0.04    
Test time         0.15    0.15    0.19    0.11    0.17    0.15    0.03    


{'test_rmse': array([0.9534788 , 0.96547463, 0.96768189, 0.96292684, 0.9679166 ]),
 'test_mae': array([0.74820748, 0.76149247, 0.76046923, 0.75688503, 0.7581592 ]),
 'fit_time': (1.1624205112457275,
  1.206655502319336,
  1.2654802799224854,
  1.1889111995697021,
  1.1499085426330566),
 'test_time': (0.15027332305908203,
  0.14899897575378418,
  0.1897127628326416,
  0.11400079727172852,
  0.17172718048095703)}

In [6]:
# Single hold-out evaluation: 75% of the ratings for training, 25% for testing.
# random_state pins the shuffle used for the split and the SVD initialisation,
# so the RMSE/MAE printed below can be reproduced exactly.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)
algo = SVD(random_state=42)

# fit() returns the fitted algorithm, so the test-set predictions can be
# produced in the same expression.
predictions = algo.fit(trainset).test(testset)

# Each helper prints its metric; the MAE value is also the cell's output.
accuracy.rmse(predictions)
accuracy.mae(predictions)

RMSE: 0.9350
MAE:  0.7367


0.7367397754430995

In [None]:
# Hyperparameter grid for SVD: 3 values for each of 4 parameters, i.e.
# 81 combinations.
param_grid = {
    'n_factors': [50, 100, 150],
    'n_epochs': [20, 30, 40],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.02, 0.05, 0.1]
}

# 81 combinations x 3 folds = 243 independent fits: n_jobs=-1 spreads them
# over all available CPU cores instead of running them one after another.
# A seeded KFold keeps the three folds identical from run to run.
gs = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse', 'mae'],
    cv=KFold(n_splits=3, random_state=42, shuffle=True),
    n_jobs=-1,
)
gs.fit(data)

# Best parameter combination by mean RMSE, and the RMSE it achieved.
print(gs.best_params['rmse'])
print(gs.best_score['rmse'])