https://surprise.readthedocs.io/en/stable/getting_started.html

https://surprise.readthedocs.io/en/stable/basic_algorithms.html

https://github.com/NicolasHug/Surprise/tree/2381fb11d0c4bf917cc4b9126f205d0013649966

https://github.com/NicolasHug/Surprise/tree/2381fb11d0c4bf917cc4b9126f205d0013649966/examples

In [56]:
! pip install numpy
! pip install scikit-surprise



In [57]:
import random

import numpy as np
from surprise import accuracy, Dataset, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV

In [58]:
# Seed Python's and NumPy's global RNGs before any stochastic step, as the
# Surprise FAQ recommends, so that the cross-validation folds and the random
# initialisation of the models are repeatable on Restart & Run All.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Load the movielens-100k dataset (download it if needed).
data = Dataset.load_builtin('ml-100k')

# Baseline model: SVD with the library's default hyper-parameters.
model_SVD = SVD()

# Run 5-fold cross-validation. verbose=True prints the per-fold RMSE/MAE table;
# the returned dict of per-fold scores and timings is shown as the cell output.
cross_validate(model_SVD, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9308  0.9340  0.9352  0.9350  0.9421  0.9354  0.0037  
MAE (testset)     0.7336  0.7359  0.7390  0.7374  0.7408  0.7374  0.0025  
Fit time          3.42    3.15    2.24    1.89    2.28    2.60    0.58    
Test time         0.22    0.65    0.13    0.25    0.15    0.28    0.19    


{'test_rmse': array([0.93081157, 0.9340415 , 0.9351977 , 0.93500448, 0.94212583]),
 'test_mae': array([0.73357518, 0.73589333, 0.73904525, 0.73743836, 0.74080164]),
 'fit_time': (3.4230008125305176,
  3.1472465991973877,
  2.2414731979370117,
  1.8938419818878174,
  2.2799010276794434),
 'test_time': (0.22007966041564941,
  0.6451282501220703,
  0.12758898735046387,
  0.24619269371032715,
  0.1454019546508789)}

In [59]:
# Hold out a random 25% of the ratings as the test set. A fixed random_state
# makes the split identical on every run, so the RMSE can be reproduced.
trainset, testset = train_test_split(data, test_size=0.25, random_state=0)

# SVD with default hyper-parameters; pin its RNG as well so that the random
# initialisation of the model does not change between runs.
algo = SVD(random_state=0)

# Train the algorithm on the trainset, and predict ratings for the testset.
algo.fit(trainset)
predictions = algo.test(testset)

# accuracy.rmse prints the RMSE and also returns it (shown as the cell output).
accuracy.rmse(predictions)

RMSE: 0.9386


0.9385927935440772

In [60]:
# Hyper-parameter grid used for the SVD and SVD++ searches:
# 2 x 2 x 2 = 8 combinations.
param_grid = dict(
    n_epochs=[5, 10],
    lr_all=[0.002, 0.005],
    reg_all=[0.4, 0.6],
)


In [61]:
# Grid search over param_grid for SVD, scoring every combination with
# 3-fold cross-validation on both RMSE and MAE.
gs = GridSearchCV(
    SVD,
    param_grid,
    cv=3,
    measures=["rmse", "mae"],
)
gs.fit(data)

In [62]:
# Report the SVD search results: the best parameter combination for each
# measure, followed by the best RMSE and MAE scores.
for metric_label, metric_value in (
    ('Best params:', gs.best_params),
    ("Best RMSE:", gs.best_score['rmse']),
    ("Best MAE:", gs.best_score['mae']),
):
    print(metric_label, metric_value)

Best params: {'rmse': {'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}, 'mae': {'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}}
Best RMSE: 0.9643229590232348
Best MAE: 0.7727637841189673


In [63]:
# Same search as for SVD (same grid, same measures, 3 folds), but with SVD++.
gs_svdpp = GridSearchCV(
    SVDpp,
    param_grid,
    cv=3,
    measures=["rmse", "mae"],
)
gs_svdpp.fit(data)


In [64]:
# Report the SVD++ search results: best parameters per measure, then the
# best RMSE and MAE scores.
svdpp_report = [
    ('Best params:', gs_svdpp.best_params),
    ("Best RMSE:", gs_svdpp.best_score['rmse']),
    ("Best MAE:", gs_svdpp.best_score['mae']),
]
for heading, result in svdpp_report:
    print(heading, result)


Best params: {'rmse': {'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}, 'mae': {'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}}
Best RMSE: 0.9637390712338404
Best MAE: 0.7726027941236756


In [65]:
# Hyper-parameter grid for the NMF search: 2 x 2 x 2 x 2 = 16 combinations.
param_nmf = dict(
    n_factors=[5, 10],
    n_epochs=[10, 15],
    reg_pu=[0.02, 0.1],
    reg_qi=[0.02, 0.1],
)


In [66]:
# Grid search over param_nmf for NMF, scored with 3-fold cross-validation
# on RMSE and MAE.
gs_nmf = GridSearchCV(
    NMF,
    param_nmf,
    cv=3,
    measures=['rmse', 'mae'],
)
gs_nmf.fit(data)

In [67]:
# Report the NMF search results: best parameters per measure, then the
# best RMSE and MAE scores.
nmf_best_score = gs_nmf.best_score
print('Best params:', gs_nmf.best_params)
print("Best RMSE:", nmf_best_score['rmse'])
print("Best MAE:", nmf_best_score['mae'])


Best params: {'rmse': {'n_factors': 10, 'n_epochs': 15, 'reg_pu': 0.1, 'reg_qi': 0.1}, 'mae': {'n_factors': 10, 'n_epochs': 15, 'reg_pu': 0.1, 'reg_qi': 0.1}}
Best RMSE: 0.9857737960411184
Best MAE: 0.764654415351862


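***On the grids searched, SVD++ gave the lowest RMSE (0.9637), SVD was practically tied with it (0.9643), and NMF was clearly behind (0.9858). On MAE, NMF was the lowest (0.7647), ahead of SVD++ (0.7726) and SVD (0.7728).***

The gap between SVD and SVD++ (about 0.0006 RMSE) is smaller than the fold-to-fold standard deviation of the 5-fold SVD run above (0.0037), so these results do not establish a winner between the two. Every best parameter value lies on the edge of its grid, and SVD with default parameters scored a lower RMSE in 5-fold cross-validation (0.9354) than any tuned model, so a wider grid would be needed before drawing a firm conclusion.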