In [1]:
from surprise import SVD, SVDpp, NMF
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold
from surprise.model_selection import RandomizedSearchCV, GridSearchCV

In [2]:
# Load the built-in 'ml-100k' ratings dataset that every later cell evaluates on.
data = Dataset.load_builtin(name='ml-100k')

In [3]:
# SVD model with hand-set hyperparameters and a fixed random_state.
algo = SVD(
    n_epochs=15,
    lr_all=0.01,
    reg_all=0.4,
    random_state=42,
)

In [4]:
# 5-fold cross-validation of the SVD model, reporting RMSE and MAE per fold.
# The fold iterator is seeded explicitly: a bare `cv=5` builds an unseeded,
# shuffled KFold, so the split (and the scores) would change on every run.
results = cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9573  0.9634  0.9467  0.9659  0.9523  0.9571  0.0070  
MAE (testset)     0.7661  0.7711  0.7604  0.7739  0.7615  0.7666  0.0053  
Fit time          0.56    0.44    0.43    0.43    0.42    0.46    0.05    
Test time         0.07    0.10    0.11    0.10    0.06    0.09    0.02    


In [5]:
# SVD++ model
# Both the model and the fold split are seeded so the reported scores are
# repeatable across kernel restarts; all other SVDpp hyperparameters stay at
# their library defaults.
algo_svdpp = SVDpp(random_state=42)
results_svdpp = cross_validate(
    algo_svdpp,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9160  0.9205  0.9175  0.9119  0.9242  0.9180  0.0041  
MAE (testset)     0.7185  0.7228  0.7175  0.7156  0.7280  0.7205  0.0044  
Fit time          17.18   17.24   17.09   17.00   17.34   17.17   0.12    
Test time         2.14    1.96    2.00    1.93    2.34    2.07    0.15    


In [6]:
# NMF model
# Both the model and the fold split are seeded so the reported scores are
# repeatable across kernel restarts; all other NMF hyperparameters stay at
# their library defaults.
algo_nmf = NMF(random_state=42)
results_nmf = cross_validate(
    algo_nmf,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9627  0.9625  0.9684  0.9670  0.9643  0.9650  0.0024  
MAE (testset)     0.7555  0.7558  0.7600  0.7623  0.7608  0.7589  0.0028  
Fit time          1.36    1.53    1.25    1.37    1.11    1.32    0.14    
Test time         0.11    0.11    0.06    0.06    0.10    0.09    0.02    


In [7]:
# Candidate SVD hyperparameters: 3 x 3 x 3 = 27 possible combinations.
param_dist = {
    'n_epochs': [5, 10, 15],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.4, 0.6, 0.8],
}

# Randomized search evaluates only n_iter=5 sampled combinations, so which
# ones get tried decides the winner. random_state fixes that sample, and the
# seeded KFold fixes the train/test split, so the selection is repeatable.
# NOTE: the SVD instances created by the search are still initialised without
# a seed (no 'random_state' entry in param_dist), so scores can vary slightly.
random_search = RandomizedSearchCV(
    SVD,
    param_distributions=param_dist,
    n_iter=5,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42),
    random_state=42,
)
random_search.fit(data)

# Parameter combination with the best mean RMSE among the sampled candidates.
best_params = random_search.best_params['rmse']
best_params

{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.4}

In [8]:
# Full grid of SVD hyperparameters: all 3 x 3 x 3 = 27 combinations are evaluated.
param_grid = {
    'n_epochs': [5, 10, 15],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.4, 0.6, 0.8],
}

# The fold iterator is seeded so the train/test split is the same on every
# run; with a bare `cv=5` the split is reshuffled each time, which can change
# which combination comes out on top.
# NOTE: the SVD instances created by the search are still initialised without
# a seed (no 'random_state' entry in param_grid), so scores can vary slightly.
grid_search = GridSearchCV(
    SVD,
    param_grid,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42),
)
grid_search.fit(data)

# Parameter combination with the best mean RMSE over the whole grid.
# This rebinds `best_params`, replacing the value set by the randomized search.
best_params = grid_search.best_params['rmse']
best_params

{'n_epochs': 15, 'lr_all': 0.01, 'reg_all': 0.4}