In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from surprise import Dataset, Reader, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, GridSearchCV, train_test_split
import random

In [17]:
# Load Surprise's built-in 'ml-100k' dataset.
data = Dataset.load_builtin('ml-100k')

# Keep a handle on the raw rating records and mirror them in a DataFrame
# (one row per record, four fields each) purely for inspection.
raw_ratings = data.raw_ratings
df = pd.DataFrame(
    data=raw_ratings,
    columns=['user', 'item', 'rating', 'timestamp'],
)

# Preview the first rows.
df.head()

Unnamed: 0,user,item,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [18]:
# Hyper-parameter search for SVD: every combination in the grid is scored
# with 3-fold cross-validation on RMSE and MAE.

# Seed both RNGs so the random fold split and the algorithm's random
# initialisation -- and therefore the reported best score and parameters --
# are the same every time this cell is re-run.
random.seed(0)
np.random.seed(0)

param_grid = {
    'n_epochs': [12, 20, 24],
    'lr_all': [0.01, 0.03, 0.05],
    'reg_all': [0.02, 0.1, 0.2]
}

gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# Report the best RMSE found and the parameter combination that achieved it.
print("Best RMSE score:", gs.best_score['rmse'])
print("Best parameters:", gs.best_params['rmse'])

Best RMSE score: 0.9241409686414221
Best parameters: {'n_epochs': 24, 'lr_all': 0.01, 'reg_all': 0.1}


In [19]:
# Re-score SVD using the hyper-parameters selected by the grid search.
# NOTE: this runs on the same `data` the search was fitted on, so the numbers
# below are not an independent hold-out estimate.

# Seed both RNGs so the fold split and model initialisation are reproducible.
random.seed(0)
np.random.seed(0)

# Unpack the winning configuration directly so this call cannot drift out of
# sync with the keys of the search grid.
algo = SVD(**gs.best_params['rmse'])

results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)
print("\nРезультати з найкращими параметрами:")
print("RMSE: ", np.mean(results['test_rmse']))
print("MAE: ", np.mean(results['test_mae']))

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9219  0.9262  0.9245  0.9242  0.0018  
MAE (testset)     0.7317  0.7309  0.7325  0.7317  0.0007  
Fit time          1.36    1.36    1.38    1.37    0.01    
Test time         0.20    0.19    0.19    0.19    0.00    

Результати з найкращими параметрами:
RMSE:  0.9241781344857912
MAE:  0.7316914299532095


In [20]:
# Hyper-parameter search for NMF, scored with 3-fold cross-validation on RMSE
# and MAE. (NMF is already imported in the notebook's first cell.)

# Seed both RNGs so the fold split and the random factor initialisation are
# reproducible across re-runs.
random.seed(0)
np.random.seed(0)

# `lr_bu` / `lr_bi` are the learning rates of the user/item bias terms, which
# NMF only uses when `biased=True`. This search leaves `biased` at its default
# (False), so trying several values for them would refit the same model four
# times per (n_epochs, reg_pu, reg_qi) setting and pick a "best" value from
# noise alone. They are kept as single-value entries (the library defaults)
# so that `gs.best_params['rmse']` still contains both keys.
param_grid = {
    'n_epochs': [50, 100],
    'lr_bu': [0.005],
    'lr_bi': [0.005],
    'reg_pu': [0.06, 0.1],
    'reg_qi': [0.06, 0.1]
}

gs = GridSearchCV(NMF, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

In [21]:
# Report the outcome of the NMF grid search.
print("Best RMSE score for NMF:", gs.best_score['rmse'])
print("Best parameters for NMF:", gs.best_params['rmse'])

# Seed both RNGs so the fold split and model initialisation are reproducible.
random.seed(0)
np.random.seed(0)

# Rebuild NMF from the winning configuration. Unpacking the dict keeps this
# call in sync with whatever keys the search grid defines.
best_params = gs.best_params['rmse']
algo_nmf = NMF(**best_params)

# NOTE: evaluated on the same `data` the search was fitted on, so this is not
# an independent hold-out estimate.
results_nmf = cross_validate(algo_nmf, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

print("\nРезультати з найкращими параметрами для NMF:")
print("RMSE: ", np.mean(results_nmf['test_rmse']))
print("MAE: ", np.mean(results_nmf['test_mae']))

Best RMSE score for NMF: 0.9472357552854861
Best parameters for NMF: {'n_epochs': 100, 'lr_bu': 0.005, 'lr_bi': 0.005, 'reg_pu': 0.1, 'reg_qi': 0.1}
Evaluating RMSE, MAE of algorithm NMF on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9478  0.9476  0.9482  0.9478  0.0003  
MAE (testset)     0.7508  0.7533  0.7498  0.7513  0.0014  
Fit time          4.30    3.40    3.40    3.70    0.43    
Test time         0.20    0.18    0.16    0.18    0.02    

Результати з найкращими параметрами для NMF:
RMSE:  0.947840414960535
MAE:  0.7513093368343302


In [22]:
# Hyper-parameter search for SVD++, scored with 3-fold cross-validation on
# RMSE and MAE. (SVDpp, GridSearchCV, cross_validate and numpy are already
# imported in the notebook's first cell, so they are not re-imported here.)

# Seed both RNGs so the fold split and the algorithm's random initialisation
# are reproducible across re-runs.
random.seed(0)
np.random.seed(0)

param_grid = {
    'n_epochs': [20, 50],
    'lr_all': [0.005, 0.01],
    'reg_all': [0.02, 0.1]
}

gs = GridSearchCV(SVDpp, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# Report the best RMSE found and the parameter combination that achieved it.
print("Best RMSE score for SVD++:", gs.best_score['rmse'])
print("Best parameters for SVD++:", gs.best_params['rmse'])

Best RMSE score for SVD++: 0.9233828576226859
Best parameters for SVD++: {'n_epochs': 50, 'lr_all': 0.01, 'reg_all': 0.1}


In [23]:
# Re-score SVD++ using the hyper-parameters selected by the grid search.
# NOTE: this runs on the same `data` the search was fitted on, so the numbers
# below are not an independent hold-out estimate.

# Seed both RNGs so the fold split and model initialisation are reproducible.
random.seed(0)
np.random.seed(0)

# Unpack the winning configuration so this call stays in sync with the keys
# of the search grid.
best_params = gs.best_params['rmse']
algo_svdpp = SVDpp(**best_params)

results_svdpp = cross_validate(algo_svdpp, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

print("\nРезультати з найкращими параметрами для SVD++:")
print("RMSE: ", np.mean(results_svdpp['test_rmse']))
print("MAE: ", np.mean(results_svdpp['test_mae']))

Evaluating RMSE, MAE of algorithm SVDpp on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9214  0.9230  0.9211  0.9218  0.0009  
MAE (testset)     0.7271  0.7292  0.7253  0.7272  0.0016  
Fit time          47.08   48.83   49.05   48.32   0.88    
Test time         7.47    8.72    7.50    7.90    0.58    

Результати з найкращими параметрами для SVD++:
RMSE:  0.9218408064652092
MAE:  0.7271813226885163
