# Домашня робота №7 


#### Імпортуємо бібліотеки 

In [39]:
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split, cross_validate
from surprise import SVD, SVDpp, NMF
from surprise import accuracy
from surprise.model_selection import GridSearchCV, RandomizedSearchCV
from hyperopt import hp, fmin, tpe, Trials

#### Підготуємо датасет 

In [37]:
data = Dataset.load_builtin('ml-100k')
reader = Reader(rating_scale=(1, 5))
df = pd.DataFrame(data.raw_ratings, columns=['user_id', 'item_id', 'rating', 'timestamp'])
data = Dataset.load_from_df(df[['user_id', 'item_id', 'rating']], reader)
df

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596
...,...,...,...,...
99995,880,476,3.0,880175444
99996,716,204,5.0,879795543
99997,276,1090,1.0,874795795
99998,13,225,2.0,882399156


#### Розділимо вибірку

In [8]:
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

#### Оберемо найкращі параметри

In [42]:
from hyperopt import hp, fmin, tpe, Trials

algo_spaces = {
#     'SVD': {
#         'n_factors': hp.choice('n_factors_svd', [50, 100, 150]),
#         'n_epochs': hp.choice('n_epochs_svd', [10, 20, 30]),
#         'lr_all': hp.uniform('lr_all_svd', 0.001, 0.1),
#         'reg_all': hp.uniform('reg_all_svd', 0.01, 0.2),
#         'random_state': 0
#     },
#      'SVDpp': {
#         'n_factors': hp.choice('n_factors_svdpp', [50, 100, 150]),
#         'n_epochs': hp.choice('n_epochs_svdpp', [10, 20, 30]),
#         'lr_all': hp.uniform('lr_all_svdpp', 0.001, 0.1),
#         'reg_all': hp.uniform('reg_all_svdpp', 0.01, 0.2),
#         'random_state': 0
#     },
    'NMF': {
        'n_factors': hp.choice('n_factors_nmf', [5, 10, 15, 20, 25]),
        'n_epochs': hp.choice('n_epochs_nmf', [10, 20, 30, 40, 50]),
        'biased': hp.choice('biased_nmf', [True, False]),
        'reg_pu': hp.uniform('reg_pu_nmf', 0.001, 0.1),
        'reg_qi': hp.uniform('reg_qi_nmf', 0.001, 0.1),
        'lr_bu': hp.uniform('lr_bu_nmf', 0.001, 0.1),
        'lr_bi': hp.uniform('lr_bi_nmf', 0.001, 0.1),
        'random_state': 0
    }
}

def algo_objective(params, algo_name):
    if algo_name == 'SVD':
        algo = SVD(**params)
    elif algo_name == 'SVDpp':
        algo = SVDpp(**params)
    elif algo_name == 'NMF':
        algo = NMF(**params)
    
    rmse = cross_validate(algo, data, measures=['RMSE'], cv=5)['test_rmse'].mean()
    return rmse

for algo_name, space in algo_spaces.items():
    trials = Trials() 
    best = fmin(lambda params: algo_objective(params, algo_name), space, algo=tpe.suggest, max_evals=50, trials=trials)
    print(f"Best parameters for {algo_name}:")
    print(best)

100%|████████████████████████████████████████████████| 50/50 [10:15<00:00, 12.31s/trial, best loss: 0.9442320731298286]
Best parameters for NMF:
{'biased_nmf': 0, 'lr_bi_nmf': 0.008129025753785673, 'lr_bu_nmf': 0.027399460741379694, 'n_epochs_nmf': 1, 'n_factors_nmf': 2, 'reg_pu_nmf': 0.08468010916047869, 'reg_qi_nmf': 0.09560446522744714}


#### Підставимо параметри в моделі

In [47]:
svd_model = SVD(lr_all=0.012, n_epochs=2, n_factors=2, reg_all=0.108)
svdpp_model = SVDpp(lr_all=0.01, n_epochs= 5, n_factors=5, reg_all=0.1)
nmf_model = NMF(biased=0, lr_bi=0.008129025753785673, lr_bu=0.027399460741379694,
                n_epochs=1, n_factors=2,
                reg_pu=0.08468010916047869, reg_qi=0.09560446522744714)


#### Застосуємо крос-валідацію

In [48]:
models = [svd_model, svdpp_model, nmf_model]
for model in models:
    print(f"Cross-validating {model.__class__.__name__}")
    cross_validate(model, data, measures=['RMSE'], cv=5, verbose=True)


Cross-validating SVD
Evaluating RMSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9597  0.9664  0.9550  0.9553  0.9574  0.9588  0.0042  
Fit time          0.10    0.13    0.13    0.13    0.14    0.13    0.01    
Test time         0.10    0.17    0.12    0.16    0.10    0.13    0.03    
Cross-validating SVDpp
Evaluating RMSE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9500  0.9529  0.9459  0.9389  0.9531  0.9482  0.0053  
Fit time          19.71   20.05   20.63   20.31   20.47   20.23   0.33    
Test time         2.45    2.17    2.40    2.03    2.16    2.24    0.16    
Cross-validating NMF
Evaluating RMSE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.8260  1.8319  1.8456  1.8265  1.8232  1.8307  0.0080  
Fit time          0.07    0.08    0.