In [1]:
! pip install surprise


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162723 sha256=6c91ab53cf1ffc491a35034cf586def85494f16cab08d7b97b4f1448eee99e30
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


In [17]:
import random

import numpy as np
from surprise import dataset, accuracy, Dataset, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, GridSearchCV

In [18]:
# Load the built-in MovieLens-100k ratings through the imported Dataset class.
# prompt=True is kept as in the original call.
# NOTE(review): per the Surprise docs this asks for confirmation before
# downloading when the data is not already on disk, which would block an
# unattended "Run All" — confirm that is intended.
data = Dataset.load_builtin(name='ml-100k', prompt=True)

In [19]:
# Candidate algorithms, keyed by the label used in the printed report and in
# `param_grid`. Values are the classes themselves (not instances), because
# GridSearchCV instantiates them once per parameter combination.
algos = dict([
    ('SVD', SVD),
    ('SVD++', SVDpp),
    ('NMF', NMF),
])

In [20]:
# Hyperparameter grids for GridSearchCV, keyed by the same labels as `algos`.
# SVD and SVD++ are searched over identical candidate values, so their grids
# are generated together; each label still gets its own independent dict.
# NOTE(review): in the recorded run every winning value sits on an edge of its
# grid (e.g. n_epochs=15, reg_all=0.4, NMF n_epochs=200) — the ranges may be
# worth widening.
param_grid = {
    label: {
        'n_epochs': [5, 10, 15],
        'lr_all': [0.002, 0.005],
        'reg_all': [0.4, 0.6],
    }
    for label in ('SVD', 'SVD++')
}

# NMF regularises user and item factors separately and needs far more epochs.
param_grid['NMF'] = dict(
    n_epochs=[50, 100, 200],
    reg_pu=[0.06, 0.08, 0.1],
    reg_qi=[0.06, 0.08, 0.1],
)

In [21]:
# Tune each algorithm with a 3-fold grid search, then report the 3-fold
# cross-validated RMSE of the winning configuration.
#
# The search and the evaluation are both stochastic. Following the Surprise
# FAQ on reproducible experiments, `random` and `numpy.random` are seeded
# explicitly; doing it at the top of every iteration means each algorithm
# starts from the same RNG state, so re-running the cell (or reordering
# `algos`) reproduces the same numbers.
SEED = 42

for algo_name, algo_class in algos.items():
    print(f"Evaluating {algo_name}...")

    # Reset both RNGs so this algorithm's result does not depend on how much
    # randomness earlier iterations consumed.
    random.seed(SEED)
    np.random.seed(SEED)

    # Grid search for the best parameters (selection metric: mean RMSE)
    gs = GridSearchCV(algo_class, param_grid[algo_name], measures=['rmse'], cv=3)
    gs.fit(data)

    # Print the best RMSE score and the corresponding parameters
    print(f"Best RMSE for {algo_name}: {gs.best_score['rmse']}")
    print(f"Best parameters for {algo_name}: {gs.best_params['rmse']}")

    # Cross-validation with the best parameters.
    # NOTE(review): this re-scores on the same ratings that were used to pick
    # the parameters, so it shows fold-to-fold spread rather than an
    # independent estimate of generalisation error.
    results = cross_validate(algo_class(**gs.best_params['rmse']), data, measures=['RMSE'], cv=3, verbose=False)
    print(f"Cross-validation RMSE scores for {algo_name}: {results['test_rmse']}")
    print("")

Evaluating SVD...
Best RMSE for SVD: 0.9608886130054367
Best parameters for SVD: {'n_epochs': 15, 'lr_all': 0.005, 'reg_all': 0.4}
Cross-validation RMSE scores for SVD: [0.96159881 0.95904277 0.96084886]

Evaluating SVD++...
Best RMSE for SVD++: 0.9598032227864874
Best parameters for SVD++: {'n_epochs': 15, 'lr_all': 0.005, 'reg_all': 0.4}
Cross-validation RMSE scores for SVD++: [0.96322914 0.9594159  0.95877835]

Evaluating NMF...
Best RMSE for NMF: 0.9404170825783922
Best parameters for NMF: {'n_epochs': 200, 'reg_pu': 0.1, 'reg_qi': 0.1}
Cross-validation RMSE scores for NMF: [0.94070177 0.93702961 0.94245506]

