In [1]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/772.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/772.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m768.0/772.0 kB[0m [31m12.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3163494 sha256=3c0fb7e19b640b6420359048bc542d8815b8422abf118e1a5003ece57c81ee5f
  Stored in directory: /root/.cache/pip/wheels/

In [2]:
from surprise import Dataset, Reader

reader = Reader(line_format='user item rating', sep='\t', rating_scale=(1, 5))
train_data = Dataset.load_from_file('train.csv', reader=reader)

In [23]:
import numpy as np
from surprise.prediction_algorithms import SVD, KNNBasic, KNNWithMeans, KNNWithZScore

param_grid = {
    SVD: {
        'n_epochs': [5, 10],
        'lr_all': [0.002, 0.005],
        'reg_all': [0.4, 0.6],
        'n_factors': [50, 100],
    },
    KNNBasic: {
        'k': [20, 40],
        'min_k': [1, 5],
        'sim_options': {
            'name': ['cosine', 'msd', 'pearson'],
            'user_based': [True, False],
        }
    },
    KNNWithMeans: {
        'k': [20, 40],
        'min_k': [1, 5],
        'sim_options': {
            'name': ['cosine', 'msd', 'pearson'],
            'user_based': [True, False],
        }
    },
    KNNWithZScore: {
        'k': [20, 40],
        'min_k': [1, 5],
        'sim_options': {
            'name': ['cosine', 'msd', 'pearson'],
            'user_based': [True, False],
        }
    }
}

In [24]:
algorithms = [SVD, KNNBasic, KNNWithMeans, KNNWithZScore]

In [None]:
from surprise.model_selection import GridSearchCV
from tqdm import tqdm

grid_search_results = dict()

for algorithm in tqdm(algorithms):
    current_param_grid = param_grid[algorithm]

    grid_search = GridSearchCV(algorithm, current_param_grid, measures=['rmse', 'mae', 'mse', 'fcp'], cv=5)
    grid_search.fit(train_data)
    grid_search_results[algorithm] = grid_search

In [28]:
for cls, result in grid_search_results.items():
    print(cls.__name__, result.best_score)

SVD {'rmse': 0.962311873072293, 'mae': 0.7712758672541239, 'mse': 0.9260590130253148, 'fcp': 0.6872404615722598}
KNNBasic {'rmse': 0.9865432641805876, 'mae': 0.7790153346051283, 'mse': 0.9732944563746375, 'fcp': 0.701349086585812}
KNNWithMeans {'rmse': 0.9408033730123566, 'mae': 0.7391727293224762, 'mse': 0.8851188965760162, 'fcp': 0.6953603565575506}
KNNWithZScore {'rmse': 0.9430370670048006, 'mae': 0.7404776009657875, 'mse': 0.8893377687172374, 'fcp': 0.6962916530747497}


In [30]:
best_algo = grid_search_results[KNNBasic].best_estimator['rmse']
best_params = grid_search_results[KNNBasic].best_params['rmse']

In [31]:
best_algo

<surprise.prediction_algorithms.knns.KNNBasic at 0x7cf9affd09a0>

In [32]:
best_params

{'k': 40, 'min_k': 1, 'sim_options': {'name': 'msd', 'user_based': False}}