In [1]:
!pip install scikit-surprise pandas

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357231 sha256=06ee1c4627f5313da9102822c91f0d1248e34594b8cd25a00454fc72eb599667
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Succe

In [2]:
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, GridSearchCV
from surprise import SVD, SVDpp, NMF

In [6]:
movies_path = 'movies.csv'
ratings_path = 'ratings.csv'
tags_path = 'tags.csv'
links_path = 'links.csv'

movies = pd.read_csv(movies_path)
ratings = pd.read_csv(ratings_path)
tags = pd.read_csv(tags_path)
links = pd.read_csv(links_path)

ratings = ratings[ratings['userId'].isin(ratings['userId'].unique()[:1000])]
ratings = ratings[ratings['movieId'].isin(ratings['movieId'].unique()[:1000])]

reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

def tune_and_evaluate(algo_class, param_grid):
    gs = GridSearchCV(algo_class, param_grid, measures=['rmse'], cv=3)
    gs.fit(data)
    print(f"Best RMSE: {gs.best_score['rmse']}")
    print(f"Best parameters: {gs.best_params['rmse']}")
    return gs.best_estimator['rmse']


In [7]:
param_grid_svd = {
    'n_epochs': [20, 30],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.02, 0.1]
}
best_svd = tune_and_evaluate(SVD, param_grid_svd)

Best RMSE: 0.8533263417946898
Best parameters: {'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.1}


In [8]:
param_grid_svdpp = {
    'n_epochs': [20, 30],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.02, 0.1]
}
best_svdpp = tune_and_evaluate(SVDpp, param_grid_svdpp)

Best RMSE: 0.8455539937939988
Best parameters: {'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.02}


In [9]:
param_grid_nmf = {
    'n_epochs': [20, 30],
    'n_factors': [15, 20],
    'reg_pu': [0.06, 0.1],
    'reg_qi': [0.06, 0.1]
}
best_nmf = tune_and_evaluate(NMF, param_grid_nmf)

Best RMSE: 0.8770356578893281
Best parameters: {'n_epochs': 30, 'n_factors': 20, 'reg_pu': 0.1, 'reg_qi': 0.1}


In [10]:
cross_validate(best_svd, data, measures=['RMSE'], cv=3, verbose=True)
cross_validate(best_svdpp, data, measures=['RMSE'], cv=3, verbose=True)
cross_validate(best_nmf, data, measures=['RMSE'], cv=3, verbose=True)

Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8610  0.8528  0.8471  0.8536  0.0057  
Fit time          0.74    0.89    0.74    0.79    0.07    
Test time         0.08    0.23    0.09    0.13    0.07    
Evaluating RMSE of algorithm SVDpp on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8533  0.8435  0.8476  0.8482  0.0040  
Fit time          10.52   10.45   10.67   10.54   0.09    
Test time         2.47    2.67    2.59    2.58    0.08    
Evaluating RMSE of algorithm NMF on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8859  0.8725  0.8709  0.8764  0.0067  
Fit time          0.48    0.50    0.50    0.49    0.01    
Test time         0.07    0.07    0.23    0.12    0.08    


{'test_rmse': array([0.88590151, 0.87250274, 0.87089932]),
 'fit_time': (0.47557544708251953, 0.5027868747711182, 0.5019881725311279),
 'test_time': (0.06702423095703125, 0.06619381904602051, 0.22749876976013184)}