[Surprise · A Python scikit for recommender systems.](http://surpriselib.com/)

In [1]:
# One-time setup: uncomment whichever line matches your package manager to
# install Surprise, then comment it out again so a full re-run skips it.
# !pip install scikit-surprise
# !conda install -c conda-forge scikit-surprise

In [2]:
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

from surprise.model_selection import KFold

# Fixed seed so that the fold assignment and the SVD factor initialisation --
# and therefore the reported scores -- are identical on every
# Restart Kernel -> Run All.
SEED = 42

# Load the movielens-100k dataset (download it if needed).
data = Dataset.load_builtin('ml-100k')

# Use the famous SVD algorithm; random_state pins its random initialisation.
algo = SVD(random_state=SEED)

# An integer `cv` leaves the fold shuffling unseeded, so build the 5-fold
# splitter explicitly and seed it.
folds = KFold(n_splits=5, random_state=SEED, shuffle=True)

# Run 5-fold cross-validation and print results.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9373  0.9381  0.9372  0.9367  0.9317  0.9362  0.0023  
MAE (testset)     0.7406  0.7401  0.7386  0.7381  0.7348  0.7384  0.0020  
Fit time          4.52    4.62    4.61    4.29    4.06    4.42    0.22    
Test time         0.19    0.18    0.17    0.12    0.15    0.16    0.02    


{'test_rmse': array([0.93725713, 0.93813911, 0.93721565, 0.93666567, 0.93167657]),
 'test_mae': array([0.74059654, 0.74013634, 0.73855699, 0.738126  , 0.73483364]),
 'fit_time': (4.518592119216919,
  4.618161916732788,
  4.611346960067749,
  4.285784959793091,
  4.060755014419556),
 'test_time': (0.18583202362060547,
  0.17894315719604492,
  0.16956734657287598,
  0.11783695220947266,
  0.14653325080871582)}

In [4]:
from surprise import KNNBasic

# Turn the whole of `data` into a single training set.
trainset = data.build_full_trainset()

# Create a KNNBasic model with default settings and train it in one step.
# `fit` hands back the fitted estimator, so keep that reference and end the
# cell with it to display the same repr.
algo = KNNBasic().fit(trainset)
algo


Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7f81086ed0d0>

In [6]:
import pandas as pd

from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate


# Toy ratings, one list per column. The column names themselves are
# irrelevant; only the column order handed to load_from_df matters.
ratings_dict = {
    'itemID': [1, 1, 1, 2, 2],
    'userID': [9, 32, 2, 45, 'user_foo'],
    'rating': [3, 2, 4, 3, 1],
}
df = pd.DataFrame(ratings_dict)

# A reader is still needed, but only its rating_scale parameter is required.
reader = Reader(rating_scale=(1, 5))

# The columns must be passed as user id, item id, rating (in that order).
# NOTE: this rebinds `data`, replacing the ml-100k dataset loaded earlier.
ordered_columns = ['userID', 'itemID', 'rating']
data = Dataset.load_from_df(df[ordered_columns], reader)

# The dataset can now be used like any other, e.g. with cross_validate.
cross_validate(NormalPredictor(), data, cv=2)

{'test_rmse': array([1.10922719, 1.26645399]),
 'test_mae': array([1.0428654 , 1.18356501]),
 'fit_time': (9.322166442871094e-05, 4.8160552978515625e-05),
 'test_time': (5.91278076171875e-05, 3.0040740966796875e-05)}