### Basic Usage

In [1]:
from surprise import SVD
from surprise import KNNBasic
from surprise import BaselineOnly
from surprise import Dataset
from surprise.model_selection import cross_validate

In [2]:
# Cell-local import: KFold lets the cross-validation split below be seeded.
from surprise.model_selection import KFold

# Load the built-in MovieLens 100k dataset (https://grouplens.org/datasets/movielens/)
data = Dataset.load_builtin('ml-100k')

# We'll use the well-known SVD algorithm.
# Other available algorithms: https://surprise.readthedocs.io/en/stable/prediction_algorithms_package.html
# random_state pins SVD's random initialisation so re-running the cell
# gives the same model.
algo = SVD(random_state=42)

# Run 5-fold cross-validation and print results.
# The folds are seeded as well (42 is an arbitrary fixed seed); without this
# the reported RMSE/MAE change on every execution of the cell.
# `result` is a dict with keys 'test_rmse', 'test_mae', 'fit_time', 'test_time'.
folds = KFold(n_splits=5, random_state=42, shuffle=True)
result = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9402  0.9287  0.9389  0.9407  0.9353  0.9368  0.0044  
MAE (testset)     0.7423  0.7317  0.7417  0.7408  0.7363  0.7385  0.0040  
Fit time          4.09    4.14    4.16    4.20    4.16    4.15    0.03    
Test time         0.19    0.18    0.14    0.18    0.15    0.17    0.02    


In [13]:
import pandas as pd

# Average every cross-validation metric over the folds, building the frame once.
cv_means = pd.DataFrame(result).mean()

# Select by label rather than by position: integer keys on a label-indexed
# Series are deprecated in pandas, and labels do not depend on the order of
# the keys in `result`.
# Displays (mean test RMSE, mean fit time).
cv_means['test_rmse'], cv_means['fit_time']

(0.9365406423111888, 4.305975866317749)

### Train-Test Split on Custom Dataset

In [5]:
import os
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold
from surprise import accuracy
from surprise.model_selection import train_test_split


# Path to the dataset file. The path has no leading '~', so expanduser()
# returns it unchanged and it is resolved relative to the working directory.
file_path = os.path.expanduser('ml-100k/u.data')

# As we're loading a custom dataset, we need to define a reader. In the
# movielens-100k dataset, each line has the following format:
# 'user item rating timestamp', separated by '\t' characters.
reader = Reader(line_format='user item rating timestamp', sep='\t')

# NOTE: this rebinds `data`, replacing the built-in dataset loaded earlier.
data = Dataset.load_from_file(file_path, reader=reader)

# We can now use this dataset as we please, e.g. calling cross_validate.
# The 5 folds are seeded (42 is an arbitrary fixed seed) so the scores are
# reproducible across runs; the returned dict of per-fold metrics and timings
# is the cell's displayed output.
cross_validate(
    KNNBasic(),
    data,
    cv=KFold(n_splits=5, random_state=42, shuffle=True),
    verbose=True,
)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9811  0.9856  0.9767  0.9758  0.9766  0.9792  0.0037  
MAE (testset)     0.7732  0.7785  0.7727  0.7719  0.7694  0.7731  0.0030  
Fit time          0.30    0.41    0.40    0.35    0.36    0.36    0.04    
Test time         3.48    3.65    3.46    3.24    3.07    3.38    0.20    


{'test_rmse': array([0.98112338, 0.9856349 , 0.97666239, 0.97583508, 0.97661851]),
 'test_mae': array([0.77321689, 0.77850231, 0.77266881, 0.77188031, 0.76936857]),
 'fit_time': (0.2971210479736328,
  0.41384458541870117,
  0.3968045711517334,
  0.34952497482299805,
  0.35667848587036133),
 'test_time': (3.4816198348999023,
  3.646031618118286,
  3.4574084281921387,
  3.242081642150879,
  3.06756329536438)}

In [6]:
# Hold out 25% of the ratings for testing. random_state fixes the shuffle
# (42 is an arbitrary seed) so the same split -- and therefore the same RMSE
# downstream -- is produced on every run.
trainset, testset = train_test_split(data, test_size=.25, random_state=42)

In [7]:
# Basic k-NN model with its default settings.
algo = KNNBasic()

# Fit on the training portion, then predict every rating held out in testset.
algo.fit(trainset)
predictions = algo.test(testset, verbose=False)

# Score the predictions with RMSE; the value is printed and also returned,
# so it appears as the cell's output.
accuracy.rmse(predictions, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9847


0.9847128398757801