http://surpriselib.com

# Load data

In [2]:
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')

# Manual pipeline

## Split data in train and test

In [6]:
from surprise.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.2, random_state=42)

In [7]:
test

[('907', '143', 5.0),
 ('371', '210', 4.0),
 ('218', '42', 4.0),
 ('829', '170', 4.0),
 ('733', '277', 1.0),
 ('363', '1512', 1.0),
 ('193', '487', 5.0),
 ('808', '313', 5.0),
 ('557', '682', 2.0),
 ('774', '196', 3.0),
 ('638', '118', 3.0),
 ('632', '81', 5.0),
 ('417', '200', 4.0),
 ('580', '471', 3.0),
 ('640', '91', 4.0),
 ('450', '328', 4.0),
 ('596', '13', 2.0),
 ('586', '467', 4.0),
 ('653', '502', 2.0),
 ('378', '517', 3.0),
 ('405', '65', 1.0),
 ('279', '399', 4.0),
 ('327', '293', 3.0),
 ('346', '276', 1.0),
 ('59', '928', 4.0),
 ('514', '22', 4.0),
 ('807', '402', 5.0),
 ('473', '327', 3.0),
 ('342', '324', 1.0),
 ('269', '136', 4.0),
 ('654', '1', 4.0),
 ('250', '28', 4.0),
 ('282', '689', 2.0),
 ('534', '619', 4.0),
 ('194', '481', 3.0),
 ('184', '118', 2.0),
 ('291', '739', 3.0),
 ('293', '31', 2.0),
 ('943', '1028', 2.0),
 ('65', '69', 3.0),
 ('562', '135', 5.0),
 ('466', '62', 3.0),
 ('847', '317', 3.0),
 ('650', '521', 3.0),
 ('656', '326', 1.0),
 ('366', '53', 5.0),
 

In [9]:
train.n_users, train.n_items

(943, 1651)

## Train model

- Use 2 models : user & item based
https://surprise.readthedocs.io/en/stable/knn_inspired.html

In [11]:
from surprise import SVD

model = SVD()

In [12]:
model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fd6d9197250>

## Make predictions

In [14]:
predictions = model.test(test)
predictions

[Prediction(uid='907', iid='143', r_ui=5.0, est=4.8517497552112205, details={'was_impossible': False}),
 Prediction(uid='371', iid='210', r_ui=4.0, est=4.2336039302898225, details={'was_impossible': False}),
 Prediction(uid='218', iid='42', r_ui=4.0, est=3.5923614405339315, details={'was_impossible': False}),
 Prediction(uid='829', iid='170', r_ui=4.0, est=4.008180210929193, details={'was_impossible': False}),
 Prediction(uid='733', iid='277', r_ui=1.0, est=2.908722176520155, details={'was_impossible': False}),
 Prediction(uid='363', iid='1512', r_ui=1.0, est=3.357134730170079, details={'was_impossible': False}),
 Prediction(uid='193', iid='487', r_ui=5.0, est=3.5778387469577577, details={'was_impossible': False}),
 Prediction(uid='808', iid='313', r_ui=5.0, est=4.357332887802932, details={'was_impossible': False}),
 Prediction(uid='557', iid='682', r_ui=2.0, est=3.3001113935343422, details={'was_impossible': False}),
 Prediction(uid='774', iid='196', r_ui=3.0, est=2.6598681496173238, 

## Evaluation

In [16]:
from surprise import accuracy

accuracy.rmse(predictions=predictions)

RMSE: 0.9380


0.9380006431882001

In [17]:
accuracy.mae(predictions=predictions)

MAE:  0.7397


0.7396981029970237

# Cross validation

In [18]:
from surprise.model_selection import cross_validate

cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9314  0.9307  0.9312  0.9438  0.9345  0.9343  0.0049  
MAE (testset)     0.7326  0.7327  0.7363  0.7426  0.7357  0.7360  0.0036  
Fit time          3.72    3.66    3.64    3.85    3.88    3.75    0.10    
Test time         0.10    0.19    0.10    0.10    0.19    0.14    0.04    


{'test_rmse': array([0.93137282, 0.93073342, 0.93119095, 0.94378029, 0.9345494 ]),
 'test_mae': array([0.73262706, 0.73267075, 0.73625911, 0.74260416, 0.73574617]),
 'fit_time': (3.715752124786377,
  3.6570870876312256,
  3.6384329795837402,
  3.845144033432007,
  3.878741979598999),
 'test_time': (0.09677886962890625,
  0.19050812721252441,
  0.10015392303466797,
  0.10469675064086914,
  0.1918962001800537)}