In [1]:
import surprise

surprise.__version__

'1.1.1'

In [2]:
from surprise import Dataset
from surprise.model_selection import train_test_split

data = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(data, test_size=.25, random_state=0)

In [3]:
from surprise import SVD

algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2459eb86dd8>

In [4]:
predictions = algo.test(testset)
print(f'prediction type: {type(predictions)}, size: {len(predictions)}')
print('predictions[:5]')
predictions[:5]

prediction type: <class 'list'>, size: 25000
predictions[:5]


[Prediction(uid='120', iid='282', r_ui=4.0, est=3.708299224100119, details={'was_impossible': False}),
 Prediction(uid='882', iid='291', r_ui=4.0, est=3.9388186549275637, details={'was_impossible': False}),
 Prediction(uid='535', iid='507', r_ui=5.0, est=4.09037623598026, details={'was_impossible': False}),
 Prediction(uid='697', iid='244', r_ui=5.0, est=3.5837119192682456, details={'was_impossible': False}),
 Prediction(uid='751', iid='385', r_ui=4.0, est=3.410239115771723, details={'was_impossible': False})]

In [5]:
[(pred.uid, pred.iid, pred.est) for pred in predictions[:3]]

[('120', '282', 3.708299224100119),
 ('882', '291', 3.9388186549275637),
 ('535', '507', 4.09037623598026)]

In [6]:
uid = str(196)
iid = str(302)
pred = algo.predict(uid, iid)
print(pred)

user: 196        item: 302        r_ui = None   est = 4.39   {'was_impossible': False}


In [7]:
from surprise import accuracy 

accuracy.rmse(predictions)

RMSE: 0.9459


0.9458870744152914

In [8]:
import pandas as pd

ratings = pd.read_csv('../data/ml-latest-small/ratings.csv')
ratings.to_csv('../data/ml-latest-small/ratings_noh.csv', index=False, header=False)

In [9]:
from surprise import Reader

reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(0.5, 5))
data = Dataset.load_from_file('../data/ml-latest-small/ratings_noh.csv', reader=reader)

In [10]:
trainset, testset = train_test_split(data, test_size=.25, random_state=0)

algo = SVD(n_factors=50, random_state=0)
algo.fit(trainset) 
predictions = algo.test(testset)
accuracy.rmse(predictions)

RMSE: 0.8682


0.8681952927143516

In [11]:
ratings = pd.read_csv('../data/ml-latest-small/ratings.csv') 
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25, random_state=0)

algo = SVD(n_factors=50, random_state=0)
algo.fit(trainset) 
predictions = algo.test(testset)
accuracy.rmse(predictions)

RMSE: 0.8682


0.8681952927143516

In [12]:
from surprise.model_selection import cross_validate

ratings = pd.read_csv('../data/ml-latest-small/ratings.csv')
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

algo = SVD(random_state=0)
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8754  0.8707  0.8794  0.8754  0.8700  0.8742  0.0035  
MAE (testset)     0.6750  0.6694  0.6725  0.6735  0.6688  0.6719  0.0024  
Fit time          3.44    3.42    3.43    3.42    3.44    3.43    0.01    
Test time         0.10    0.10    0.15    0.15    0.10    0.12    0.03    


{'test_rmse': array([0.8754148 , 0.87071727, 0.87942313, 0.87538125, 0.86997531]),
 'test_mae': array([0.67504336, 0.6693532 , 0.67254813, 0.67346476, 0.66884815]),
 'fit_time': (3.4420013427734375,
  3.4200000762939453,
  3.4279978275299072,
  3.422999620437622,
  3.4409966468811035),
 'test_time': (0.0989992618560791,
  0.09699892997741699,
  0.14800000190734863,
  0.15000081062316895,
  0.09800362586975098)}

In [13]:
from surprise.model_selection import GridSearchCV

param_grid = {'n_epochs': [20, 40, 60], 'n_factors': [50, 100, 200]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)
print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

0.8763945573095354
{'n_epochs': 20, 'n_factors': 50}
