In [21]:
import numpy as np
import surprise

In [22]:
class MatrixFacto(surprise.AlgoBase):
    '''A basic rating prediction algorithm based on matrix factorization.

    Each user u is given a factor vector p_u and each item i a factor vector
    q_i; the predicted rating of u for i is the dot product of p_u and q_i.
    The vectors are learnt with plain (unregularized, bias-free) stochastic
    gradient descent on the squared prediction error of the known ratings.

    Args:
        learning_rate: Step size of each SGD update.
        n_epochs: Number of SGD passes over all the training ratings.
        n_factors: Length of each factor vector.
        random_state: Optional seed for the random initialization of the
            factors. With the default ``None`` the factors are drawn from
            NumPy's global random state, so results vary between runs.
    '''

    def __init__(self, learning_rate, n_epochs, n_factors, random_state=None):

        # Run the base-class initializer first, as Surprise's guide for
        # custom algorithms prescribes.
        surprise.AlgoBase.__init__(self)

        self.lr = learning_rate  # learning rate for SGD
        self.n_epochs = n_epochs  # number of iterations of SGD
        self.n_factors = n_factors  # number of factors
        self.random_state = random_state  # seed for factor initialization

    def train(self, trainset):
        '''Learn the vectors p_u and q_i with SGD.

        Stores the learnt factor matrices in ``self.pu`` (one row per user)
        and ``self.qi`` (one row per item), and keeps ``trainset`` in
        ``self.trainset`` for later use by ``estimate``.
        '''

        print('Fitting data with SGD...')

        # Use a dedicated generator only when a seed was requested; otherwise
        # keep drawing from NumPy's global state.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Randomly initialize the user and item factors
        pu = rng.normal(0, .1, (trainset.n_users, self.n_factors))
        qi = rng.normal(0, .1, (trainset.n_items, self.n_factors))

        for _ in range(self.n_epochs):
            for u, i, r_ui in trainset.all_ratings():
                err = r_ui - np.dot(pu[u], qi[i])
                # Both gradients must be evaluated at the same point, so keep
                # a copy of p_u before it is overwritten (pu[u] is a view).
                pu_before = pu[u].copy()
                # Update vectors p_u and q_i
                pu[u] += self.lr * err * qi[i]
                qi[i] += self.lr * err * pu_before

        self.pu, self.qi = pu, qi
        self.trainset = trainset

    def estimate(self, u, i):
        '''Return the estimated rating of user u for item i.

        ``u`` and ``i`` are used directly as row indices into ``self.pu`` and
        ``self.qi``. If the trainset does not know the user or the item, the
        global mean rating of the trainset is returned instead.
        '''

        # return scalar product between p_u and q_i if user and item are known, else return average rating
        if self.trainset.knows_user(u) and self.trainset.knows_item(i):
            return np.dot(self.pu[u], self.qi[i])
        else:
            return self.trainset.global_mean

In [23]:
# Load the built-in MovieLens-100k ratings dataset, then split it into
# 2 folds for cross-validation.
data = surprise.Dataset.load_builtin('ml-100k')
data.split(n_folds=2)

In [24]:
# Hyper-parameters of the hand-written factorization model, kept together
# so they are easy to tweak between runs.
mf_params = {'learning_rate': .01, 'n_epochs': 10, 'n_factors': 10}
algo = MatrixFacto(**mf_params)

# Cross-validate on the folds prepared above and report the RMSE.
surprise.evaluate(algo, data, measures=['RMSE'])

Evaluating RMSE of algorithm MatrixFacto.

------------
Fold 1
Fitting data with SGD...
RMSE: 0.9779
------------
Fold 2
Fitting data with SGD...
RMSE: 0.9844
------------
------------
Mean RMSE: 0.9812
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'rmse': [0.97792696538606039,
                             0.98437703504409346]})

In [25]:
# Point of comparison: Surprise's built-in SVD, a more sophisticated matrix
# factorization algorithm, run here with its default settings.
algo = surprise.SVD()
surprise.evaluate(algo=algo, data=data, measures=['RMSE'])

Evaluating RMSE of algorithm SVD.

------------
Fold 1
RMSE: 0.9565
------------
Fold 2
RMSE: 0.9549
------------
------------
Mean RMSE: 0.9557
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'rmse': [0.95653291227934356,
                             0.95491157971612828]})