In [1]:
import numpy as np

In [2]:
class MatrixFactorization(object):
    def __init__(self, data, K):
        '''
        Arguments:
        - data    : 2 dimensional rating matrix
        - K       : number of latent dimensions
        '''
        
        self.R = np.matrix(data)
        self.D = np.zeros( self.R.shape )
        self.K = K
           
        # User and Product biases
        self.b   = np.nanmean(self.R)
        self.u_b = np.zeros( self.R.shape[0] )
        self.p_b = np.zeros( self.R.shape[1] )
        
        # User and Product matrix
        self.U = np.random.uniform( size=(self.R.shape[0], K) )
        self.P = np.random.uniform( size=(K, self.R.shape[1]) )
    
    def _compure_error(self):
        self.D = (self.R - self.estimate_all())
        
        return self.D
    
    def train(self, alpha=0.1, beta=0.02, iterations=1000):
        '''
        Arguments:
        - alpha   : learning-rate 
        - beta    : regularization-rate
        '''
        
        for _ in range(iterations):
            self._compure_error()
            
            for i in range(self.R.shape[0]):      
                for j in range(self.R.shape[1]):
                    for k in range(self.K):
                        #update User and Product matrix
                        U_ik = alpha * ( self.P[k, j] * self.D[i, j] - beta * self.U[i, k])
                        P_kj = alpha * ( self.U[i, k] * self.D[i, j] - beta * self.P[k, j])
                        if np.isfinite(U_ik):
                            self.U[i, k] += U_ik
                        if np.isfinite(P_kj):
                            self.P[k, j] += P_kj
                            
                        #update User and Product biases
                        Ub_i = alpha * ( self.D[i, j] - beta * self.u_b[i] )
                        Pb_j = alpha * ( self.D[i, j] - beta * self.p_b[j] )
                        if np.isfinite(Ub_i):
                            self.u_b[i] += Ub_i
                        if np.isfinite(Pb_j):
                            self.p_b[j] += Pb_j      
            
            #non-negativity
            #self.U = self.U.clip(min=0)
            #self.P = self.P.clip(min=0)
            
        return np.nansum(np.nansum(abs(self._compure_error())))
    
    def estimate_all(self):
        return self.U.dot(self.P) + self.b + self.u_b[:, np.newaxis] + self.p_b[np.newaxis, :]
    
    def estimate(self, x, y):
        return self.U[x, :].dot(self.P[:, y]) + self.b + self.u_b[x] + self.p_b[y]

In [3]:
#test data

R = np.array([
    [5, 3, np.NaN, 1],
    [4, np.NaN, np.NaN, 1],
    [1, 1, np.NaN, 5],
    [1, np.NaN, np.NaN, 4],
    [np.NaN, 1, 5, 4],
])

In [4]:
MF = MatrixFactorization(data=R, K=2)

In [5]:
error = MF.train(alpha=0.1, beta=0.002, iterations=20)
print('error:', error)

error: 0.26834252751


In [6]:
print('estimated matrix:')
MF.estimate_all()

estimated matrix:


array([[ 5.00805464,  2.98411143,  4.49311611,  1.00635882],
       [ 3.99243489,  2.17574913,  4.46573376,  0.99903843],
       [ 1.03987866,  0.9419602 ,  4.97507408,  5.00599279],
       [ 0.98549215,  0.61960411,  4.79996952,  3.9906916 ],
       [ 1.55335905,  1.06170269,  4.96490103,  4.00498465]])

In [7]:
print('User feature matrix:')
MF.U

User feature matrix:


array([[-0.61030899,  1.32046386],
       [ 0.15603655,  1.37156962],
       [ 0.60422337, -0.93885464],
       [ 1.05725534, -0.33659867],
       [ 0.83774909, -0.25396097]])

In [8]:
print('product feature matrix:')
MF.P

product feature matrix:


array([[-0.51771884, -0.20174713,  0.85400494,  0.97951447],
       [ 1.49983659,  0.81721177,  0.26754861, -1.22194507]])