## Implementation of Bayesian Personalized Ranking (BPR) for Implicit Feedback

In [1]:
import numpy as np
import data
import random

In [2]:
# Bind the `train` and `test` objects exposed by the project's `data` module.
train, test = data.train, data.test

In [5]:
class MatrixFactorization():
    """
    Matrix factorization trained with the Bayesian Personalized Ranking (BPR)
    criterion on implicit feedback.

    Every non-zero entry of the training matrix is treated as an observed
    (positive) interaction; every zero entry as unobserved. The model learns
    user factors W and item factors H such that, for a user u, an observed
    item i scores higher than an unobserved item j:
    x_uij = W[u].H[i] - W[u].H[j] > 0.
    """

    def __init__(self, train, test, k, learning_rate, reg_param, epsilon):
        """
        param train : user-by-item rating matrix (2-D array); non-zero means observed
        param test : held-out data; stored, not used by any method of this class
        param k : number of latent factors
        param learning_rate : alpha on weight update
        param reg_param : regularization parameter (lambda)
        param epsilon : stored, not used by any method of this class
        """
        self._R = train
        self._test = test
        # Binary implicit-feedback matrix: 1.0 where a rating exists, else 0.0.
        self._X = (np.asarray(train) != 0).astype(np.float64)
        self._num_users, self._num_items = train.shape
        self._k = k
        self._learning_rate = learning_rate
        self._reg_param = reg_param
        self._epsilon = epsilon

    def fit(self, n_iter=100, log_every=10, verbose=True):
        """
        Train the latent factors with stochastic gradient ascent on the BPR
        objective.

        Each step draws one observed (user, item) pair uniformly at random,
        then one item the same user has NOT interacted with, and applies a
        single update. All randomness comes from NumPy's global generator, so
        np.random.seed() makes a run reproducible.

        param n_iter : number of stochastic updates
        param log_every : record the log posterior every this many steps
                          (0 or None disables logging)
        param verbose : print the step counter and the logged values
        raises ValueError : if n_iter > 0 but no user has both an observed
                            and an unobserved item
        """
        # init latent features
        self._W = np.random.normal(scale=1.0 / self._k, size=(self._num_users, self._k))
        self._H = np.random.normal(scale=1.0 / self._k, size=(self._num_items, self._k))
        self._training_process = []

        observed = self._X > 0
        # Users that have interacted with every item offer no negative item
        # to rank against, so their pairs are excluded from sampling.
        has_negative = ~observed.all(axis=1)
        pair_users, pair_items = np.nonzero(observed & has_negative[:, None])

        if n_iter > 0 and pair_users.size == 0:
            raise ValueError(
                "training matrix has no user with both an observed and an unobserved item"
            )

        for step in range(1, n_iter + 1):
            pick = np.random.randint(pair_users.size)
            u, i = pair_users[pick], pair_items[pick]

            negatives = np.flatnonzero(~observed[u])
            j = negatives[np.random.randint(negatives.size)]

            self.gradient_descent(u, i, j)
            if verbose:
                print(step)

            if log_every and step % log_every == 0:
                log_p = self.log_posterior()
                self._training_process.append((step, log_p))
                if verbose:
                    print("Iteration : %d, log_posterior = %.4f" % (step, log_p))

    def sigmoid(self, x):
        """
        return sigmoid of x, computed as exp(-log(1 + exp(-x))) so that large
        negative inputs do not overflow
        """
        return np.exp(-np.logaddexp(0.0, -x))

    def get_complete_matrix(self):
        """
        return the full predicted score matrix W . H^T (users x items)
        """
        return self._W.dot(self._H.T)

    def log_posterior(self):
        """
        compute the BPR objective of the current parameters:

            sum over (u, i observed, j unobserved) of ln sigmoid(x_ui - x_uj)
            - reg_param * (||W||^2 + ||H||^2)

        Each user is visited exactly once.
        """
        scores = self.get_complete_matrix()
        observed = self._X > 0

        log_likelihood = 0.0
        for u in range(self._num_users):
            positive = scores[u, observed[u]]
            negative = scores[u, ~observed[u]]
            if positive.size == 0 or negative.size == 0:
                continue
            diff = positive[:, None] - negative[None, :]
            # ln sigmoid(d) == -ln(1 + exp(-d)); logaddexp keeps it finite.
            log_likelihood -= np.logaddexp(0.0, -diff).sum()

        penalty = self._reg_param * (np.sum(self._W ** 2) + np.sum(self._H ** 2))
        return log_likelihood - penalty

    def gradient_descent(self, u, i, j):
        """
        one stochastic update for the triple (u, i, j); despite the name this
        is a gradient ASCENT step, because the BPR objective is maximized

        param u : user index
        param i : item index i (observed for u)
        param j : item index j (unobserved for u)
        """
        # Snapshot the rows so all three updates use the pre-update values.
        w_u = self._W[u].copy()
        h_i = self._H[i].copy()
        h_j = self._H[j].copy()

        x_uij = w_u.dot(h_i - h_j)
        # d/dx ln sigmoid(x) = 1 - sigmoid(x) = sigmoid(-x)
        g = self.sigmoid(-x_uij)

        lr = self._learning_rate
        reg = self._reg_param
        # The factor 2 from differentiating the squared norm is absorbed into reg.
        self._W[u, :] += lr * (g * (h_i - h_j) - reg * w_u)
        self._H[i, :] += lr * (g * w_u - reg * h_i)
        self._H[j, :] += lr * (-g * w_u - reg * h_j)

    def print_results(self):
        """
        print fit results: the predicted score matrix and the last logged
        (iteration, log posterior) tuple
        """
        print("Final R matrix:")
        print(self.get_complete_matrix())
        print("Final log posterior:")
        if self._training_process:
            print(self._training_process[-1])
        else:
            print("(no log posterior was recorded during fit)")

In [None]:
# Seed NumPy's global random generator and report floating-point problems as warnings.
np.random.seed(7)
np.seterr(all="warn")

# Build the factorizer from the train/test data with explicit hyperparameters.
factorizer = MatrixFactorization(
    train,
    test,
    k=15,
    learning_rate=0.001,
    reg_param=0.001,
    epsilon=0.1,
)

# two regression parameters (translated from the original comment)
factorizer.fit()

1
2
3
4
5
6
7
8
9
10
