## implementation of Bayesian Personalized Ranking for Implicit Feedback

https://arxiv.org/ftp/arxiv/papers/1205/1205.2618.pdf

In [6]:
import numpy as np
import random
import data
from timeit import default_timer as timer

In [7]:
ml_100k = data.ml_100k
train = data.train
test = data.test

In [6]:
class MatrixFactorization():
    
    def __init__(self, data, train, test, k, learning_rate, reg_param, epsilon):
        """
        param R : Rating Matrix
        param k : latent parameter
        param learning_rate : alpha on weight update
        param reg_param : regularization parameter
        """
        
        self._A_I = np.array(np.vectorize(lambda x: 0 if x==0 else 1)(data), dtype = np.float64)
        self._X = np.array(np.vectorize(lambda x: 0 if x==0 else 1)(train), dtype = np.float64) # create X matrix : implicit feedbacks (binary)
        self._X_test = np.array(np.vectorize(lambda x: 0 if x==0 else 1)(test), dtype = np.float64)
        self._num_users, self._num_items = train.shape
        self._k = k
        self._learning_rate = learning_rate
        self._reg_param = reg_param
        self._epsilon = epsilon
        
        
    def fit(self):
        """
        training Matrix Factorization : update matrix latent weight and bias
        """
        
        # init latent features
        self._W = np.random.normal(scale = 1.0/self._k, size=(self._num_users, self._k))
        self._H = np.random.normal(scale = 1.0/self._k, size=(self._num_items, self._k))
        
        
        # train until cost converges
        count = 0
        self._training_process = []
        for j in range(10):
            start = timer()
            for i in range(80000) :    

                count += 1
                # randomly choice _ Bootstrap
                u = random.choice(self._X.nonzero()[0])
                i = random.choice(self._X[u].nonzero()[0]) 
                j = random.choice(np.argwhere(self._X[u] == 0).T[0]) 
                self.gradient_descent(u, i, j)
            print("complete 80000 iterations, time :%.4f" % (timer()-start))

            start_AUC = timer()
            AUC = self.compute_AUC()
            self._training_process.append((count, AUC))
            print("Iteration : %d, AUC = %.4f, AUC computation time: %.4f" % (count, AUC, timer()-start_AUC))

    
    def sigmoid(self, x):
        """
        return sigmoid 
        """
        return 1 / (1 + np.exp(-x))

    
    
    def gradient_descent(self, u, i, j):
        """
        gradient descent function
        param u : user index
        param i : item index i
        param j : item index j
        """
        xuij_hat = self._W[u].dot(self._H[i].T) - self._W[u].dot(self._H[j].T)
        sigmoid = self.sigmoid(xuij_hat) * np.exp(-xuij_hat)
        self._W[u, :] += self._learning_rate * (sigmoid * (self._H[i] - self._H[j]) - self._reg_param * self._W[u])
        self._H[i, :] += self._learning_rate * (sigmoid * self._W[u] - self._reg_param * self._H[i])
        self._H[j, :] += self._learning_rate * (-1 * sigmoid * self._W[u] - self._reg_param * self._H[j])
        
        
    def compute_AUC(self):
        
        self._X_hat = self._W.dot(self._H.T)
        u_nonzero, i_nonzero = self._X_test.nonzero()
        num = 0
        
        for u in u_nonzero :
            temp = 0
            temp_i = self._X_test[u].nonzero()[0]
            temp_j = np.argwhere(self._A_I[u] == 0).T[0]
            for i in temp_i :
                for j in temp_j :
                    if self._X_hat[u, i] > self._X_hat[u, j] :
                        temp += 1
            num += (temp / (len(temp_i)*len(temp_j)))
        auc = num / len(u_nonzero)
        
        return auc

In [7]:
np.random.seed(7)
np.seterr(all="warn")

factorizer = MatrixFactorization(ml_100k, train, test, k=40, learning_rate=0.1, reg_param=0.01, epsilon = 0.1)

# regression parameter 2개
factorizer.fit()

complete 80000 iterations, time :790.2666
Iteration : 80000, AUC = 0.8387, AUC computation time: 1425.3879
complete 80000 iterations, time :790.2922
Iteration : 160000, AUC = 0.8648, AUC computation time: 1406.2161
complete 80000 iterations, time :790.0098
Iteration : 240000, AUC = 0.8846, AUC computation time: 1403.2179
complete 80000 iterations, time :790.0381
Iteration : 320000, AUC = 0.8940, AUC computation time: 1421.6086
complete 80000 iterations, time :790.1541
Iteration : 400000, AUC = 0.8989, AUC computation time: 1400.0575
complete 80000 iterations, time :790.0356
Iteration : 480000, AUC = 0.9027, AUC computation time: 1431.5116
complete 80000 iterations, time :790.1428
Iteration : 560000, AUC = 0.9037, AUC computation time: 1412.4502
complete 80000 iterations, time :790.8554
Iteration : 640000, AUC = 0.9033, AUC computation time: 1408.6954
complete 80000 iterations, time :789.9466
Iteration : 720000, AUC = 0.9040, AUC computation time: 1412.7861
complete 80000 iterations, ti