In [1]:
import numpy as np
import pandas as pd
import data

In [2]:
train = data.train
test = data.test

In [3]:
class MatrixFactorization():
    
    def __init__(self, train, test, k, epochs, verbose = False):
        """
        param R : Rating Matrix
        param k : latent parameter
        param learning_rate : alpha on weight update
        param reg_param : regularization parameter
        param epochs : training epochs
        param verbose : print status
        """
        
        self._R = train
        self._test = test
        self._I = np.array(np.vectorize(lambda x: 0 if x==0 else 1)(train), dtype = np.float64) # implicit matrix는 어떻게 하는 것이 좋을지?
        self._mask = self._I == 1 # indicating matrix
        self._N = 1./np.linalg.norm(self._I, axis=1)
        self._num_users, self._num_items = train.shape
        self._k = k
        self._learning_rate_1 = 0.001
        self._learning_rate_2 = 0.001
        self._learning_rate_3 = 0.001
        self._reg_param_6 = 0.001
        self._reg_param_7 = 0.001
        self._reg_param_8 = 0.001
        self._epochs = epochs
        self._verbose = verbose
        
        # neighborhood method
        self._similarity_k = 100
        self._similarity_lambda = 100
        

    def fit(self):
        """
        training Matrix Factorization : update matrix latent weight and bias
        """
        
        # init latent features _ how to initialize?
        self._P = np.random.normal(0, scale = 1.0/self._k, size=(self._num_users, self._k))
        self._Q = np.random.normal(0, scale = 1.0/self._k, size=(self._num_items, self._k))
        self._Y = np.random.normal(0, scale = 1.0/self._k, size=(self._num_items, self._k))
        self._W = np.random.normal(0, scale = 1.0/self._num_items, size=(self._num_items, self._num_items))
        self._C = np.random.normal(0, scale = 1.0/self._num_items, size=(self._num_items, self._num_items))
        
        # init biases
        self._b_P = np.zeros(self._num_users)
        self._b_Q = np.zeros(self._num_items)
        self._b = np.mean(self._R[np.where(self._R != 0)]) # 0이 아닌 rating에 대해 평균
        
        # get similarity matrix
        self._similarity_I = self.get_similar()
        
        # train while epochs
        self._training_process = []
        for epoch in range(self._epochs):
            # rating이 존재하는 index를 기준으로 training
            for u in range(self._num_users):
                for i in range(self._num_items):
                    if self._R[u, i] > 0:
                        self.gradient_descent(u, i, self._R[u, i])
                        
            train_cost, test_cost = self.cost()
            self._training_process.append((epoch, train_cost, test_cost))
            
            if self._verbose == True and ((epoch + 1) % 10 == 0 ):
                print("Iteration : %d, train_cost = %.4f, test_cost = %.4f" % (epoch+1, train_cost, test_cost))
        
    
    def cost(self):
        """
        compute RMSE
        """
        xi, yi = self._R.nonzero() # 0 이 아닌 값의 index 반환
        test_x, test_y = self._test.nonzero()
        predicted = self.get_complete_matrix()
        cost_train = 0
        cost_test = 0
        
        for x, y in zip(xi, yi):
            cost_train += pow(self._R[x, y] - predicted[x, y], 2)
        
        for i, j in zip(test_x, test_y):
            cost_test += pow(self._test[i, j] - predicted[i, j], 2)
        
        return np.sqrt(cost_train/len(xi)), np.sqrt(cost_test/len(test_x))
        
        
    def gradient(self, error, u, i):
        """
        gradient of latent feature for GD
        param error : rating - prediction error
        param u : user index
        param i : item index
        """
        
        dp = (error * self._Q[i, :]) - (self._reg_param_7 * self._P[u, :])
        dq = (error * (self._P[u, :] + self._N[u] * self._Y[self._mask[u,:], :].sum(axis=0))) - (self._reg_param_7 * self._Q[i, :])
        
        j = self._I[u, :].nonzero()
        dy = error * self._N[u] * self._Q[i, :] - self._reg_param_7 * self._Y[j, :]

        Rk_iu = 1./np.linalg.norm(self._I[u] * self._similarity_I[i])
        Nk_iu = 1./np.linalg.norm(self._I[u] * self._similarity_I[i])   
        # j_2 = (self._I[u] * self._similarity_I[i]).nonzero() # user rated * similar item (broadcasting) -> returns nonzero index
        temp_mask = self._I[u] * self._similarity_I[i] == 1
        
        dw = error * Rk_iu * (self._R[u, temp_mask] - self._b - self._b_P[u] - self._b_Q[temp_mask]) - self._reg_param_8 * self._W[i, temp_mask]
        dc = error * Nk_iu - self._reg_param_8 * self._C[i, temp_mask]
        
        return dp, dq, dy, j , dw, dc, temp_mask
    
    
    def gradient_descent(self, u, i, rating):
        """
        gradient descent function
        param u : user index
        param i : item index
        param rating : rating of (u, i)
        """
        
        prediction = self.get_prediction(u,i)
        error = rating - prediction
        
        self._b_P[u] += self._learning_rate_1 * (error - self._reg_param_6 * self._b_P[u])
        self._b_Q[i] += self._learning_rate_1 * (error - self._reg_param_6 * self._b_Q[i])
        
        dp, dq, dy, j, dw, dc, temp_mask = self.gradient(error, u, i)
        
        self._P[u, :] += self._learning_rate_2 * dp
        self._Q[i, :] += self._learning_rate_2 * dq
        self._Y[j, :] += self._learning_rate_2 * dy
        self._W[i, temp_mask] += self._learning_rate_3 * dw
        self._C[i, temp_mask] += self._learning_rate_3 * dc
        
        
    def get_similar(self):
        """
        get similar index matrix
        """
        r = np.corrcoef(self._R, rowvar = 0)
        
        n_matrix = np.zeros((self._num_items, self._num_items))
        for i in range(self._num_items):
            for j in range(self._num_items):
                n_ij = (self._I.T[i] * self._I.T[j]).sum()
                n_matrix[i, j] = n_ij
        
        similarity = np.zeros((self._num_items, self._num_items))
        for i in range(self._num_items):
            for j in range(self._num_items):
                similarity[i, j] = (n_matrix[i, j] / (n_matrix[i, j] + self._similarity_lambda)) * r[i, j]
                
        similarity_I = np.zeros((self._num_items, self._num_items))
        for i in range(self._num_items):
            idx = similarity[i].argsort()[-self._similarity_k:]
            similarity_I[i, idx] = 1
        
        return similarity_I
        
    
    def get_prediction(self, u, i):
        """
        get predicted rating by user i on item j
        """
        
        Rk_iu = 1./np.linalg.norm(self._I[u] * self._similarity_I[i])
        Nk_iu = 1./np.linalg.norm(self._I[u] * self._similarity_I[i])
        
        temp_mask = self._I[u] * self._similarity_I[i] == 1 # indicating similarity matrix
        
        temp_1 = self._b + self._b_P[u] + self._b_Q[i] + self._Q[i, :].T.dot(self._P[u, :] + self._N[u] * (self._Y[self._mask[u,:], :].sum(axis=0)))
        temp_2 = Rk_iu * (((self._R[u, temp_mask] - ((self._b + self._b_P[:, np.newaxis] + self._b_Q[np.newaxis, :])[u, temp_mask])) * self._W[i, temp_mask]).sum(axis= 0))
        temp_3 = Nk_iu * (self._C[i, temp_mask].sum()) 
        
        return temp_1 + temp_2 + temp_3
    

    def get_complete_matrix(self):
        """
        compute complete matrix
        """
        
        predictions = np.zeros([self._num_users, self._num_items])
        for u in range(self._num_users):
            for i in range(self._num_items):
                predictions[u, i] = self.get_prediction(u, i)
                
        predictions = np.array(predictions, dtype = np.float64)
        
        return predictions
    
    
    def print_results(self):
        """
        print fit results
        """

        print("Final R matrix:")
        print(self.get_complete_matrix())
        print("Final RMSE:")
        print(self._training_process[self._epochs-1][1])

In [None]:
np.random.seed(7)
np.seterr(all="warn")
    
factorizer = MatrixFactorization(train, test, k=40, epochs=80, verbose=True)
# regression parameter 2개
factorizer.fit()
factorizer.print_results()

  c /= stddev[:, None]
  c /= stddev[None, :]


## 궁금한점
implicit feedback 데이터는 어떻게 해야하는지?  
training dataset과 test dataset 분리 어떤 방식으로 하는지?