In [1]:
import numpy as np
# import data

In [2]:
# toy data
trust=np.array([[0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,1.0,0.0,0.8],
            [0.8,0.0,0.0,0.0,0.0,0.0],
            [0.8,1.0,0.0,0.0,0.6,0.0],
            [0.0,0.0,0.4,0.0,0.0,0.8],
            [0.0,0.0,0.0,0.0,0.0,0.0]])

train=np.array([[5.0,2.0,0.0,3.0,0.0,4.0,0.0,0.0],
            [4.0,3.0,0.0,0.0,5.0,0.0,0.0,0.0],
            [4.0,0.0,2.0,0.0,0.0,0.0,2.0,4.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [5.0,1.0,2.0,0.0,4.0,3.0,0.0,0.0],
            [4.0,3.0,0.0,2.0,4.0,0.0,3.0,5.0]])

In [3]:
# train = data.train
# test = data.test
# trust = data.trust

In [4]:
class SoRec():
    
    def __init__(self, train, test, trust, dim, learning_rate, epochs, verbose = False):
        """
        param train : Rating Matrix for training
        param test : Rating Matrix for Test
        param k : latent dimension
        param learning_rate : alpha on weight update
        param epochs : training epochs
        param verbose : print status
        """
        
        self.train = train
        self.max_rate = train.max()
        self.test = test
        self.num_users, self.num_items = train.shape
        self.trust = trust
        self.lambda_C = 0.01
        self.lambda_U = 0.01
        self.lambda_V = 0.01
        self.lambda_Z = 0.01
        self.dim = dim
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.verbose = verbose
        
        
    def fit(self):
        """
        training Matrix Factorization : update matrix latent weight and bias
        """
        # init latent features
        self.U = np.random.normal(scale = 1.0/self.dim, size=(self.num_users, self.dim))
        self.V = np.random.normal(scale = 1.0/self.dim, size=(self.num_items, self.dim))
        self.Z = np.random.normal(scale = 1.0/self.dim, size=(self.num_users, self.dim))
        
        self.scale_rate()
        self.convert_trust()
        
        self.training_process = []
        for epoch in range(self.epochs):
            for i in range(self.num_users):
                for j in range(self.num_items):
                    for k in range(self.num_users):
                        self.gradient_descent(i, j, k)
                        
            train_cost, test_cost = self.cost()
            self.training_process.append((epoch, train_cost, test_cost))
            
            if self.verbose == True and ((epoch + 1) % 10 == 0 ):
                print("Iteration : %d, train_cost = %.4f, test_cost = %.4f" % (epoch+1, train_cost, test_cost))
    
    
    def convert_trust(self):
        
        converted_trust = np.zeros((self.num_users, self.num_users))
        for i in range(self.num_users):
            for k in range(self.num_users):
                d_vk = np.count_nonzero(self.trust[:, k])
                d_vi = np.count_nonzero(self.trust[i, :])
                converted_trust[i, k] = (d_vk / (d_vi + d_vk))**(1/2) * self.trust[i, k] 
        
        self.trust = converted_trust
    
    # scale ratings between 0 and 1
    def scale_rate(self):    
        for i in range(self.num_users):
            for j in range(self.num_items):
                if self.train[i, j] != 0:
                    self.train[i, j] = (self.train[i, j] - 1) / (self.max_rate - 1)
                    
                    
    def rescale_rate(self, x):
        
        return (self.max_rate - 1)*x + 1
    
    
    def logistic(self, x):
        """
        function logistic
        """
        return 1 / (1 + np.exp(-x))
    
    
    def d_logistic(self, x):
        """
        function derivative logistic
        """
        return np.exp(x)/(1+np.exp(x))**2
    
    
    def cost(self):
        """
        compute RMSE
        """
        xi, yi = self.train.nonzero() # 0 이 아닌 값의 index 반환
        test_x, test_y = self.test.nonzero()
        predicted = self.logistic(self.get_complete_matrix())
        cost_train = 0
        cost_test = 0
        
        for x, y in zip(xi, yi):
            cost_train += np.abs(self.train[x, y] - predicted[x, y])
        
        for x, y in zip(test_x, test_y):
            cost_test += np.abs(self.test[x, y] - predicted[x, y])
        
        return cost_train/len(xi), cost_test/len(test_x)
    
    
    def gradient_descent(self, i, j, k):
        """
        gradient descent function
        param i : user index
        param j : item index
        """
        rating_pred = self.U[i, :].dot(self.V[j, :].T)
        trust_pred = self.U[i, :].dot(self.Z[k, :].T)
        
        if self.trust[i, k] > 0:
            self.U[i, :] -= self.learning_rate * self.lambda_C * self.d_logistic(trust_pred)*(self.logistic(trust_pred)-self.trust[i, k])*self.Z[k, :]
            self.Z[k, :] -= self.learning_rate * self.lambda_C * self.d_logistic(trust_pred)*(self.logistic(trust_pred)-self.trust[i, k])*self.U[i, :]        
        
        if self.train[i, j] > 0 :
            self.U[i, :] -= self.learning_rate * (self.d_logistic(rating_pred)*(self.logistic(rating_pred)-self.train[i, j])*self.V[j, :])
            self.V[j, :] -= self.learning_rate * (self.d_logistic(rating_pred)*(self.logistic(rating_pred)-self.train[i, j])*self.U[i, :])
        
        self.U[i, :] -= self.learning_rate * (self.lambda_U * self.U[i, :])
        self.V[j, :] -= self.learning_rate * (self.lambda_V * self.V[j, :])
        self.Z[k, :] -= self.learning_rate * (self.lambda_Z * self.Z[k, :])
        
        
    
    def get_complete_matrix(self):
        """
        compute complete matrix
        """
        
        return self.U.dot(self.V.T)
    
    
    def print_results(self):
        """
        print fit results
        """

        print("Final R matrix:")
        print(self.rescale_rate(self.get_complete_matrix()))
        print("Final RMSE:")
        print(self.training_process[self.epochs-1][2])

In [5]:
if __name__ == "__main__":
    
    np.random.seed(7)
    np.seterr(all="warn")
    
    model = SoRec(train, train, trust, dim=5, learning_rate=0.01, epochs=200, verbose=True)
    # regression parameter 2개
    model.fit()
    model.print_results()

Iteration : 10, train_cost = 0.2313, test_cost = 0.2313
Iteration : 20, train_cost = 0.2278, test_cost = 0.2278
Iteration : 30, train_cost = 0.2243, test_cost = 0.2243
Iteration : 40, train_cost = 0.2206, test_cost = 0.2206
Iteration : 50, train_cost = 0.2166, test_cost = 0.2166
Iteration : 60, train_cost = 0.2122, test_cost = 0.2122
Iteration : 70, train_cost = 0.2075, test_cost = 0.2075
Iteration : 80, train_cost = 0.2024, test_cost = 0.2024
Iteration : 90, train_cost = 0.1970, test_cost = 0.1970
Iteration : 100, train_cost = 0.1914, test_cost = 0.1914
Iteration : 110, train_cost = 0.1856, test_cost = 0.1856
Iteration : 120, train_cost = 0.1798, test_cost = 0.1798
Iteration : 130, train_cost = 0.1741, test_cost = 0.1741
Iteration : 140, train_cost = 0.1688, test_cost = 0.1688
Iteration : 150, train_cost = 0.1636, test_cost = 0.1636
Iteration : 160, train_cost = 0.1587, test_cost = 0.1587
Iteration : 170, train_cost = 0.1542, test_cost = 0.1542
Iteration : 180, train_cost = 0.1503, te