In [1]:
import numpy as np
import pandas as pd

In [5]:
col_names = ['user_id', 'movie_id', 'rating', 'timestamp']
df = pd.read_csv('../Data/MovieLens/ml-100k/u.data', sep='\t', names=col_names)
train_df = pd.read_csv('../Data/MovieLens/ml-100k/ua.base', sep='\t', names=col_names)
test_df = pd.read_csv('../Data/MovieLens/ml-100k/ua.test', sep='\t', names=col_names)

In [123]:
n_users = len(df.loc[:,'user_id'].unique())
n_items = len(df.loc[:,'movie_id'].unique())

In [124]:
# R matrix
R = np.zeros((n_users, n_items))
for user_id, movie_id, rating, timestamp in train_df.values:
    R[user_id-1, movie_id-1] = rating

In [149]:
R_test = np.zeros((n_users, n_items))
for user_id, movie_id, rating, timestamp in test_df.values:
    R_test[user_id-1, movie_id-1] = rating

In [141]:
# hyperparameter
k = 10
epochs = 100
lamda3 = 0.005
lr = 0.01

In [142]:
# initialize
P = np.random.randn(k, n_users)
Q = np.random.randn(k, n_items)

mu = np.mean(train_df.loc[:, 'rating'])
B_u = np.random.randn(n_users)
B_i = np.random.randn(n_items)

In [155]:
for epoch in range(epochs):
    for user_id, movie_id, rating, timestamp in train_df.values:
        r_ui = R[user_id-1, movie_id-1]
        p_u, q_i = P[ : , user_id-1], Q[ :, movie_id-1] # user & item
        b_u, b_i = B_u[user_id-1], B_i[movie_id-1]
        r_ui_hat = mu + b_u + b_i + np.dot(p_u.T, q_i) # predict
        
        # minimize
        #target = (r_ui - r_ui_hat)**2 + lamda3*(int(np.linalg.norm(p_u, ord=2)) +int(np.linalg.norm(q_i, ord=2)) + b_u**2 + b_i**2)
        #target = (r_ui - r_ui_hat)**2 
        
        # calculate gradient
        grad_b_u = -2*(r_ui - r_ui_hat) + 2*lamda3*b_u
        grad_b_i = -2*(r_ui - r_ui_hat) + 2*lamda3*b_i
        grad_p_u = -2*(r_ui - r_ui_hat)*q_i + 2*lamda3*p_u
        grad_q_i = -2*(r_ui - r_ui_hat)*p_u + 2*lamda3*q_i
        
        # update
        B_u[user_id-1] = b_u - lr * grad_b_u
        B_i[movie_id-1] = b_i - lr * grad_b_i
        P[ :, user_id-1] = p_u - lr * grad_p_u
        Q[ :, movie_id-1] = q_i - lr * grad_q_i
        
        target = (r_ui - r_ui_hat)**2 + lamda3*(int(np.linalg.norm(p_u, ord=2)) +int(np.linalg.norm(q_i, ord=2)) + b_u**2 + b_i**2)
    # training error    
    print("epoch : {}, target : {}".format(epoch+1, target))

    # test set
    SSE = 0
    for user_id, movie_id, rating, timestamp in test_df.values:
        r_ui = R_test[user_id-1, movie_id-1]
        p_u, q_i = P[ : , user_id-1], Q[ :, movie_id-1] # user & item
        b_u, b_i = B_u[user_id-1], B_i[movie_id-1]
        r_ui_hat = mu + b_u + b_i + np.dot(p_u.T, q_i) # predict
        
        SSE += (r_ui - r_ui_hat)**2
    
    MSE = SSE/len(test_df)
    RMSE = np.sqrt(MSE)
    
    # test error
    print("epoch : {}, test_error : {}".format(epoch+1, RMSE))

epoch : 1, target : 0.5918325312372333
epoch : 1, test_error : 1.165651441245464
epoch : 2, target : 0.6229210252627339
epoch : 2, test_error : 1.1650477071402618
epoch : 3, target : 0.6376761364398545
epoch : 3, test_error : 1.1645073233393513
epoch : 4, target : 0.6363117317539846
epoch : 4, test_error : 1.1639789833834737
epoch : 5, target : 0.6207338538742466
epoch : 5, test_error : 1.163456766690745
epoch : 6, target : 0.5933967158450623
epoch : 6, test_error : 1.16293974323984
epoch : 7, target : 0.5569152975062158
epoch : 7, test_error : 1.1624286460512112
epoch : 8, target : 0.5138802128551865
epoch : 8, test_error : 1.161925086218924
epoch : 9, target : 0.4667291978926983
epoch : 9, test_error : 1.1614311131226842
epoch : 10, target : 0.41765072596386016
epoch : 10, test_error : 1.160948925721061
epoch : 11, target : 0.3685191816785677
epoch : 11, test_error : 1.1604806796003806
epoch : 12, target : 0.3208617085955087
epoch : 12, test_error : 1.1600283602878423
epoch : 13, tar

epoch : 99, target : 0.05148326465019597
epoch : 99, test_error : 1.1762407722433654
epoch : 100, target : 0.050880019834341775
epoch : 100, test_error : 1.1766447560210584
