In [1]:
import numpy as np

R = np.array([[4, np.nan, np.nan, 2, np.nan],
              [np.nan, 5, np.nan, 3, 1],
              [np.nan, np.nan, 3, 4, 4],
             [5, 2, 1, 2, np.nan]])

num_users, num_items = R.shape
K = 3

np.random.seed(1)
P = np.random.normal(scale=1./K, size=(num_users, K))
Q = np.random.normal(scale=1./K, size=(num_items, K))

In [2]:
from sklearn.metrics import mean_squared_error

def get_rmse(R, P, Q, non_zeros):
    error = 0

    full_pred_matrix = np.dot(P, Q.T)

    x_non_zero_ind = [non_zero[0] for non_zero in non_zeros]
    y_non_zero_ind = [non_zero[0] for non_zero in non_zeros]
    R_non_zeros = R[x_non_zero_ind, y_non_zero_ind]
    full_pred_matrix_non_zeros = full_pred_matrix[x_non_zero_ind, y_non_zero_ind]
    mse = mean_squared_error(R_non_zeros, full_pred_matrix_non_zeros)
    rmse = np.sqrt(mse)

    return rmse

In [6]:
non_zeros = [ (i, j, R[i, j]) for i in range(num_users) for j in range(num_items) if R[i, j]>0]

steps = 5000
learning_rate=0.01
r_lambda = 0.01

for step in range(steps):
    for i, j, r in non_zeros:
        eij = r - np.dot(P[i, :], Q[j, :].T)
        P[i, :] = P[i, :] + learning_rate*(eij * Q[j, :] - r_lambda*P[i, :])
        Q[j, :] = Q[j, :] + learning_rate*(eij * P[i, :] - r_lambda*Q[j, :])

        rmse = get_rmse(R, P, Q, non_zeros)
        if (step % 50) == 0:
            print('### iteration step :', step, 'rmse :', rmse)

### iteration step : 0 rmse : 0.016473407017091678
### iteration step : 0 rmse : 0.016437409574425653
### iteration step : 0 rmse : 0.015961302860831025
### iteration step : 0 rmse : 0.01620577810677723
### iteration step : 0 rmse : 0.016364054411874218
### iteration step : 0 rmse : 0.016274287522843947
### iteration step : 0 rmse : 0.016421374829312618
### iteration step : 0 rmse : 0.016430709517804197
### iteration step : 0 rmse : 0.0168727532704806
### iteration step : 0 rmse : 0.01699896701312035
### iteration step : 0 rmse : 0.01697566600035547
### iteration step : 0 rmse : 0.01649918467629631
### iteration step : 50 rmse : 0.016422148044438846
### iteration step : 50 rmse : 0.016386921615453486
### iteration step : 50 rmse : 0.015909420386211837
### iteration step : 50 rmse : 0.016149539471700364
### iteration step : 50 rmse : 0.016308663423854352
### iteration step : 50 rmse : 0.01621849008254169
### iteration step : 50 rmse : 0.016363727849976847
### iteration step : 50 rmse : 

In [7]:
pred_matrix = np.dot(P, Q.T)
print('예측 행렬 :\n', np.round(pred_matrix, 3))

예측 행렬 :
 [[3.991 1.756 1.146 1.999 1.601]
 [4.993 4.978 0.91  2.986 1.004]
 [5.554 1.946 2.988 3.979 3.985]
 [4.972 2.003 1.004 2.005 1.504]]


In [5]:
R

array([[ 4., nan, nan,  2., nan],
       [nan,  5., nan,  3.,  1.],
       [nan, nan,  3.,  4.,  4.],
       [ 5.,  2.,  1.,  2., nan]])