<a href="https://colab.research.google.com/github/DeokwonWang/Content_Recommend_algorithm/blob/main/ALS_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#1. 학습 파라미터 초기화

*   정규화에 필요한 lambda
*   confidence level 조정에 필요한 alpha
*   사용자와 아이템의 Latent Factor행렬의 차원 nf

In [1]:
# Hyperparameters for the ALS training run.
nf = 200       # number of latent factors per user / item vector
alpha = 40     # confidence scaling factor, used as C = 1 + alpha * R
r_lambda = 40  # weight of the L2 regularization term in the loss

#2. 학습용 평점 데이터 설정

*   임의의 10*11평점 행렬 생성(numpy)
*   사용자수 = 10 / 아이템 수 = 11

In [2]:
import numpy as np

# Sample rating matrix: one row per user (10 rows), one column per item
# (11 columns). Non-zero entries are the ratings used for training.
R = np.array([
    [0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0],
    [0, 3, 4, 0, 3, 0, 0, 2, 2, 0, 0],
    [0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
    [0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5],
    [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4],
    [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
    [0, 0, 0, 3, 0, 0, 0, 0, 4, 5, 0],
])
print(R.shape)

(10, 11)


#3. 사용자와 아이템의 Latent Factor Matrix를 초기화

아주 작은 랜덤한 값들로 행렬의 값을 초기화

In [3]:
# Problem size comes straight from the rating matrix:
# rows are users, columns are items.
nu, ni = R.shape

# Start both latent factor matrices at small random values in [0, 0.01).
# X has one nf-dimensional row per user, Y one per item.
X = 0.01 * np.random.rand(nu, nf)
Y = 0.01 * np.random.rand(ni, nf)

print(X)

[[0.0027857  0.0091066  0.00545501 ... 0.00991567 0.00188036 0.00756238]
 [0.00923195 0.00469537 0.00692004 ... 0.00253509 0.00753107 0.00155912]
 [0.00514351 0.00290268 0.00399059 ... 0.00100873 0.00414994 0.00558701]
 ...
 [0.00064852 0.00210796 0.0061438  ... 0.00263549 0.00835789 0.00542804]
 [0.00549811 0.00204291 0.0078949  ... 0.00097738 0.00052217 0.00551092]
 [0.00776883 0.00845971 0.00414592 ... 0.00642635 0.00814739 0.00260375]]


#4.선호도 행렬 P 설정
주어진 학습용 평점 테이블을 0과 1로 된 binary rating matrix P로 바꾸어줍니다.

P를 구하는 공식

Rui가 0보다 크면 Pui = 1
Rui가 0이면 Pui = 0


실제 코드 구현

In [5]:
# Binary preference matrix: every positive rating becomes 1, every other
# entry keeps its original value (0 for the sample data).
P = np.where(R > 0, 1, R)
print(P)

[[0 0 0 1 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1 0 1 0]
 [0 1 1 0 1 0 0 1 1 0 0]
 [0 1 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 1 0]
 [0 0 1 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 1 0 0 0 0 1]
 [0 0 0 0 0 0 1 0 0 1 0]
 [0 0 0 1 0 0 0 0 1 1 0]]


#5.신뢰도 행렬 C 설정
주어진 학습용 평점 테이블에 Confidence level을 적용한 C행렬 구하기

Confidence level을 구하는 공식

Cui = 1 + alpha*Rui

In [7]:
# Confidence matrix: baseline confidence of 1 everywhere, increased
# linearly by alpha for each rating point.
C = alpha * R + 1
print(C)

[[  1   1   1 161 161   1   1   1   1   1   1]
 [  1   1   1   1   1   1   1   1   1   1  41]
 [  1   1   1   1   1   1   1  41   1 161   1]
 [  1 121 161   1 121   1   1  81  81   1   1]
 [  1 201 201   1   1   1   1   1   1   1   1]
 [  1   1   1   1   1   1 201   1   1 201   1]
 [  1   1 161   1   1   1   1   1   1   1 201]
 [  1   1   1   1   1 161   1   1   1   1 161]
 [  1   1   1   1   1   1 201   1   1 201   1]
 [  1   1   1 121   1   1   1   1 161 201   1]]


#6.Loss Function 설정

예측이 얼마나 정확한지 측정할 Loss Function 작성

크게 4가지 부분으로 나누어서 train시 변화 추이 살펴보기

1. predict error : (pui-xTyi)^2 
 - 0과 1로 나누어 선호 비선호를 예측한 결과의 에러
2. confidence error : cui(pui-xuTyi)^2
 - confidence level을 적용한 predict error
3. regularization : lambda(sum(||xu||^2) + sum(||yi||^2))
4. total loss : confidence error + regularization

In [8]:
def loss_function(C, P, xTy, X, Y, r_lambda):
  """Evaluate the individual terms of the training objective.

  Args:
    C: confidence weights, multiplied element-wise with the squared errors.
    P: target preference matrix.
    xTy: predicted preference matrix (compared element-wise against P).
    X: user latent factor matrix.
    Y: item latent factor matrix.
    r_lambda: weight applied to the sum of squared factor entries.

  Returns:
    A 4-tuple ``(predict_error, confidence_error, regularization,
    total_loss)`` where ``predict_error`` is the unweighted sum of squared
    errors, ``confidence_error`` is the C-weighted sum, ``regularization``
    is ``r_lambda`` times the squared entries of X and Y, and ``total_loss``
    is ``confidence_error + regularization``.
  """
  residual = P - xTy
  squared_residual = residual * residual

  weighted_error = np.sum(C * squared_residual)
  factor_penalty = r_lambda * (np.sum(np.square(X)) + np.sum(np.square(Y)))

  return (
      np.sum(squared_residual),
      weighted_error,
      factor_penalty,
      weighted_error + factor_penalty,
  )

#7.Optimizer 설정

loss function을 최적화시키는 Optimizer 작성

#이 부분이 핵심

x[u] = (yT Cu y + lambda*I)^-1 yT Cu p(u)

y[i] = (xT Ci x + lambda*I)^-1 xT Ci p(i)

In [12]:
def optimize_user(X, Y, C, P, nu, nf, r_lambda):
  """Recompute every user's latent vector in place while Y is held fixed.

  For each user ``u`` this solves the regularized weighted least-squares
  system ``(Y^T C^u Y + r_lambda * I) x_u = Y^T C^u p(u)``, where ``C^u`` is
  the diagonal matrix formed from row ``u`` of ``C``.

  Args:
    X: user factor matrix; rows ``0 .. nu-1`` are overwritten.
    Y: item factor matrix (one row per item); only read.
    C: confidence matrix indexed as ``C[user, item]``.
    P: preference matrix indexed as ``P[user, item]``.
    nu: number of user rows to update.
    nf: number of latent factors (size of the identity in the ridge term).
    r_lambda: regularization weight.

  Returns:
    None. ``X`` is modified in place.
  """
  yT = np.transpose(Y)
  # The ridge term does not depend on the user, so build it once.
  lI = r_lambda * np.identity(nf)
  for u in range(nu):
    # Scaling the columns of Y^T by C[u] is the same as Y^T @ diag(C[u]),
    # without materializing the dense diagonal matrix; reuse it for both
    # sides of the linear system.
    yT_Cu = yT * C[u]
    yT_Cu_y = np.matmul(yT_Cu, Y)
    yT_Cu_pu = np.matmul(yT_Cu, P[u])
    X[u] = np.linalg.solve(yT_Cu_y + lI, yT_Cu_pu)

def optimize_item(X, Y, C, P, nu, nf, r_lambda):
  """Recompute every item's latent vector in place while X is held fixed.

  For each item ``i`` this solves the regularized weighted least-squares
  system ``(X^T C^i X + r_lambda * I) y_i = X^T C^i p(i)``, where ``C^i`` is
  the diagonal matrix formed from column ``i`` of ``C``.

  Args:
    X: user factor matrix (one row per user); only read.
    Y: item factor matrix; every row is overwritten.
    C: confidence matrix indexed as ``C[user, item]``.
    P: preference matrix indexed as ``P[user, item]``.
    nu: unused; kept so existing positional calls keep working. The number
      of items to update is taken from ``Y`` itself.
    nf: number of latent factors (size of the identity in the ridge term).
    r_lambda: regularization weight.

  Returns:
    None. ``Y`` is modified in place.
  """
  xT = np.transpose(X)
  # The ridge term does not depend on the item, so build it once.
  lI = r_lambda * np.identity(nf)
  # Iterate over the rows of Y rather than a module-level item count, so the
  # function does not depend on global state.
  for i in range(Y.shape[0]):
    # Scaling the columns of X^T by C[:, i] is the same as
    # X^T @ diag(C[:, i]) without materializing the dense diagonal matrix.
    xT_Ci = xT * C[:, i]
    xT_Ci_x = np.matmul(xT_Ci, X)
    xT_Ci_pi = np.matmul(xT_Ci, P[:, i])
    Y[i] = np.linalg.solve(xT_Ci_x + lI, xT_Ci_pi)

#8.학습

보통 10~15회

각 스텝별 loss function 출력하도록 구현

학습 완료 시 최종 선호도 0 or 1 예측 출력

In [13]:
# Per-step history of each loss component; index 0 holds the values measured
# on the randomly initialized factors before any optimization.
predict_errors, confidence_errors = [], []
regularization_list, total_losses = [], []

for i in range(15):
  # Step 0 only evaluates the initial factors. Every later step performs one
  # alternating sweep: users first (Y fixed), then items (X fixed).
  if i > 0:
    optimize_user(X, Y, C, P, nu, nf, r_lambda)
    optimize_item(X, Y, C, P, ni, nf, r_lambda)

  predict = X @ Y.T
  predict_error, confidence_error, regularization, total_loss = loss_function(
      C, P, predict, X, Y, r_lambda)

  predict_errors.append(predict_error)
  confidence_errors.append(confidence_error)
  regularization_list.append(regularization)
  total_losses.append(total_loss)

  print('--------------------------step %d--------------------------' % i)
  print('predict_error: %f' % predict_error)
  print('confidence_error: %f' % confidence_error)
  print('regularization: %f' % regularization)
  print('total loss: %f' % total_loss)

# Predicted preference matrix from the trained factors.
predict = X @ Y.T
print('final predict')
print([predict])

--------------------------step 0--------------------------
predict_error: 20.833529
confidence_error: 3140.487414
regularization: 178.829772
total loss: 3319.317186
--------------------------step 1--------------------------
predict_error: 30.980891
confidence_error: 300.375548
regularization: 640.330138
total loss: 940.705686
--------------------------step 2--------------------------
predict_error: 34.027482
confidence_error: 139.200180
regularization: 651.199625
total loss: 790.399805
--------------------------step 3--------------------------
predict_error: 32.214462
confidence_error: 120.302668
regularization: 650.944306
total loss: 771.246974
--------------------------step 4--------------------------
predict_error: 29.764042
confidence_error: 109.693400
regularization: 653.510752
total loss: 763.204152
--------------------------step 5--------------------------
predict_error: 27.371869
confidence_error: 102.705096
regularization: 656.341627
total loss: 759.046723
--------------------