# LFM梯度下降算法实现

### 0.引入依赖

In [1]:
import numpy as np
import pandas as pd

### 1.数据准备

In [2]:
# 评分矩阵R
R = np.array([[4,0,2,0,1],
             [0,2,3,0,0],
             [1,0,2,4,0],
             [5,0,0,3,1],
             [0,0,1,5,1],
             [0,3,2,4,1],])
R.shape

(6, 5)

### 2.算法实现

In [37]:
"""
@输入参数：
R：M*N的评分矩阵
K：隐特征向量个数
max_iter：最大迭代次数
alpha：步长
lamda：正则化系数

@输出：
分解之后的P,Q
P：初始化用户特征矩阵M*K
Q：初始化物品特征矩阵N*K
"""

# 给定超参数

K = 6
max_iter = 10000
alpha = 0.0002
lamda = 0.004

# 核心算法
def LFM_grad_desc( R, K=2, max_iter=5000, alpha=0.0002, lamda=0.004 ):
    # 基本维度参数定义
    M = len(R)
    N = len(R[0])
    
    # P,Q初始值，随机生成
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)
    Q = Q.T
    
    # 开始迭代
    for step in range(max_iter):
        # 对所有的用户u、物品i做遍历，对应的特征向量Pu、Qi梯度下降
        for u in range(M):
            for i in range(N):
                # 对于每一个大于0的评分，求出预测评分误差
                if R[u][i] > 0:
                    # np.dot进行矩阵乘法
                    eui = np.dot( P[u,:], Q[:, i] ) - R[u][i]
                    
                    # 代入公式，按照梯度下降算法更新当前的Pu、Qi
                    for k in range(K):
                        P[u][k] = P[u][k] - alpha * ( 2 * eui * Q[k][i] + 2 * lamda * P[u][k] )
                        Q[k][i] = Q[k][i] - alpha * ( 2 * eui * P[u][k] + 2 * lamda * Q[k][i] )
                        
        # u、i遍历完成，所有特征向量更新完成，可以得到P、Q，可以计算预测评分矩阵
        predR = np.dot( P, Q)
        
        # 计算当前损失函数
        cost = 0
        for u in range(M):
            for i in range(N):
                if R[u][i] > 0:
                    cost += ( np.dot( P[u,:], Q[:, i] ) - R[u][i] ) ** 2
                    # 加上正则化项
                    cost += lamda * ( P[u][k] ** 2 + Q[k][i] ** 2 )
        if cost < 0.1:
            break
    
    return P, Q.T, cost, step 

### 3.测试

In [38]:
P, Q, cost, step = LFM_grad_desc(R, K, max_iter, alpha, lamda)
print(P)
print(Q)
print(cost)
print(step)

predR = P.dot(Q.T)

print(R)
predR

[[ 0.61003693  0.52987132  0.76821692  0.38383342  0.68311723  0.7987107 ]
 [ 1.01565814  0.07901222  1.04822434  0.69961694  0.56271091  0.89467101]
 [ 1.39199884 -0.19756833  0.16431426  1.13613683  0.33964933 -0.26815967]
 [ 0.43271225  0.8240457   0.90940986  0.17290398  0.73881273  1.14958761]
 [ 1.05399149  1.20769089  0.48385785  0.16994301  1.37349989  0.06625611]
 [ 0.89249912  0.58724398  0.32958367  1.0957964   0.63738466  0.11618848]]
[[ 7.12024797e-01  1.48857855e+00  1.32602172e+00  7.77019068e-04
   1.21119653e+00  1.16651293e+00]
 [ 7.64465386e-01  9.93964735e-01  2.05151507e-01  1.13272219e+00
   5.49504699e-01 -1.31133063e-01]
 [ 6.86246712e-01 -2.12362417e-01  5.42280527e-01  1.05192159e+00
   5.29994767e-02  1.04119032e+00]
 [ 1.77103585e+00  7.24476431e-01  7.48281526e-01  9.41121299e-01
   1.24057917e+00 -1.01175510e-01]
 [ 6.91412491e-01  6.54478104e-02 -5.61682393e-02  2.10961661e-01
   1.00363358e-01  5.30312448e-01]]
0.0999790210014362
3897
[[4 0 2 0 1]
 [0 2 

array([[4.00118264, 1.85604177, 1.99427752, 3.16700394, 0.98641698],
       [3.95650034, 2.05437862, 2.94593281, 3.90636941, 1.32705606],
       [1.01437916, 2.4101992 , 2.02023798, 3.96283522, 1.07184698],
       [4.97664642, 1.78751702, 2.03308389, 3.0068144 , 1.02230115],
       [4.93081579, 3.04396082, 1.04976345, 4.96083103, 0.98944312],
       [2.8550603 , 2.90984168, 1.9739401 , 4.06296175, 0.99376403]])

### 4.结果误差记录
****************************
| 隐特征向量个数K | 误差cost | 迭代次数step |
| :-------------: | :------- | :----------: |
| 2 | 0.57409404603735 | 9999 |
| 5 | 0.1010729862839729 | 9999 |
| 6 | 0.09997733414883572 | 4516 |
| 7 | 0.0999904870901497 | 4644 |