# LFM梯度下降算法实现

### 0.引入依赖

In [1]:
import numpy as np
import pandas as pd

### 1.数据准备

In [5]:
# Rating matrix: one row per user, one column per item.
# Entries equal to 0 are skipped by the training loop (only ratings > 0 are used).
R = np.array([
    [4, 0, 2, 0, 1],
    [0, 2, 3, 0, 0],
    [1, 0, 2, 4, 0],
    [5, 0, 0, 3, 1],
    [0, 0, 1, 5, 1],
    [0, 3, 2, 4, 1],
])
R.shape  # displayed by the notebook as (rows, columns)

(6, 5)

### 2.算法实现

In [12]:
"""
@输入参数：
R：M*N的评分矩阵
K：隐特征向量维度
max_iter：最大迭代次数
alpha：步长
lamda：正则化系数

@输出：
分解之后的P，Q，以及最终的损失值cost
P：用户特征矩阵M*K
Q：物品特征矩阵N*K
cost：迭代结束时的损失函数值
"""

# Hyperparameters passed to LFM_grad_desc in the test cell
K = 2            # dimension of the latent feature vectors
max_iter = 5000  # maximum number of iterations
alpha = 2e-4     # step size
lamda = 4e-3     # regularisation coefficient

# 核心算法
def _lfm_cost(R, observed, P, Q, lamda):
    """Return the regularised squared-error loss over the observed ratings."""
    users = observed[:, 0]
    items = observed[:, 1]
    residuals = np.sum(P[users] * Q[items], axis=1) - R[users, items]
    # The penalty is added once per observed rating, which matches the
    # per-rating regularisation term used in the gradient step.
    penalty = np.sum(P[users] ** 2) + np.sum(Q[items] ** 2)
    return float(np.sum(residuals ** 2) + lamda * penalty)


# Core algorithm
def LFM_grad_desc(R, K=5, max_iter=1000, alpha=0.0001, lamda=0.002):
    """Factorise a rating matrix with a latent factor model trained by SGD.

    Only entries of ``R`` that are greater than 0 are treated as ratings;
    all other entries are ignored during training and in the loss.

    Parameters
    ----------
    R : array-like, shape (M, N)
        Rating matrix (M users, N items).
    K : int
        Dimension of the latent feature vectors.
    max_iter : int
        Maximum number of passes over the observed ratings.
    alpha : float
        Step size.
    lamda : float
        Regularisation coefficient.

    Returns
    -------
    P : numpy.ndarray, shape (M, K)
        User feature matrix.
    Q : numpy.ndarray, shape (N, K)
        Item feature matrix, so that ``P.dot(Q.T)`` predicts ``R``.
    cost : float
        Loss after the last completed pass (loss of the random initial
        factors when ``max_iter`` is 0).

    Raises
    ------
    ValueError
        If ``R`` is not two-dimensional.

    Notes
    -----
    The factors are initialised from ``np.random.rand``; call
    ``np.random.seed`` beforehand for reproducible results.
    """
    R = np.asarray(R, dtype=float)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix")
    M, N = R.shape

    # Random initial values for P and Q, drawn in the same order as before
    # so that seeded runs start from the same factors.
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)

    # (user, item) index pairs of the ratings, in row-major order.
    observed = np.argwhere(R > 0)

    # Defined up front so the function still returns a loss when the loop
    # body never runs (max_iter == 0).
    cost = _lfm_cost(R, observed, P, Q, lamda)

    for _ in range(max_iter):
        # One gradient step per observed rating.
        for u, i in observed:
            err = np.dot(P[u], Q[i]) - R[u, i]
            # Keep the pre-update user vector: both gradients must be
            # evaluated at the same point the error was computed for.
            p_old = P[u].copy()
            P[u] -= alpha * 2 * (err * Q[i] + lamda * p_old)
            Q[i] -= alpha * 2 * (err * p_old + lamda * Q[i])

        # Evaluate the loss once per full pass and stop the whole training
        # loop as soon as it is small enough.
        cost = _lfm_cost(R, observed, P, Q, lamda)
        if cost < 0.0001:
            break

    return P, Q, cost

### 3.测试

In [13]:
# Factorise the rating matrix using the hyperparameters defined earlier.
P, Q, cost = LFM_grad_desc(R, K, max_iter, alpha, lamda)

# Show the user factors, the item factors and the final loss, one per line.
print(P, Q, cost, sep="\n")

# Rebuild the full predicted rating matrix from the two factors.
predR = np.dot(P, Q.T)

# Print the original ratings, then let the notebook display the prediction.
print(R)
predR

[[0.94529312 1.42593733]
 [1.77719295 1.0597533 ]
 [1.63432235 0.11703644]
 [0.1499488  1.87793171]
 [0.89275617 1.71621519]
 [0.78984527 1.64788479]]
[[0.44659847 2.56285658]
 [0.33758418 1.51139215]
 [1.26413943 0.40248095]
 [2.30502708 1.4803337 ]
 [0.23674665 0.50052945]]
1.8404665841443315
[[4 0 2 0 1]
 [0 2 3 0 0]
 [1 0 2 4 0]
 [5 0 0 3 1]
 [0 0 1 5 1]
 [0 3 2 4 1]]


array([[4.07663932, 2.47426648, 1.76889492, 4.28978932, 0.9375186 ],
       [3.50968738, 2.20165505, 2.67315019, 5.66526639, 0.9511822 ],
       [1.02983347, 0.72860933, 2.11311626, 3.94041026, 0.44550052],
       [4.87983654, 2.88891158, 0.94538793, 3.12560162, 0.97546   ],
       [4.79711695, 2.89525453, 1.8193122 , 4.59839833, 1.07037327],
       [4.57603607, 2.7572394 , 1.66171678, 4.26003411, 1.01180808]])