# LFM梯度下降算法实现

### 0. 引入依赖

In [1]:
import numpy as np
import pandas as pd

### 1. 数据准备

In [5]:
# Rating matrix R: one row per user, one column per item.
# The factorization routine in this file only fits entries greater than 0,
# so a 0 here stands for "no rating available".
R = np.array(
    [
        [4, 4, 2, 4, 1],
        [1, 2, 3, 0, 0],
        [3, 3, 2, 4, 0],
        [5, 2, 0, 3, 1],
        [1, 2, 1, 5, 1],
        [0, 3, 2, 4, 1],
    ]
)
# Number of columns (items) in the rating matrix.
R.shape[1]

5

### 2. 算法实现

In [6]:
"""
@输入参数：
R：M*N 的评分矩阵
K：隐特征向量维度
max_iter: 最大迭代次数
alpha：步长
lamda：正则化系数

@输出：
分解之后的 P，Q 以及最终的损失值 cost
P：用户特征矩阵 M*K
Q：物品特征矩阵 N*K
cost：迭代结束时的损失函数值
"""

# Hyper-parameters used for the test run of the factorization

K = 5              # dimension of the latent feature vectors
max_iter = 5_000   # upper bound on the number of passes over the ratings
alpha = 2e-4       # gradient-descent step size
lamda = 4e-3       # regularization coefficient

# 核心算法
def _lfm_cost(R, P, Q, rows, cols, lamda):
    """Return the regularized squared-error loss over the observed ratings.

    ``Q`` is expected in K x N layout. For every observed pair ``(u, i)``
    given by ``rows``/``cols`` the loss adds the squared prediction error
    plus ``lamda * (||P[u]||^2 + ||Q[:, i]||^2)``.
    """
    p_obs = P[rows]          # one row of user factors per observed rating
    q_obs = Q[:, cols].T     # matching row of item factors per observed rating
    err = np.sum(p_obs * q_obs, axis=1) - R[rows, cols]
    penalty = np.sum(p_obs ** 2) + np.sum(q_obs ** 2)
    return float(np.sum(err ** 2) + lamda * penalty)


# Core algorithm
def LFM_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002):
    """Factorize a rating matrix with a latent factor model via gradient descent.

    Only entries of ``R`` that are greater than 0 are treated as observed
    ratings; all other entries are ignored during fitting.

    Args:
        R: M x N rating matrix (array-like, e.g. nested lists or ndarray).
        K: Dimension of the latent feature vectors.
        max_iter: Maximum number of passes over the observed ratings.
        alpha: Step size of the gradient descent.
        lamda: Regularization coefficient.

    Returns:
        A tuple ``(P, Q, cost)`` where ``P`` is the M x K user feature
        matrix, ``Q`` is the N x K item feature matrix and ``cost`` is the
        regularized loss of the returned factors (the loss of the random
        initial factors when ``max_iter`` is 0).

    Raises:
        ValueError: If ``R`` is not two-dimensional.
    """
    R = np.asarray(R, dtype=float)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix")
    M, N = R.shape

    # Random initial factors; P is drawn before Q. Q is kept in K x N layout
    # while iterating and transposed back on return.
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K).T

    # Indices of the observed ratings, in row-major (user, then item) order.
    rows, cols = np.nonzero(R > 0)

    # Defined up front so that max_iter == 0 still returns a meaningful cost.
    cost = _lfm_cost(R, P, Q, rows, cols, lamda)

    for _ in range(max_iter):
        for u, i in zip(rows, cols):
            # Snapshot P[u] so that both updates below are computed from the
            # same pre-update factors.
            p_u = P[u, :].copy()
            q_i = Q[:, i]
            eui = np.dot(p_u, q_i) - R[u, i]

            P[u, :] = p_u - alpha * (2 * eui * q_i + 2 * lamda * p_u)
            Q[:, i] = q_i - alpha * (2 * eui * p_u + 2 * lamda * q_i)

        # Loss after this full pass; stop early once it is negligible.
        cost = _lfm_cost(R, P, Q, rows, cols, lamda)
        if cost < 0.0001:
            break

    return P, Q.T, cost

### 3. 测试

In [7]:
# Factorize the rating matrix with the hyper-parameters chosen above.
P, Q, cost = LFM_grad_desc(R, K=K, max_iter=max_iter, alpha=alpha, lamda=lamda)

# Show the learned user factors, item factors and the final loss, one per line.
print(P, Q, cost, sep="\n")

# Rebuild the full predicted rating matrix from the two factor matrices.
predR = np.dot(P, Q.T)

# Original ratings, followed by the predictions for comparison.
print(R)
predR

[[ 0.9179266   1.00375037  0.06243814  1.23424653  0.95099841]
 [-0.13437698  0.31430769  1.87905018  1.22165638  0.44665416]
 [ 0.76048302  0.446364    0.50244411  0.97140465  0.80613643]
 [ 1.95616132  0.69619753  0.5963862   0.20272795  0.30230481]
 [ 0.0102586  -0.25849055  1.13293199  0.9908161   0.70094664]
 [ 0.89766445  0.40330358  0.58419283  0.76115436  0.95786257]]
[[ 2.07633106  0.79098309  0.08818344  0.52426283  0.66562527]
 [ 0.5072387   0.63162303 -0.10292354  1.25226227  1.40228611]
 [ 0.40714663  0.59092871  0.84536397  0.61815954  0.33032996]
 [ 0.44327269  0.26554728  2.10870007  1.77713184  1.02335256]
 [ 0.45471798 -0.29936398  0.19209649  0.5407165   0.22385022]]
1.5035948020591539
[[4 4 2 4 1]
 [1 2 3 0 0]
 [3 3 2 4 0]
 [5 2 0 3 1]
 [1 2 1 5 1]
 [0 3 2 4 1]]


array([[3.98545324, 3.97234561, 2.09676312, 3.97172375, 1.00916384],
       [1.07307542, 2.09313539, 2.62222543, 6.61438029, 0.9663163 ],
       [3.02224428, 2.96285418, 1.86491961, 4.06641193, 1.01440609],
       [4.97241482, 2.04837935, 1.93718955, 2.97922457, 0.9729383 ],
       [0.9027603 , 1.94901897, 1.65319326, 4.80304605, 0.99233746],
       [3.27099348, 2.94630105, 1.88458582, 4.06979653, 1.02565746]])