In [1]:
# 推荐系统玩家 之 矩阵分解(Matrix Factorization)的基本方法 及其 优缺点 - 推荐系统玩家的文章 - 知乎
# https://zhuanlan.zhihu.com/p/145120275
import numpy as np

In [2]:
# Original matrix: 24 values listed row by row, then shaped into 6 rows x 4 columns.
arr = [
    5, 5, 0, 5,
    5, 0, 3, 4,
    3, 4, 0, 3,
    0, 0, 5, 3,
    5, 4, 4, 5,
    5, 4, 5, 5,
]
# The target shape is passed positionally: the `newshape=` keyword is deprecated
# in recent NumPy releases, while the positional form works on every version.
arr_6_4 = np.reshape(arr, (6, 4))
print(arr_6_4)

[[5 5 0 5]
 [5 0 3 4]
 [3 4 0 3]
 [0 0 5 3]
 [5 4 4 5]
 [5 4 5 5]]


In [3]:
# Singular value decomposition of arr_6_4 into U, the singular values, and V transposed.
# np.linalg.svd returns the singular values as a 1-D array (not a diagonal matrix)
# and returns V already transposed.
# Code reference: https://blog.csdn.net/kbccs/article/details/82590032
# Background on the transpose: https://zh.wikipedia.org/wiki/%E8%BD%AC%E7%BD%AE%E7%9F%A9%E9%98%B5
U, singular, V_transpose = np.linalg.svd(arr_6_4)

# Optional: to show rounded factors, apply np.round(<factor>, 4) to each one
# before printing.
for _label, _factor in (
    ('U Matrix: \n', U),
    ('singular Matrix: \n', singular),
    ('V_transpose Matrix: \n', V_transpose),
):
    print(_label, _factor, end='\n\n')


U Matrix: 
 [[-0.44721867 -0.53728743 -0.00643789 -0.50369332 -0.38572484 -0.32982665]
 [-0.35861531  0.24605053  0.86223083 -0.14584826  0.07797295  0.20015165]
 [-0.29246336 -0.40329582 -0.22754042 -0.10376096  0.4360104   0.70652079]
 [-0.20779151  0.67004393 -0.3950621  -0.58878098  0.02599098  0.06671722]
 [-0.50993331  0.05969518 -0.10968053  0.28687443  0.59460203 -0.53714632]
 [-0.53164501  0.18870999 -0.19141061  0.53413013 -0.54845638  0.24290885]]

singular Matrix: 
 [17.71392084  6.39167145  3.09796097  1.32897797]

V_transpose Matrix: 
 [[-0.57098887 -0.4274751  -0.38459931 -0.58593526]
 [-0.22279713 -0.51723555  0.82462029  0.05319973]
 [ 0.67492385 -0.69294472 -0.2531966   0.01403201]
 [ 0.41086611  0.26374238  0.32859738 -0.80848795]]



In [4]:
# Square identity matrix with one row/column per singular value. The size is
# read from `singular` instead of being hard-coded, so it always matches the
# decomposition computed earlier.
identity_matrix = np.eye(len(singular))

print('identity_matrix: \n',identity_matrix,end='\n\n')

print('singular: \n',singular,end='\n\n')

# Turn the 1-D array of singular values into a diagonal matrix.
# np.diag puts the values on the main diagonal; this is the same result as the
# element-wise product identity_matrix * singular, where broadcasting scales
# column j of the identity by singular[j]. Note that `*` on ndarrays is an
# element-wise product, not a matrix (dot) product.
singular_matrix = np.diag(singular)

print('singular Matrix: \n',singular_matrix ,end='\n\n')


identity_matrix: 
 [[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]

singular: 
 [17.71392084  6.39167145  3.09796097  1.32897797]

singular Matrix: 
 [[17.71392084  0.          0.          0.        ]
 [ 0.          6.39167145  0.          0.        ]
 [ 0.          0.          3.09796097  0.        ]
 [ 0.          0.          0.          1.32897797]]



In [5]:
k = 2  # number of latent features (leading singular values/vectors) to keep

# Keep only the first k components of each SVD factor.
#   U[:, :k]            -> every row, first k columns (":k" is the same as "0:k")
#   V_transpose[:k, :]  -> first k rows, every column
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0. The np.matrix
# type is kept on purpose: with it, `*` between the factors is a matrix product.
U_matrix = np.asmatrix(U[:, :k])
print('k feature U: \n', U_matrix ,'\n')

# Build the k x k diagonal block from `singular` rather than slicing
# singular_matrix in place, so re-running this cell with a different k never
# starts from an already-truncated matrix.
singular_matrix = np.diag(singular[:k])
print('k feature singular_matrix: \n', singular_matrix,'\n')

V_transpose_matrix = np.asmatrix(V_transpose[:k, :])
# Label corrected: this line prints V_transpose_matrix, not singular_matrix.
print('k feature V_transpose_matrix: \n', V_transpose_matrix)

k feature U: 
 [[-0.44721867 -0.53728743]
 [-0.35861531  0.24605053]
 [-0.29246336 -0.40329582]
 [-0.20779151  0.67004393]
 [-0.50993331  0.05969518]
 [-0.53164501  0.18870999]] 

k feature singular_matrix: 
 [[17.71392084  0.        ]
 [ 0.          6.39167145]] 

k feature singular_matrix: 
 [[-0.57098887 -0.4274751  -0.38459931 -0.58593526]
 [-0.22279713 -0.51723555  0.82462029  0.05319973]]


In [6]:
# Rank-k reconstruction of arr_6_4 from the truncated factors:
# (6 x k) @ (k x k) @ (k x 4) -> 6 x 4.
# `@` always means matrix multiplication, whereas `*` is a matrix product only
# for np.matrix operands and element-wise for plain ndarrays.
svd_estimate = U_matrix @ singular_matrix @ V_transpose_matrix
print(arr_6_4, '\n')
print('svd_estimate: \n',svd_estimate.round(4))

[[5 5 0 5]
 [5 0 3 4]
 [3 4 0 3]
 [0 0 5 3]
 [5 4 4 5]
 [5 4 5 5]]

svd_estimate: 
 [[ 5.2885  5.1627  0.2149  4.4591]
 [ 3.2768  1.9021  3.74    3.8058]
 [ 3.5324  3.5479 -0.1332  2.8984]
 [ 1.1475 -0.6417  4.9472  2.3846]
 [ 5.0727  3.664   3.7887  5.313 ]
 [ 5.1086  3.4019  4.6166  5.5822]]
