使用BiasSVD的方法来进行矩阵分解，具体分解方法如下:
$$R_{m\times n}=P_{m\times k}\times Q_{k\times n}$$
其中假定用户偏置为$b_u$,物品偏置为$b_i$,u为评分系统平均分
    $$b_{u,i}=u+b_u+b_i$$
 则我们需要优化的目标函数$J(p,q)$为
 $$\sum_{u,i}{(r_{u,i}-u-b_u-b_i-q_i^Tp_u)}^2+\lambda{(\|p_u\|^2+\|q_i\|^2+b_u^2+b_i^2)}$$
 对于该优化目标我们也可以采用梯度下降的方式求解，$b_u$和$b_i$可以初始化为0
 根据上述式子对需要跟新的参数求偏导如下：
 $$
 \frac{\partial {J}}{\partial {p_u}}=-2(r_{u,i}-u-b_i-b_u-q_i^Tp_u)q_i+2\lambda p_u $$
 $$
 \frac{\partial {J}}{\partial {q_i}}=-2(r_{u,i}-u-b_i-b_u-q_i^Tp_u)p_u+2\lambda q_i 
 $$
 对每个参数迭代更新公式如下:
 $$
 p_u = p_u+\alpha{((r_{u,i}-u-b_i-b_u-q_i^Tp_u)q_i-\lambda p_u)}
 $$

 $$
 q_i = q_i+\alpha{((r_{u,i}-u-b_i-b_u-q_i^Tp_u)p_u-\lambda q_i)}
 $$
 $$
 b_i=b_i+\alpha{(r_{u,i}-b_i-b_u-u-q_i^Tp_u-\lambda b_i)}
 $$
 $$
  b_u=b_u+\alpha{(r_{u,i}-b_i-b_u-u-q_i^Tp_u-\lambda b_u)}
 $$

In [10]:
import math
import scipy.sparse
from scipy.sparse import coo_matrix
import numpy as np

In [None]:
def load_train_data(train_data_path):
    items_dict = []
    train_data = []
    print("start loading...")
    with open(train_data_path, 'r') as file:
        lines = file.readlines()
        user_num = 0
        items_num = 0
        rating_num = 0
        sum_rating = 0;
        for line in lines:
            if '|' in line:
                user_num += 1
                user_id = line.split('|')[0]
            else:
                item_id, rating = line.split()
                rating_num +=1
                sum_rating += int(rating)
                items_num = max(items_num, int(item_id))
                train_data.append([int(user_id),int(item_id),float(rating)])
        print("finish loading...")
    return user_num,items_num+1,rating_num, sum_rating,train_data
# train_data_path = "data-202205/train.txt"
# user_num,items_num, rating_num, sum_rating,train_data = load_train_data(train_data_path)
# print("用户数量:",user_num)
# print("物品数量:",items_num)
# print("评分数量:",rating_num)
# print("总评分数:",sum_rating)
# def transfer_sparse_matrix(data, user_num, items_num):
#     data_array = np.array(data)
    
#     row = data_array[:,0].astype(int)
#     col = data_array[:,1].astype(int)
#     value = data_array[:,2].astype(np.float32)
#     R = coo_matrix((value,(row,col)),shape=(user_num,items_num))
#     R = R.tocsr()
#     return R
#R = transfer_sparse_matrix(train_data,user_num,items_num)


In [190]:
class BiasSVD:
    
    def __init__(self, learning_rate = 0.001,lambda_regularizer = 0.1,laten_factors=10,epoch=10):
        self.learning_rate = learning_rate   #学习率
#         self.reg_p = reg_p
#         self.reg_q = reg_q
#         self.bu = bu
#         self.bi = bi
        self.laten_factors = laten_factors   #分割的低维度矩阵中隐式类别的数量
        self.epoch = epoch #训练轮数
        self.lambda_regularizer = lambda_regularizer    #正则化参数
    def test(self):
        return 2
    
    def train(self):
        self.reg_p = np.random(0,0.1,(self.user_num,self.laten_factors))
        self.reg_q = np.random(0,0.1,(self.user_num,self.laten_factors))
        self.bu = np.zeros(self.user_num)
        self.bi = np.zeros(self.item_num)
        train_data_path = "data-202205/train.txt"
        user_num, item_num, rating_num, sum_rating, train_data = load_train_data(train_data_path)
        self.user_num = user_num
        self.item_num = item_num
        aveg_rating = float32(sum_rating/rating_num)
        
        records_list = []
        for step in range(0,self.epoch):
            loss = 0.0
            for data in train_data:
                u, i, r = data
                P[u],Q[i],bu[u],bi[i], ls = self.update(P[u],Q[i],bu=bu[i],bi=b[i],aveg_rating=aveg_rating,r=r,
                                                       learning_rate=self.learning_rate,
                                                       lambda_regularizer=self.lambda_regularizer)
                loss+= ls
            print("step:%d loss:%d"%(step,loss)
            
#     def tet(self):
#         error = r-(aveg_rating + bu + bi + np.dot(p,q.T))
#         p = p + learning_rate(error* q - lambda_regularizer*p)
#         q = q + learning_rate(error* p - lambda_regularizer*q)
#         bu = bu + learning_rate(error - lambda_regularizer*bu)
#         bi = bi + learning_rate(error - lambda_regularizer*bi)
#         loss = 0.5*(error**2 + lambda_regularizer*(np.square(p).sum()+np.square(q).sum())+bu**2 + bi**2)
#         return p,q,bu,bi,loss              
if __name__ == "__main__":
#     train_data_path = "data-202205/train.txt"
#     user_num,item_num,rating_num,sum_rating, data = load_train_data(train_data_path)
#     print("用户数量:",user_num)
#     print("物品数量:",item_num)
#     print("评分数量:",rating_num)
#     print("总评分数:",sum_rating)
    svd = BiasSVD()
    print(svd.lambda_regularizer)
                  


SyntaxError: invalid syntax (<ipython-input-190-1de404466a83>, line 45)

NameError: name 'BiasSVD' is not defined