In [1]:
import random
import math

In [2]:
# 加载数据
def loadData():
    rating_data={1: {'A': 5, 'B': 3, 'C': 4, 'D': 4},
           2: {'A': 3, 'B': 1, 'C': 2, 'D': 3, 'E': 3},
           3: {'A': 4, 'B': 3, 'C': 4, 'D': 3, 'E': 5},
           4: {'A': 3, 'B': 3, 'C': 1, 'D': 5, 'E': 4},
           5: {'A': 1, 'B': 5, 'C': 5, 'D': 2, 'E': 1}
          }
    return rating_data

rating_data = loadData()
rating_data

{1: {'A': 5, 'B': 3, 'C': 4, 'D': 4},
 2: {'A': 3, 'B': 1, 'C': 2, 'D': 3, 'E': 3},
 3: {'A': 4, 'B': 3, 'C': 4, 'D': 3, 'E': 5},
 4: {'A': 3, 'B': 3, 'C': 1, 'D': 5, 'E': 4},
 5: {'A': 1, 'B': 5, 'C': 5, 'D': 2, 'E': 1}}

In [3]:
# 定义模型
class BiasSVD():
    def __init__(self, rating_data, dim=5, alpha=0.1, lmbda=0.1, epochs=100):
        self.dim = dim          # 这个表示隐向量的维度
        self.P = dict()     # 用户矩阵P  大小是[users_num, dim]
        self.Q = dict()     # 物品矩阵Q  大小是[item_nums, dim]
        self.bu = dict()    # 用户偏置系数
        self.bi = dict()    # 物品偏置系数
        self.mu = 0         # 全局偏置系数
        self.alpha = alpha  # 学习率
        self.lmbda = lmbda  # 正则项系数
        self.epochs = epochs        # 最大迭代次数
        self.rating_data = rating_data  # 评分矩阵

        for user, items in self.rating_data.items():
            # 初始化矩阵P和Q, 随机数需要和1/sqrt(dim)成正比
            self.P[user] = [random.random() / math.sqrt(self.dim) for x in range(0, dim)]
            self.bu[user] = 0
            for item, rating in items.items():
                if item not in self.Q:
                    self.Q[item] = [random.random() / math.sqrt(self.dim) for x in range(0, dim)]
                    self.bi[item] = 0

    # 采用随机梯度下降的方式训练模型参数
    def train(self):
        
        # 计算全局平均分
        cnt, mu_sum = 0, 0
        for user, items in self.rating_data.items():
            for item, rui in items.items():
                mu_sum, cnt = mu_sum + rui, cnt + 1
        self.mu = mu_sum / cnt

        for epoch in range(self.epochs):
            # 遍历所有的用户及历史交互物品
            for user, items in self.rating_data.items():
                # 遍历历史交互物品
                for item, rui in items.items():
                    rhat_ui = self.predict(user, item)  # 评分预测
                    e_ui = rui - rhat_ui                  # 评分预测偏差

                    # 参数更新
                    self.bu[user] += self.alpha * (e_ui - self.lmbda * self.bu[user])
                    self.bi[item] += self.alpha * (e_ui - self.lmbda * self.bi[item])
                    for k in range(0, self.dim):
                        self.P[user][k] += self.alpha * (e_ui * self.Q[item][k] - self.lmbda * self.P[user][k])
                        self.Q[item][k] += self.alpha * (e_ui * self.P[user][k] - self.lmbda * self.Q[item][k])
            # 逐步降低学习率
            self.alpha *= 0.1


    # 评分预测
    def predict(self, user, item):
        return sum(self.P[user][dim] * self.Q[item][dim] for dim in range(0, self.dim)) + self.bu[user] + self.bi[
            item] + self.mu
    
model = BiasSVD(rating_data, dim=10)

In [4]:
# 模型训练
model.train()

# 预测用户1对物品E的评分
for item in ['E']:
    print(item, model.predict(1, item))

E 3.647434779167003


In [5]:
# 使用torch实现
import torch
import torch.nn as nn
import torch.optim as optim

class BiasSVD(nn.Module):
    def __init__(self, dim, num_users, num_items):
        super().__init__()
        self.P = nn.Embedding(num_users, dim)
        self.Q = nn.Embedding(num_items, dim)
        self.bu = nn.Embedding(num_users, 1)
        self.bi = nn.Embedding(num_items, 1)
        self.mu = nn.Parameter(torch.tensor(0.0))

        # 初始化参数
        nn.init.normal_(self.P.weight, std=0.01)
        nn.init.normal_(self.Q.weight, std=0.01)
        nn.init.constant_(self.bu.weight, 0.0)
        nn.init.constant_(self.bi.weight, 0.0)

    def forward(self, user_ids, item_ids):
        p_u = self.P(user_ids)  # 用户隐向量
        q_i = self.Q(item_ids)  # 物品隐向量
        
        bu = self.bu(user_ids).squeeze()
        bi = self.bi(item_ids).squeeze()
        
        pred = (p_u * q_i).sum(dim=1) + bu + bi + self.mu
        return pred
    
# 求用户和物品的个数
user_to_idx = {user: idx for idx, user in enumerate(rating_data.keys())}
item_to_idx = {item: idx for idx, item in enumerate(set(i for u in rating_data for i in rating_data[u]))}
num_users = len(user_to_idx)
num_items = len(item_to_idx)

model = BiasSVD(dim=10, num_users=num_users, num_items=num_items)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.1)
criterion = nn.MSELoss()

In [6]:
# 训练模型
train_data = []
for user, items in rating_data.items():
    for item, rating in items.items():
        train_data.append((user_to_idx[user], item_to_idx[item], rating))

train_tensor = torch.tensor(train_data, dtype=torch.long)
user_ids, item_ids, ratings = train_tensor[:, 0], train_tensor[:, 1], train_tensor[:, 2].float()

epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predict = model(user_ids, item_ids)
    loss = criterion(predict, ratings)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# 预测用户1对物品E的评分
user_id = torch.tensor([user_to_idx[1]], dtype=torch.long)
item_id = torch.tensor([item_to_idx['E']], dtype=torch.long)
model.eval()
predict = model(user_id, item_id)
print(predict.item())

Epoch 0, Loss: 12.0414
Epoch 10, Loss: 9.8609
Epoch 20, Loss: 6.9644
Epoch 30, Loss: 3.8477
Epoch 40, Loss: 1.8926
Epoch 50, Loss: 1.4653
Epoch 60, Loss: 1.3813
Epoch 70, Loss: 1.4253
Epoch 80, Loss: 1.4278
Epoch 90, Loss: 1.3843
4.112499237060547
