In [None]:
# 推荐模型
# 协同过滤：矩阵分解、自编码器
# 内容过滤：因子分解机


In [1]:
# 加载数据集1
import pandas as pd
from sklearn.model_selection import train_test_split
# from scipy.sparse import spmatrix

# 加载 数据集 movie_ratings.csv
movie_ratings = pd.read_csv('./data/movie_ratings.csv', header=None)
movie_ratings.columns = ['user_id', 'item_id', 'rating']
user_ids = movie_ratings.value_counts(subset=['user_id'])
item_ids = movie_ratings.value_counts(subset=['item_id'])
movie_ratings['rating'] = (movie_ratings['rating'] - 0) / (movie_ratings['rating'].max() - 0)
X_train, X_test, y_train, y_test = train_test_split(movie_ratings.values[:,:2], movie_ratings.values[:,2], test_size=0.4, random_state=0)
print(len(user_ids), len(item_ids), user_ids.keys().max(), item_ids.keys().max())
movie_ratings.head()
num_users = user_ids.keys().max()[0] + 1 
num_items = item_ids.keys().max()[0] + 1


2526 9555 (2533,) (9565,)


In [1]:
# 加载数据集2
# 数据集：ml-100k

import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# 加载数据
ratings = np.array([[int(x) for x in line.strip().split('\t')[:3]] for line in open('./data/ml-100k/ua.base','r').read().strip().split('\n')], dtype=np.int32)
ratings[:,-1] = (ratings[:,-1] - 0)/(max(ratings[:,-1]) - 0)
occupation_dict = {'administrator':0, 'artist':1, 'doctor':2, 'educator':3, 'engineer':4, 'entertainment':5, 'executive':6, 'healthcare':7, 'homemaker':8, 'lawyer':9, 'librarian':10, 'marketing':11, 'none':12, 'other':13, 'programmer':14, 'retired':15, 'salesman':16, 'scientist':17, 'student':18, 'technician':19, 'writer':20}
gender_dict={'M':1,'F':0}
user_info = {}
for line in open('./data/ml-100k/u.user','r', encoding='utf-8').read().strip().split('\n'):
    phs = line.strip().split('|')
    user_info[int(phs[0])] = [int(phs[1]), gender_dict[phs[2]], occupation_dict[phs[3]]]
item_info = {}
for line in open('./data/ml-100k/u.item','r', encoding='ISO-8859-1').read().strip().split('\n'):
    phs = line.strip().split('|')
    item_info[int(phs[0])] = phs[5:]
data = np.array([user_info[u] + item_info[i] + [r] for u, i, r in ratings], dtype=np.float32)
data[:,:-1] = MinMaxScaler().fit_transform(data[:,:-1])
# print(len(user_info[list(user_info.keys())[0]]), len(item_info[list(item_info.keys())[0]]))
# print(data.shape)
num_users = len(user_info)
num_items = len(item_info)
num_features = 22

In [3]:
# CF评分预测
# 1.1 mf矩阵分解模型: R = P Q.T
# 数据集：movie_ratings.csv

import torch
from torch.nn import Module, Parameter, MSELoss, Embedding
from torch.utils.data import TensorDataset, DataLoader
from datetime import datetime
device = torch.device("cuda:0" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else "cpu"))
batch_size = 100
num_epochs = 10
dim=99
train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).long(), torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).long(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)

# 矩阵分解
class MatrixFactorization(Module):
    def __init__(self, num_users, num_items, dim):
        super(MatrixFactorization, self).__init__()
        self.num_users, self.num_items, self.dim = num_users, num_items, dim
        self.user_embeddings = Embedding(num_users, dim)
        self.item_embeddings = Embedding(num_items, dim)
    def forward(self, user_item_ids):
        user_embeddings = self.user_embeddings(user_item_ids[:,0])
        item_embeddings = self.item_embeddings(user_item_ids[:,1])
        result = torch.sigmoid(torch.sum(user_embeddings * item_embeddings, dim=-1))
        return result.squeeze()
model = MatrixFactorization(num_users=num_users, num_items=num_items, dim=dim).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)
criterion = MSELoss(reduction='sum').to(device)

for epoch in range(num_epochs):
    # train:
    epoch_train_losses = []
    model.train()
    for i, inputs in enumerate(train_loader):
        optimizer.zero_grad()
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)
        optimizer.step()
        epoch_train_losses.append([input.shape[0], loss.item()])
    # validate:
    model.eval()
    epoch_test_losses = []
    for i, inputs in enumerate(test_loader):
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        epoch_test_losses.append([input.shape[0], loss.item()])
    train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])
    test_loss  = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])
    # print
    print('['+datetime.now().strftime("%Y-%m-%d %H:%M:%S")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs,  train_loss, test_loss))

[2023-08-31 21:40:09] epoch=[1/10], train_mse_loss: 0.0918, validate_mse_loss: 0.0343
[2023-08-31 21:40:56] epoch=[2/10], train_mse_loss: 0.0347, validate_mse_loss: 0.0336
[2023-08-31 21:41:43] epoch=[3/10], train_mse_loss: 0.0322, validate_mse_loss: 0.0322
[2023-08-31 21:42:31] epoch=[4/10], train_mse_loss: 0.0314, validate_mse_loss: 0.0314
[2023-08-31 21:43:20] epoch=[5/10], train_mse_loss: 0.0309, validate_mse_loss: 0.0310
[2023-08-31 21:44:08] epoch=[6/10], train_mse_loss: 0.0306, validate_mse_loss: 0.0310
[2023-08-31 21:44:55] epoch=[7/10], train_mse_loss: 0.0305, validate_mse_loss: 0.0309
[2023-08-31 21:45:43] epoch=[8/10], train_mse_loss: 0.0305, validate_mse_loss: 0.0308
[2023-08-31 21:46:31] epoch=[9/10], train_mse_loss: 0.0304, validate_mse_loss: 0.0309
[2023-08-31 21:47:19] epoch=[10/10], train_mse_loss: 0.0305, validate_mse_loss: 0.0309


In [4]:
# CF评分预测
# 1.2 SVD矩阵分解模型: R = P Q.T + user_bias + item_bias + bias
# 数据集：movie_ratings.csv

import torch
from torch.nn import Module, Parameter, MSELoss, Embedding
from torch.utils.data import TensorDataset, DataLoader
from datetime import datetime
device = torch.device("cuda:0" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else "cpu"))
batch_size = 100
num_epochs = 10
dim=99
train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).long(), torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).long(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)

# 矩阵分解
class SVD(Module):
    def __init__(self, num_users, num_items, dim):
        super(SVD, self).__init__()
        self.num_users, self.num_items, self.dim = num_users, num_items, dim
        self.user_embeddings = Embedding(num_users, dim)
        self.item_embeddings = Embedding(num_items, dim)
        self.bias = Parameter(torch.randn((1,1)), requires_grad=True)
        self.user_bias = Parameter(torch.randn((num_users,1)), requires_grad=True)
        self.item_bias = Parameter(torch.randn((num_items,1)), requires_grad=True)
    def forward(self, user_item_ids):
        user_embeddings = self.user_embeddings(user_item_ids[:,0])
        item_embeddings = self.item_embeddings(user_item_ids[:,1])
        result = torch.sigmoid(torch.sum(user_embeddings * item_embeddings, dim=-1, keepdim=True) + self.user_bias[user_item_ids[:,0]] + self.item_bias[user_item_ids[:,1]] + self.bias)
        return result.squeeze()
model = SVD(num_users=num_users, num_items=num_items, dim=dim).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)
criterion = MSELoss(reduction='sum').to(device)

for epoch in range(num_epochs):
    # train:
    epoch_train_losses = []
    model.train()
    for i, inputs in enumerate(train_loader):
        optimizer.zero_grad()
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)
        optimizer.step()
        epoch_train_losses.append([input.shape[0], loss.item()])
    # validate:
    model.eval()
    epoch_test_losses = []
    for i, inputs in enumerate(test_loader):
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        epoch_test_losses.append([input.shape[0], loss.item()])
    train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])
    test_loss  = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])
    # print
    print('['+datetime.now().strftime("%Y-%m-%d %H:%M:%S")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs,  train_loss, test_loss))

[2023-09-01 11:30:42] epoch=[1/10], train_mse_loss: 0.0602, validate_mse_loss: 0.0276
[2023-09-01 11:31:45] epoch=[2/10], train_mse_loss: 0.0284, validate_mse_loss: 0.0290
[2023-09-01 11:32:47] epoch=[3/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0289
[2023-09-01 11:33:50] epoch=[4/10], train_mse_loss: 0.0286, validate_mse_loss: 0.0289
[2023-09-01 11:34:53] epoch=[5/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0290
[2023-09-01 11:35:54] epoch=[6/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0288
[2023-09-01 11:36:57] epoch=[7/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0288
[2023-09-01 11:37:59] epoch=[8/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0291
[2023-09-01 11:39:00] epoch=[9/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0289
[2023-09-01 11:40:01] epoch=[10/10], train_mse_loss: 0.0286, validate_mse_loss: 0.0290


In [4]:
# CF评分预测
# 2.自编码器: R -> z -> R_
# 数据集：movie_ratings.csv

import torch, numpy as np
from torch.nn import Module, Parameter, MSELoss
from torch.utils.data import TensorDataset, DataLoader
from torch import nn
from datetime import datetime
device = torch.device("cuda:0" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else "cpu"))
batch_size = 100
num_epochs = 10
dim=99
# train：
train = np.zeros((num_users, num_items), dtype=np.float32)
for f, l in zip(X_train, y_train):
    train[int(f[0]), int(f[1])] = l
# with mask
train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(train).float(), torch.from_numpy(train>0.01).float()), batch_size=batch_size, shuffle=True, pin_memory=True)
# test:
test = np.zeros((num_users, num_items), dtype=np.float32)
for f, l in zip(X_test, y_test):
    test[int(f[0]), int(f[1])] = l
# with mask
test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(test).float(), torch.from_numpy(test>0.01).float()), batch_size=batch_size, shuffle=False, pin_memory=True)


# 自编码器
class AutoEncoder(Module):
    def __init__(self, num_users, num_items, dim):
        super(AutoEncoder,self).__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.dim = dim
        self.encoder = nn.Sequential(nn.Linear(num_items, dim), nn.Sigmoid())
        self.decoder = nn.Sequential(nn.Linear(dim, num_items))
    def forward(self, torch_input):
        encoder = self.encoder(torch_input)
        decoder = self.decoder(encoder)
        return torch.sigmoid(decoder).squeeze()
model = AutoEncoder(num_users=num_users, num_items=num_items, dim=dim).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)
criterion = MSELoss(reduction='none').to(device)

for epoch in range(num_epochs):
    # train:
    epoch_train_losses = []
    model.train()
    for i, inputs in enumerate(train_loader):
        optimizer.zero_grad()
        input = inputs[0].to(device)
        mask = inputs[1].to(device)
        if mask.sum().item()==0:
            continue
        output = model(input)
        loss = torch.sum(criterion(output, input) * mask)
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)
        optimizer.step()
        epoch_train_losses.append([mask.sum().item(), loss.item()])
    # validate:
    model.eval()
    epoch_test_losses = []
    for i, inputs in enumerate(test_loader):
        optimizer.zero_grad()
        input = inputs[0].to(device)
        mask = inputs[1].to(device)
        if mask.sum().item()==0:
            continue
        output = model(input)
        loss = torch.sum(criterion(output, input) * mask)
        epoch_test_losses.append([mask.sum().item(), loss.item()])
    train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])
    test_loss  = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])
    # print
    print('['+datetime.now().strftime("%Y-%m-%d %H:%M:%S")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs,  train_loss, test_loss))

[2023-08-31 22:41:17] epoch=[1/10], train_mse_loss: 0.0380, validate_mse_loss: 0.0300
[2023-08-31 22:41:18] epoch=[2/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0286
[2023-08-31 22:41:18] epoch=[3/10], train_mse_loss: 0.0275, validate_mse_loss: 0.0283
[2023-08-31 22:41:19] epoch=[4/10], train_mse_loss: 0.0261, validate_mse_loss: 0.0282
[2023-08-31 22:41:19] epoch=[5/10], train_mse_loss: 0.0245, validate_mse_loss: 0.0280
[2023-08-31 22:41:20] epoch=[6/10], train_mse_loss: 0.0228, validate_mse_loss: 0.0282
[2023-08-31 22:41:21] epoch=[7/10], train_mse_loss: 0.0213, validate_mse_loss: 0.0284
[2023-08-31 22:41:21] epoch=[8/10], train_mse_loss: 0.0198, validate_mse_loss: 0.0289
[2023-08-31 22:41:22] epoch=[9/10], train_mse_loss: 0.0185, validate_mse_loss: 0.0290
[2023-08-31 22:41:22] epoch=[10/10], train_mse_loss: 0.0172, validate_mse_loss: 0.0290


In [9]:
# CF评分预测
# 3.神经协同过滤 NeuralCF: MF + MLP 独立
# NeuralCF的实现，用于召回
# WWW 2017] Neural Collaborative Filtering
# 数据集：movie_ratings.csv

import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from datetime import datetime
from tqdm import tqdm
device = torch.device("cuda:0" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else "cpu"))
batch_size = 100
num_epochs = 10
dim = 20
train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).long(), torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).long(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)

class NeuralMF(nn.Module):
    def __init__(self, num_users, num_items, mf_dim, mlp_dim, dnn_layer_dims):
        super(NeuralMF, self).__init__()
        self.MF_Embedding_User = nn.Embedding(num_embeddings=num_users, embedding_dim=mf_dim)
        self.MF_Embedding_Item = nn.Embedding(num_embeddings=num_items, embedding_dim=mf_dim)
        self.MLP_Embedding_User = nn.Embedding(num_embeddings=num_users, embedding_dim=mlp_dim)
        self.MLP_Embedding_Item = nn.Embedding(num_embeddings=num_items, embedding_dim=mlp_dim)
        # 全连接网络
        self.dnn_network = nn.Sequential(nn.Linear(2 * mlp_dim, dnn_layer_dims[0]))
        if len(dnn_layer_dims)>1:
            for i, layer_dim in enumerate(dnn_layer_dims[1:]):
                self.dnn_network.append(nn.Linear(dnn_layer_dims[i], layer_dim))
                self.dnn_network.append(nn.ReLU())
        self.dnn_network.append(nn.Linear(dnn_layer_dims[-1], mf_dim))
        # 合并
        self.final_nn = nn.Sequential(nn.Linear(2 * mf_dim, 1), nn.Sigmoid())
    def forward(self, inputs):
        # mf
        mf_vec = self.MF_Embedding_User(inputs[:, 0]) * self.MF_Embedding_Item(inputs[:, 1])
        # mlp
        mlp_vec = torch.cat([self.MLP_Embedding_User(inputs[:, 0]), self.MLP_Embedding_Item(inputs[:, 1])], dim=-1)
        mlp_vec = self.dnn_network(mlp_vec)
        # 合并两个
        result = self.final_nn(torch.cat([mf_vec, mlp_vec], dim=-1))
        return result.squeeze()
model = NeuralMF(num_users=num_users, num_items=num_items, mf_dim=dim, mlp_dim=dim, dnn_layer_dims=[30]).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)
criterion = nn.MSELoss(reduction='sum').to(device)

for epoch in range(num_epochs):
    # train:
    epoch_train_losses = []
    model.train()
    for i, inputs in enumerate(train_loader):
        optimizer.zero_grad()
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)
        optimizer.step()
        epoch_train_losses.append([input.shape[0], loss.item()])
    # validate:
    model.eval()
    epoch_test_losses = []
    for i, inputs in enumerate(test_loader):
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        epoch_test_losses.append([input.shape[0], loss.item()])
    train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])
    test_loss  = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])
    # print
    print('['+datetime.now().strftime("%Y-%m-%d %H:%M:%S")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs,  train_loss, test_loss))


[2023-09-01 09:33:29] epoch=[1/10], train_mse_loss: 0.1662, validate_mse_loss: 0.1674
[2023-09-01 09:33:32] epoch=[2/10], train_mse_loss: 0.1659, validate_mse_loss: 0.1670
[2023-09-01 09:33:36] epoch=[3/10], train_mse_loss: 0.1660, validate_mse_loss: 0.1666
[2023-09-01 09:33:43] epoch=[4/10], train_mse_loss: 0.1659, validate_mse_loss: 0.1668
[2023-09-01 09:33:50] epoch=[5/10], train_mse_loss: 0.1658, validate_mse_loss: 0.1667
[2023-09-01 09:33:58] epoch=[6/10], train_mse_loss: 0.1658, validate_mse_loss: 0.1671
[2023-09-01 09:34:05] epoch=[7/10], train_mse_loss: 0.1658, validate_mse_loss: 0.1666
[2023-09-01 09:34:12] epoch=[8/10], train_mse_loss: 0.1657, validate_mse_loss: 0.1667
[2023-09-01 09:34:20] epoch=[9/10], train_mse_loss: 0.1656, validate_mse_loss: 0.1666
[2023-09-01 09:34:27] epoch=[10/10], train_mse_loss: 0.1656, validate_mse_loss: 0.1666


In [5]:
# 内容过滤：
# FM 分解机：y = w0 + sum(w1 * f) + sum(<v_i,v_j> * f_i f_j)
# 数据集：ml-100k

import os
import numpy as np
import torch
import torch.nn as nn
from torch.nn import Module, Parameter, MSELoss
from torch.utils.data import Dataset, DataLoader, TensorDataset 
from sklearn.model_selection import train_test_split
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

device = torch.device("cuda:0" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else "cpu"))
batch_size = 100
num_epochs = 10
dim=10
X_train, X_test, y_train, y_test = train_test_split(data[:,:-1], data[:,-1], test_size=0.4, random_state=0)
train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)

# bit-wise cross
class FactorizationMachine(Module):
    def __init__(self, num_features, dim):
        super(FactorizationMachine, self).__init__()
        self.num_features = num_features
        self.dim = dim
        self.w0 = Parameter(torch.randn((1,1)), requires_grad=True)
        self.w = Parameter(torch.randn((num_features, 1)), requires_grad=True)
        self.bw = Parameter(torch.randn((num_features, dim)), requires_grad=True)
    def forward(self, X: torch.Tensor):
        # 实现方式一：
        # tmp = torch.matmul(self.bw, self.bw.T)
        # tmp[np.tril_indices(self.num_features)] = 0
        # y = torch.sigmoid(self.w0 + torch.sum(torch.matmul(X, self.w), dim=-1, keepdim=True) +  torch.sum(torch.sum(torch.bmm(X.unsqueeze(-1), X.unsqueeze(1)) * tmp.unsqueeze(0), dim=-1, keepdim=False), dim=-1, keepdim=True))
        # 实现方式二：和的平方 - 平方的和
        sum_square = torch.sum(self.bw.unsqueeze(0) * X.unsqueeze(-1).repeat(1,1,self.dim), dim=1).square()
        square_sum = torch.sum(self.bw.square().unsqueeze(0) * X.square().unsqueeze(-1).repeat(1,1,self.dim), dim=1)
        y = torch.sigmoid(self.w0 + torch.sum(torch.matmul(X, self.w), dim=-1, keepdim=True) +  torch.sum(sum_square - square_sum, dim=-1, keepdim=True))
        return y.squeeze()
    def parameters(self, recurse: bool = True):
        return [self.w0, self.w, self.bw]
model = FactorizationMachine(num_features=num_features, dim=dim).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)
criterion = MSELoss(reduction='sum').to(device)

for epoch in range(num_epochs):
    # train:
    epoch_train_losses = []
    model.train()
    for i, inputs in enumerate(train_loader):
        optimizer.zero_grad()
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)
        optimizer.step()
        epoch_train_losses.append([input.shape[0], loss.item()])
    # validate:
    model.eval()
    epoch_test_losses = []
    for i, inputs in enumerate(test_loader):
        input = inputs[0].to(device)
        label = inputs[1].to(device)
        output = model(input)
        loss = criterion(output, label)
        epoch_test_losses.append([input.shape[0], loss.item()])
    train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])
    test_loss  = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])
    # print
    print('['+datetime.now().strftime("%Y-%m-%d %H:%M:%S")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs,  train_loss, test_loss))


[2023-09-01 09:29:47] epoch=[1/10], train_mse_loss: 0.2159, validate_mse_loss: 0.2086
[2023-09-01 09:29:49] epoch=[2/10], train_mse_loss: 0.2061, validate_mse_loss: 0.2053
[2023-09-01 09:29:50] epoch=[3/10], train_mse_loss: 0.1970, validate_mse_loss: 0.1976
[2023-09-01 09:29:52] epoch=[4/10], train_mse_loss: 0.1865, validate_mse_loss: 0.1806
[2023-09-01 09:29:54] epoch=[5/10], train_mse_loss: 0.1746, validate_mse_loss: 0.1720
[2023-09-01 09:29:56] epoch=[6/10], train_mse_loss: 0.1675, validate_mse_loss: 0.1660
[2023-09-01 09:29:57] epoch=[7/10], train_mse_loss: 0.1644, validate_mse_loss: 0.1670
[2023-09-01 09:29:59] epoch=[8/10], train_mse_loss: 0.1627, validate_mse_loss: 0.1650
[2023-09-01 09:30:01] epoch=[9/10], train_mse_loss: 0.1619, validate_mse_loss: 0.1622
[2023-09-01 09:30:03] epoch=[10/10], train_mse_loss: 0.1611, validate_mse_loss: 0.1631
