# Preparation

In [1]:
import torch
torch.__version__

'2.3.0a0+6ddf5cf85e.nv24.04'

In [None]:
import os
from torch import nn
# from torch.utils.data import DataLoader
# from torchvision import datasets, transforms

In [5]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# and finally plain CPU when neither accelerator reports as available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


# Construct network

## Combination in one class (NO REDUNDANT CODE)

In [4]:
class Recommender(nn.Module):
    """User/item interaction scorer with three selectable architectures.

    ``config['model_type']`` decides how the user and item embeddings are
    combined before scoring:

    * ``'GMF'``   - element-wise product of the two embeddings.
    * ``'MLP'``   - concatenated embeddings passed through ``fc_layers``,
      each followed by a ReLU.
    * ``'NeuMF'`` - concatenation of the GMF vector and the MLP output.

    The combined vector is projected to a single logit by ``self.logits`` and
    squashed by a sigmoid, so ``forward`` returns one value in ``[0, 1]`` per
    (user, item) pair.

    Args:
        config: mapping with the keys ``num_users``, ``num_items``,
            ``latent_dim``, ``layers`` (sequence of MLP layer widths) and
            ``model_type``. For ``'MLP'`` and ``'NeuMF'``, ``layers[0]`` must
            equal ``2 * latent_dim`` because the MLP input is the
            concatenation of the two embeddings.

    Raises:
        ValueError: if ``model_type`` is not one of ``MODEL_TYPES``, or if
            ``layers[0]`` does not match the MLP input width for a model type
            that uses the MLP.
    """

    MODEL_TYPES = ('GMF', 'MLP', 'NeuMF')

    def __init__(self, config):
        super().__init__()

        latent_dim = config['latent_dim']
        layers = list(config['layers'])
        self.model_type = config['model_type']

        # Fail at construction time instead of with an UnboundLocalError
        # deep inside forward().
        if self.model_type not in self.MODEL_TYPES:
            raise ValueError(
                f"Unknown model_type {self.model_type!r}; "
                f"expected one of {self.MODEL_TYPES}"
            )
        if self.model_type in ('MLP', 'NeuMF') and layers[0] != 2 * latent_dim:
            raise ValueError(
                f"layers[0] must equal 2 * latent_dim ({2 * latent_dim}) "
                f"for model_type {self.model_type!r}, got {layers[0]}"
            )

        self.embedding_user = torch.nn.Embedding(num_embeddings=config['num_users'], embedding_dim=latent_dim)
        self.embedding_item = torch.nn.Embedding(num_embeddings=config['num_items'], embedding_dim=latent_dim)

        # MLP tower: one Linear per consecutive pair of widths in `layers`.
        self.fc_layers = torch.nn.ModuleList(
            [torch.nn.Linear(in_size, out_size) for in_size, out_size in zip(layers[:-1], layers[1:])]
        )

        # The width of the vector fed to the output layer depends on the
        # architecture, so size the projection accordingly.
        if self.model_type == 'GMF':
            logits_in = latent_dim
        elif self.model_type == 'MLP':
            logits_in = layers[-1]
        else:  # 'NeuMF'
            logits_in = layers[-1] + latent_dim
        self.logits = torch.nn.Linear(in_features=logits_in, out_features=1)
        self.sigmoid = torch.nn.Sigmoid()

    def _mlp(self, user_embedding, item_embedding):
        """Run the concatenated embeddings through the ReLU-activated MLP tower."""
        vector = torch.cat([user_embedding, item_embedding], dim=-1)
        for fc_layer in self.fc_layers:
            vector = torch.relu(fc_layer(vector))
        return vector

    def forward(self, user_indices, item_indices):
        """Score (user, item) pairs.

        Args:
            user_indices: integer tensor of user ids.
            item_indices: integer tensor of item ids, same shape as
                ``user_indices``.

        Returns:
            Tensor with a trailing dimension of size 1 holding the sigmoid of
            the projected interaction vector.
        """
        user_embedding = self.embedding_user(user_indices)
        item_embedding = self.embedding_item(item_indices)

        if self.model_type == 'GMF':
            vector = torch.mul(user_embedding, item_embedding)
        elif self.model_type == 'MLP':
            vector = self._mlp(user_embedding, item_embedding)
        elif self.model_type == 'NeuMF':
            gmf_vector = torch.mul(user_embedding, item_embedding)
            mlp_vector = self._mlp(user_embedding, item_embedding)
            vector = torch.cat([gmf_vector, mlp_vector], dim=-1)
        else:
            # Only reachable if model_type was reassigned after construction.
            raise ValueError(f"Unknown model_type {self.model_type!r}")

        return self.sigmoid(self.logits(vector))


# Load and preprocess data

In [None]:
from torch.utils.data import Dataset, DataLoader
import scipy.sparse as sp

class RatingDataset(Dataset):
    """In-memory dataset of ``(user, item, rating)`` rows read from a TSV file.

    Each item is a float32 tensor of length 3. The user and item ids are
    stored as floats alongside the rating, so cast them to an integer dtype
    before using them as embedding indices.
    """

    def __init__(self, filename):
        """Load every rating row of ``filename`` into ``self.data``."""
        self.data = self.load_rating_file_as_tensor(filename)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def load_rating_file_as_tensor(self, filename):
        """Parse a tab-separated rating file into an ``(N, 3)`` float32 tensor.

        Only the first three fields of each line (user, item, rating) are
        used; any further fields are ignored. Blank lines are skipped.

        Args:
            filename: path of the rating file.

        Returns:
            Float32 tensor of shape ``(N, 3)``; ``(0, 3)`` for an empty file.

        Raises:
            ValueError / IndexError: if a non-blank line has fewer than three
                fields or a field cannot be parsed as a number.
        """
        rows = []
        with open(filename, "r") as f:
            for line in f:
                # A trailing empty line is common in hand-edited data files.
                if not line.strip():
                    continue
                fields = line.split("\t")
                rows.append([int(fields[0]), int(fields[1]), float(fields[2])])
        # reshape keeps the column dimension even when there are no rows.
        return torch.tensor(rows, dtype=torch.float32).reshape(-1, 3)

class NegativeDataset(Dataset):
    """In-memory dataset of negative item ids read from a TSV file.

    Each line contributes one row: the first tab-separated field is skipped
    (presumably the (user, item) key the negatives belong to -- confirm
    against the data format) and the remaining fields are parsed as integer
    item ids.
    """

    def __init__(self, filename):
        """Load every row of ``filename`` into ``self.data``."""
        self.data = self.load_negative_file_as_tensor(filename)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def load_negative_file_as_tensor(self, filename):
        """Parse a tab-separated negatives file into an int32 tensor.

        Blank lines and empty fields (e.g. from a trailing tab) are skipped.

        Args:
            filename: path of the negatives file.

        Returns:
            Int32 tensor with one row per non-blank line.

        Raises:
            ValueError: if a field is not an integer, or if the lines do not
                all contain the same number of ids (the rows must form a
                rectangular tensor).
        """
        rows = []
        with open(filename, "r") as f:
            for line in f:
                if not line.strip():
                    continue
                fields = line.split("\t")
                rows.append([int(tok) for tok in fields[1:] if tok.strip()])
        return torch.tensor(rows, dtype=torch.int32)

def load_rating_file_as_sparse(filename):
    """Read a tab-separated rating file into a CSR user-by-item matrix.

    The matrix has shape ``(max_user_id + 1, max_item_id + 1)``, where the
    maxima are taken over every row of the file. Only ratings greater than
    zero are stored; when a (user, item) pair occurs more than once, the last
    occurrence wins. Blank lines are skipped.

    Args:
        filename: path of the rating file; the first three fields of each
            line are user id, item id and rating.

    Returns:
        ``scipy.sparse`` CSR matrix of dtype float32.
    """
    # Imported locally because NumPy is not imported in the visible part of
    # this file, and the dtype below needs it.
    import numpy as np

    # Single pass over the file: remember the entries while tracking the
    # largest ids, instead of reading the file twice.
    entries = []
    num_users, num_items = 0, 0
    with open(filename, "r") as f:
        for line in f:
            if not line.strip():
                continue
            arr = line.split("\t")
            user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
            num_users = max(num_users, user)
            num_items = max(num_items, item)
            entries.append((user, item, rating))

    mat = sp.dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
    for user, item, rating in entries:
        if rating > 0:
            mat[user, item] = rating
    return mat.tocsr()

# Example usage
# Training ratings, served in shuffled mini-batches of 256 rows.
train_dataset = RatingDataset("train.rating")
train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True)

# Held-out ratings and their negative item ids.
test_ratings = RatingDataset("test.rating")
test_negatives = NegativeDataset("test.negative")

for batch in train_dataloader:
    # Model training goes here (placeholder: the loop body does nothing yet)
    pass

# Load the rating data as a sparse matrix
train_matrix = load_rating_file_as_sparse("train.rating")


# Train and Test

In [None]:
# config = {
#     'num_users': 1000,
#     'num_items': 2000,
#     'latent_dim': 8,
#     'layers': [64, 32, 16],
#     'model_type': 'GMF'     # or 'MLP', 'NeuMF'
# }

# user_indices = 
# item_indices = 
# recommender = Recommender(config)
# output = recommender(user_indices, item_indices)

In [None]:
# `Recommender.__init__` requires a config mapping; calling it with no
# arguments raises TypeError, so build the config explicitly.
# Embedding tables are sized as (largest id + 1) over every id the loaded
# datasets contain, so that no train/test/negative id indexes out of range.
_all_ratings = torch.cat([train_dataset.data, test_ratings.data])
config = {
    'num_users': int(_all_ratings[:, 0].max()) + 1,
    'num_items': max(int(_all_ratings[:, 1].max()), int(test_negatives.data.max())) + 1,
    'latent_dim': 8,
    # The first width is 2 * latent_dim: the MLP input is the concatenation
    # of the user and item embeddings.
    'layers': [16, 32, 16],
    'model_type': 'GMF',  # or 'MLP', 'NeuMF'
}
model = Recommender(config).to(device)
print(model)

In [None]:
# Compute the loss.
# NOTE(review): this cell cannot run as written. `Recommender.forward` takes
# two arguments (user_indices, item_indices), `input` is never assigned in the
# visible file (so it resolves to the Python builtin), and `target` is not
# defined anywhere visible. TODO: feed user/item/label tensors taken from a
# `train_dataloader` batch instead.
criterion = torch.nn.BCELoss()
output = model(input)  # the model's output, a probability value
loss = criterion(output, target)  # compute the logistic-regression (binary cross-entropy) loss
