# Preparation

In [48]:
import torch
torch.__version__

'2.3.0a0+6ddf5cf85e.nv24.04'

In [49]:
import os
import pandas as pd
import numpy as np
import random
from torch import nn
import torch.optim as optim

In [50]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.enabled = True
seed_everything(2024)

In [51]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


# Construct network

In [52]:
class RecommenderModel(nn.Module):
    def __init__(self, config):
        super(RecommenderModel, self).__init__()
        
        self.embedding_user = torch.nn.Embedding(num_embeddings=config['num_users'], embedding_dim=config['latent_dim'])
        self.embedding_item = torch.nn.Embedding(num_embeddings=config['num_items'], embedding_dim=config['latent_dim'])
        
        ## MLP part
        self.fc_layers = torch.nn.ModuleList()
        for idx, (in_size, out_size) in enumerate(zip(config['layers'][:-1], config['layers'][1:])):
            self.fc_layers.append(torch.nn.Linear(in_size, out_size))
        
        self.logits = torch.nn.Linear(in_features=config['layers'][-1] + config['latent_dim'], out_features=1)
        self.sigmoid = torch.nn.Sigmoid()
        
        self.model_type = config['model_type']
        
    def forward(self, user_indices, item_indices):
        user_embedding = self.embedding_user(user_indices)
        item_embedding = self.embedding_item(item_indices)
        
        if self.model_type == 'GMF':
            vector = torch.mul(user_embedding, item_embedding)
        elif self.model_type == 'MLP':
            vector = torch.cat([user_embedding, item_embedding], dim=-1)
            for fc_layer in self.fc_layers:
                vector = fc_layer(vector)
                vector = torch.nn.ReLU()(vector)
        elif self.model_type == 'NeuMF':
            gmf_vector = torch.mul(user_embedding, item_embedding)
            mlp_vector = torch.cat([user_embedding, item_embedding], dim=-1)
            for fc_layer in self.fc_layers:
                mlp_vector = fc_layer(mlp_vector)
                mlp_vector = torch.nn.ReLU()(mlp_vector)
            vector = torch.cat([gmf_vector, mlp_vector], dim=-1)
        
        # logits = self.logits(vector)
        # output = self.sigmoid(logits)
        output = self.sigmoid(vector)
        return output


# Load data & data preprocess

In [59]:
from torch.utils.data import Dataset, DataLoader
import scipy.sparse as sp

class RatingDataset(Dataset):
    def __init__(self, rating_mat, user_num, item_num, negative_num=4):
        self.negative_num = negative_num
        self.mat = rating_mat
        self.user_num = user_num
        self.item_num = item_num
        self.user_ids = np.array(self.mat.row)
        self.item_ids = np.array(self.mat.col)
        self.ratings = np.array(self.mat.data, dtype=np.float32)
        self.labels = (self.ratings>0).astype(np.float32)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # get user id and item id
        user_id = self.user_ids[idx]
        item_id = self.item_ids[idx]
        labels = self.labels[idx]

        # create one hot vector
        user_vec = torch.zeros(self.user_num,dtype = torch.float32)
        item_vec = torch.zeros(self.item__num,dtype = torch.float32)
        user_vec[user_id] = 1
        item_vec[item_id] = 1
        
        # get negative items vec for this user
        return user_vec, item_vec, labels

def load_rating_file_as_sparse(filename):
    num_users, num_items = 0, 0
    with open(filename, "r") as f:
        for line in f:
            arr = line.split("\t")
            user, item = int(arr[0]), int(arr[1])
            num_users = max(num_users, user) + 1
            num_items = max(num_items, item) + 1

    mat = sp.dok_matrix((num_users, num_items), dtype=np.float32)
    with open(filename, "r") as f:
        for line in f:
            arr = line.split("\t")
            user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
            if rating > 0:
                mat[user, item] = rating
    return mat, num_users, num_items
    # return mat.tocsr(), num_users, num_items

def load_negative_file(filename):
    negativeList = []
    with open(filename, "r") as f:
        line = f.readline()
        while line != None and line != "":
            arr = line.split("\t")
            negatives = []
            for x in arr[1: ]:
                negatives.append(int(x))
            negativeList.append(negatives)
            line = f.readline()
    return negativeList

In [54]:
# import pandas as pd
# df = pd.read_csv("Data/ml-1m.train.rating",sep='\t', header=None, names=['uid', 'mid', 'rating', 'timestamp'], engine='python')
# df

In [68]:
# df2 = pd.read_csv("Data/ml-1m.test.negative",sep = "\t",header=None)
# df2


# Trian and Test

In [56]:
BATCH_SIZE = 256
NUMEPOCHS = 50

In [57]:
rating_mat, num_of_user, num_of_item = load_rating_file_as_sparse("Data/ml-1m.train.rating")
negative_list = load_negative_file("Data/ml-1m.test.negative")

train_dataset = RatingDataset(rating_mat, negative_list,num_of_user, num_of_item,negative_num=9)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

AttributeError: 'dok_matrix' object has no attribute 'row'

In [None]:
config = {
    'num_users': 1000,
    'num_items': 2000,
    'latent_dim': 8,
    'layers': [64, 32, 16,8],
    'model_type': 'GMF'     #　MLP, NeuMF
}

model = RecommenderModel(config)
model = model.to(device)
print(model)

In [None]:
# 定义损失函数和优化器
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
for epoch in range(NUMEPOCHS):
    for user, item, label in train_dataloader:  # dataloader应返回用户ID、物品ID和标签（1表示正样本，0表示负样本）
        # 清除之前的梯度
        optimizer.zero_grad()
        # 前向传播
        output = model(user, item)
        # 计算损失
        loss = criterion(output, label)
        # 反向传播和优化
        loss.backward()
        optimizer.step()
    # 每轮结束输出损失
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')