# Preparation

In [3]:
import torch
torch.__version__

'2.3.0a0+6ddf5cf85e.nv24.04'

In [4]:
import os
from torch import nn
# from torch.utils.data import DataLoader
# from torchvision import datasets, transforms

In [5]:
# Pick the best available backend: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


# Construct network

## Combination in one class (NO REDUNDANT CODE)

In [6]:
class Recommender(nn.Module):
    """Collaborative-filtering recommender with a selectable interaction head.

    One user embedding table and one item embedding table are shared by all
    model types. ``config['model_type']`` selects how the two embeddings are
    combined before the final linear + sigmoid scoring layer:

    * ``'GMF'``   - element-wise product of the user and item embeddings.
    * ``'MLP'``   - concatenated embeddings passed through ``fc_layers``
      (Linear + ReLU for every layer).
    * ``'NeuMF'`` - concatenation of the GMF vector and the MLP vector.

    Args:
        config: dict with the keys
            ``num_users``  - number of rows of the user embedding table,
            ``num_items``  - number of rows of the item embedding table,
            ``latent_dim`` - embedding size,
            ``layers``     - MLP layer widths; consecutive pairs become
              ``Linear(in, out)`` layers. For ``'MLP'`` and ``'NeuMF'`` the
              first width must equal ``2 * latent_dim`` because the MLP input
              is the concatenation of both embeddings,
            ``model_type`` - one of ``'GMF'``, ``'MLP'``, ``'NeuMF'``.

    Raises:
        ValueError: if ``model_type`` is unknown, or if ``layers[0]`` does not
            match ``2 * latent_dim`` for a model type that uses the MLP.
    """

    _MODEL_TYPES = ('GMF', 'MLP', 'NeuMF')

    def __init__(self, config):
        super(Recommender, self).__init__()

        model_type = config['model_type']
        latent_dim = config['latent_dim']
        layers = config['layers']

        if model_type not in self._MODEL_TYPES:
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected one of {self._MODEL_TYPES}"
            )
        # Fail early with a readable message instead of a shape error deep inside forward().
        if model_type != 'GMF' and layers[0] != 2 * latent_dim:
            raise ValueError(
                f"layers[0] must equal 2 * latent_dim ({2 * latent_dim}) for "
                f"model_type {model_type!r}, got {layers[0]}"
            )

        self.embedding_user = torch.nn.Embedding(num_embeddings=config['num_users'], embedding_dim=latent_dim)
        self.embedding_item = torch.nn.Embedding(num_embeddings=config['num_items'], embedding_dim=latent_dim)

        ## MLP part
        self.fc_layers = torch.nn.ModuleList()
        for in_size, out_size in zip(layers[:-1], layers[1:]):
            self.fc_layers.append(torch.nn.Linear(in_size, out_size))

        # The width of the vector fed to the scoring layer depends on the head.
        if model_type == 'GMF':
            logits_in = latent_dim
        elif model_type == 'MLP':
            logits_in = layers[-1]
        else:  # NeuMF: GMF vector and MLP vector side by side
            logits_in = layers[-1] + latent_dim
        self.logits = torch.nn.Linear(in_features=logits_in, out_features=1)
        self.sigmoid = torch.nn.Sigmoid()

        self.model_type = model_type

    def _mlp(self, user_embedding, item_embedding):
        """Run the concatenated embeddings through every Linear + ReLU layer."""
        vector = torch.cat([user_embedding, item_embedding], dim=-1)
        for fc_layer in self.fc_layers:
            vector = torch.relu(fc_layer(vector))
        return vector

    def forward(self, user_indices, item_indices):
        """Score user/item pairs.

        Args:
            user_indices: integer tensor of user ids (embedding lookups reject
                floating-point indices).
            item_indices: integer tensor of item ids, same shape as
                ``user_indices``.

        Returns:
            Tensor of shape ``(*user_indices.shape, 1)`` with values in (0, 1).
        """
        user_embedding = self.embedding_user(user_indices)
        item_embedding = self.embedding_item(item_indices)

        if self.model_type == 'GMF':
            vector = torch.mul(user_embedding, item_embedding)
        elif self.model_type == 'MLP':
            vector = self._mlp(user_embedding, item_embedding)
        elif self.model_type == 'NeuMF':
            gmf_vector = torch.mul(user_embedding, item_embedding)
            mlp_vector = self._mlp(user_embedding, item_embedding)
            vector = torch.cat([gmf_vector, mlp_vector], dim=-1)
        else:
            # model_type was reassigned after construction to something unsupported.
            raise ValueError(f"Unknown model_type {self.model_type!r}")

        # Project the interaction vector to a single logit, then squash to a probability.
        logits = self.logits(vector)
        output = self.sigmoid(logits)
        return output


# Load and preprocess data

In [7]:
import numpy as np
import scipy.sparse as sp
from torch.utils.data import Dataset, DataLoader

class RatingDataset(Dataset):
    """Dataset of ``[user, item, rating]`` rows read from a tab-separated file.

    Each sample is one row of ``self.data``, a float32 tensor of shape
    ``(num_rows, 3)``. Only the first three columns of every line are used;
    any further columns are ignored.

    Note: user and item ids are stored as float32 next to the rating, so they
    must be cast to an integer dtype before being used as embedding indices,
    and ids above 2**24 would not be represented exactly.
    """

    def __init__(self, filename):
        self.data = self.load_rating_file_as_tensor(filename)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def load_rating_file_as_tensor(self, filename):
        """Parse ``filename`` into a float32 tensor of shape ``(num_rows, 3)``.

        Blank lines (for example a trailing empty line) are skipped instead of
        raising ``ValueError``. An empty file yields a tensor of shape ``(0, 3)``.
        """
        ratingList = []
        with open(filename, "r") as f:
            for line in f:
                line = line.rstrip()
                if not line:
                    continue
                arr = line.split("\t")
                user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
                ratingList.append([user, item, rating])
        # reshape keeps the column dimension even when no rows were read.
        return torch.tensor(ratingList, dtype=torch.float32).reshape(-1, 3)

class NegativeDataset(Dataset):
    """Dataset of negative item ids read from a tab-separated file.

    The first field of every line is skipped; every remaining field is parsed
    as an integer item id. ``self.data`` is an int32 tensor with one row per
    line, so every line must carry the same number of ids.
    """

    def __init__(self, filename):
        self.data = self.load_negative_file_as_tensor(filename)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def load_negative_file_as_tensor(self, filename):
        """Parse ``filename`` into an int32 tensor of shape ``(num_rows, num_negatives)``.

        Blank lines are skipped: in the original they produced an empty row,
        which made the list ragged and ``torch.tensor`` fail. Trailing
        whitespace (including a trailing tab) is stripped before splitting.
        """
        negativeList = []
        with open(filename, "r") as f:
            for line in f:
                line = line.rstrip()
                if not line:
                    continue
                arr = line.split("\t")
                # arr[0] is the leading key field; only the ids after it are kept.
                negatives = [int(x) for x in arr[1:]]
                negativeList.append(negatives)
        return torch.tensor(negativeList, dtype=torch.int32)

def load_rating_file_as_sparse(filename):
    """Load a tab-separated rating file into a float32 CSR matrix.

    Rows are users and columns are items; the matrix shape is
    ``(max_user_id + 1, max_item_id + 1)``. Only ratings strictly greater than
    zero are stored. If the same (user, item) pair occurs more than once, the
    last occurrence wins.

    The file is read once (the original read it twice) and blank lines are
    skipped instead of raising ``ValueError``.

    Args:
        filename: path of a file whose lines start with
            ``user<TAB>item<TAB>rating``; further columns are ignored.

    Returns:
        ``scipy.sparse`` CSR matrix with dtype float32.
    """
    num_users, num_items = 0, 0
    entries = []
    with open(filename, "r") as f:
        for line in f:
            line = line.rstrip()
            if not line:
                continue
            arr = line.split("\t")
            user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
            # Every row contributes to the shape, even if its rating is not stored.
            num_users = max(num_users, user)
            num_items = max(num_items, item)
            if rating > 0:
                entries.append((user, item, rating))

    mat = sp.dok_matrix((num_users+1, num_items+1), dtype=np.float32)
    for user, item, rating in entries:
        mat[user, item] = rating
    return mat.tocsr()


In [53]:
# Scratch check: build a 1x3 tensor filled with zeros.
torch.zeros(1, 3)

tensor([[0., 0., 0.]])

In [47]:
import pandas as pd

# Preview the training ratings: tab-separated, no header row, columns named here.
df = pd.read_csv(
    "Data/ml-1m.train.rating",
    sep='\t',
    header=None,
    names=['uid', 'mid', 'rating', 'timestamp'],
    engine='python',
)
df

Unnamed: 0,uid,mid,rating,timestamp
0,0,32,4,978824330
1,0,34,4,978824330
2,0,4,5,978824291
3,0,35,4,978824291
4,0,30,4,978824291
...,...,...,...,...
994164,6039,1092,5,956703977
994165,6039,41,4,956703977
994166,6039,128,5,956703954
994167,6039,323,4,956703954


In [33]:
# Preview the test negatives: tab-separated, no header row, default integer column names.
df2 = pd.read_csv(
    "Data/ml-1m.test.negative",
    sep="\t",
    header=None,
)
df2


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,"(0,25)",1064,174,2791,3373,269,2678,1902,3641,1216,...,2854,3067,58,2551,2333,2688,3703,1300,1924,3118
1,"(1,133)",1072,3154,3368,3644,549,1810,937,1514,1713,...,1535,341,3525,1429,2225,1628,2061,469,3056,2553
2,"(2,207)",2216,209,2347,3,1652,3397,383,2905,2284,...,953,865,813,1353,2945,2580,2989,2790,2879,2481
3,"(3,208)",3023,1489,1916,1706,1221,1191,2671,81,2483,...,3347,1707,2901,2767,2167,1921,247,1618,2016,2323
4,"(4,222)",1794,3535,108,593,466,2048,854,1378,1301,...,2490,1332,2526,2804,2027,833,176,463,2851,2453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035,"(6035,1048)",2495,3406,819,729,1920,2003,3329,2351,549,...,2583,2905,2713,2361,2542,2598,2030,2984,3382,2771
6036,"(6036,294)",2248,1318,3661,72,351,2131,3281,2482,639,...,110,508,2168,354,1156,1646,3238,2091,1494,2489
6037,"(6037,1528)",2194,867,1424,2517,3080,2789,1210,3150,466,...,1428,433,74,3457,833,2823,2425,3434,2331,2530
6038,"(6038,1449)",2606,2054,2754,1299,2854,2413,1055,742,2876,...,2140,3401,813,1374,307,1477,2327,114,98,3021


In [8]:
# Training ratings, served in shuffled mini-batches of 256 rows.
train_dataset = RatingDataset("Data/ml-1m.train.rating")
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=256,
    shuffle=True,
)

# Held-out test ratings and the negative items listed for the test set.
test_ratings = RatingDataset("Data/ml-1m.test.rating")
test_negatives = NegativeDataset("Data/ml-1m.test.negative")


In [9]:

for batch in train_dataloader:
    # Train the model on this batch (placeholder: nothing is done yet)
    pass

# Load the rating data as a sparse matrix
# train_matrix = load_rating_file_as_sparse("train.rating")


FileNotFoundError: [Errno 2] No such file or directory: 'train.rating'

# Train and Test

In [None]:
# config = {
#     'num_users': 1000,
#     'num_items': 2000,
#     'latent_dim': 8,
#     'layers': [64, 32, 16],
#     'model_type': 'GMF'     # alternatives: 'MLP', 'NeuMF'
# }

# user_indices = 
# item_indices = 
# recommender = Recommender(config)
# output = recommender(user_indices, item_indices)

In [None]:
# Recommender.__init__ takes a required `config` dict, so Recommender() with no
# arguments raises TypeError. Build the config from the data loaded above.
all_ratings = torch.cat([train_dataset.data, test_ratings.data])
config = {
    # Ids are fed straight into the embedding tables, so each table needs max id + 1 rows.
    'num_users': int(all_ratings[:, 0].max()) + 1,
    'num_items': max(int(all_ratings[:, 1].max()), int(test_negatives.data.max())) + 1,
    'latent_dim': 8,
    # The MLP input is concat(user, item) embeddings, i.e. 2 * latent_dim wide.
    'layers': [16, 32, 16, 8],
    'model_type': 'GMF',  # alternatives: 'MLP', 'NeuMF'
}
model = Recommender(config).to(device)
print(model)

In [None]:
# Compute the loss
criterion = torch.nn.BCELoss()
# NOTE(review): `input` is never assigned in the visible notebook, so it resolves to the
# Python builtin; Recommender.forward takes (user_indices, item_indices) - confirm intent.
output = model(input)  # the model's output, meant to be a probability
# NOTE(review): `target` is not defined in the visible notebook; BCELoss expects targets
# in [0, 1] with the same shape as `output` - confirm where the labels come from.
loss = criterion(output, target)  # compute the logistic-regression (binary cross-entropy) loss
