In [1]:
import torch
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim
import numpy as np

from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score,roc_curve,auc
from sklearn.datasets import make_classification

import matplotlib.pyplot as plt

In [51]:
class Config:
    """Hyperparameters read by the dataset, model, and training cells."""

    # Number of distinct user / item IDs: sizes the embedding tables and
    # bounds the randomly generated IDs.
    num_users, num_items = 1_000, 2_000

    # Width of every embedding vector (GMF and MLP branches alike).
    embed_dim = 16

    # Output width of each successive Linear layer in the MLP branch.
    hidden_dims = [64, 32, 16]

    # Optimisation settings.
    batch_size = 32
    lr = 1e-3
    num_epochs = 30

# Custom dataset class
class CFDataset(Dataset):
    """Synthetic (user, item, label) interaction dataset.

    IDs are drawn uniformly from ``[0, Config.num_users)`` and
    ``[0, Config.num_items)``; labels are random 0/1 values stored as
    float32. Replace the generated arrays with real data for actual use.
    """

    def __init__(self, num_samples=10000):
        # Draw order (users, items, labels) is kept fixed so that a seeded
        # NumPy global RNG always yields the same dataset.
        self.user_ids = np.random.randint(
            low=0, high=Config.num_users, size=num_samples
        )
        self.item_ids = np.random.randint(
            low=0, high=Config.num_items, size=num_samples
        )
        raw_labels = np.random.randint(low=0, high=2, size=num_samples)
        self.labels = raw_labels.astype(np.float32)

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.user_ids)

    def __getitem__(self, idx):
        """Return ``(user_id, item_id, label)`` for ``idx`` as torch tensors."""
        user = torch.tensor(self.user_ids[idx], dtype=torch.int64)
        item = torch.tensor(self.item_ids[idx], dtype=torch.int64)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return (user, item, label)

In [52]:
class NeuralCF(nn.Module):
    """Neural collaborative filtering model with a GMF branch and an MLP branch.

    The GMF branch multiplies user and item embeddings element-wise; the MLP
    branch concatenates a second pair of embeddings and passes them through a
    stack of Linear+ReLU layers. Both branch outputs are concatenated and
    mapped to a single sigmoid probability per (user, item) pair.

    Args:
        Config: object exposing ``num_users``, ``num_items``, ``embed_dim``
            and ``hidden_dims`` (a sequence of MLP layer widths, which may be
            empty, in which case the MLP branch is the identity).
    """

    def __init__(self, Config):
        super().__init__()
        # Each branch gets its own user/item embedding tables.
        self.user_embed_gmf = nn.Embedding(Config.num_users, Config.embed_dim)  # GMF user embedding
        self.item_embed_gmf = nn.Embedding(Config.num_items, Config.embed_dim)  # GMF item embedding

        self.user_embed_mlp = nn.Embedding(Config.num_users, Config.embed_dim)  # MLP user embedding
        self.item_embed_mlp = nn.Embedding(Config.num_items, Config.embed_dim)  # MLP item embedding

        # MLP layers: the input is the concatenated user+item embedding.
        input_dim = 2 * Config.embed_dim
        mlp_layers = []
        for output_dim in Config.hidden_dims:
            mlp_layers.append(nn.Linear(input_dim, output_dim))
            mlp_layers.append(nn.ReLU())
            input_dim = output_dim

        self.mlp = nn.Sequential(*mlp_layers)

        # Output layer. After the loop ``input_dim`` is the MLP output width
        # (or the raw concatenation width when ``hidden_dims`` is empty), so
        # using it avoids an IndexError on ``hidden_dims[-1]``.
        total_dim = Config.embed_dim + input_dim  # GMF width + MLP width
        self.fc = nn.Sequential(
            nn.Linear(total_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, user_ids, item_ids):
        """Score each (user, item) pair.

        Args:
            user_ids: integer tensor of user indices; expected to be 1-D,
                since the concatenations along ``dim=1`` assume 2-D embeddings.
            item_ids: integer tensor of item indices, same length as
                ``user_ids``.

        Returns:
            Tensor of shape ``(batch,)`` with probabilities in (0, 1).
        """
        # Look up the embeddings for both branches.
        user_emb_gmf = self.user_embed_gmf(user_ids)
        item_emb_gmf = self.item_embed_gmf(item_ids)

        user_emb_mlp = self.user_embed_mlp(user_ids)
        item_emb_mlp = self.item_embed_mlp(item_ids)

        # GMF: element-wise product.
        gmf = user_emb_gmf * item_emb_gmf

        # MLP: concatenate and run through the multilayer perceptron.
        concat_emb = torch.cat([user_emb_mlp, item_emb_mlp], dim=1)
        mlp = self.mlp(concat_emb)

        # Concatenate the MLP and GMF outputs.
        neuralcf_emb = torch.cat([mlp, gmf], dim=1)

        # Output layer. Squeeze only the trailing feature axis: a bare
        # ``squeeze()`` would also drop the batch axis when the batch holds a
        # single sample, leaving a 0-dim tensor that no longer matches the
        # ``(1,)`` label tensor expected by the loss.
        output = self.fc(neuralcf_emb).squeeze(-1)
        return output


In [50]:
# Seed the NumPy and torch global RNGs before anything random happens:
# the dataset is generated with np.random, while weight initialisation and
# DataLoader shuffling draw from torch's default generator.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

dataset = CFDataset()
dataloader = DataLoader(dataset, batch_size=Config.batch_size, shuffle=True)
model = NeuralCF(Config)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=Config.lr)

# Training loop
pbar = tqdm(range(Config.num_epochs), unit="epoch", desc="Training", ncols=100)

for epoch in pbar:
    total_loss = 0.0
    num_seen = 0
    for user_ids, item_ids, labels in dataloader:
        optimizer.zero_grad()
        predictions = model(user_ids, item_ids)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()

        # BCELoss returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_count = labels.size(0)
        total_loss += loss.item() * batch_count
        num_seen += batch_count

    # Per-sample mean loss over the epoch (guarded against an empty loader).
    avg_loss = total_loss / max(num_seen, 1)
    pbar.set_postfix({"Loss": f"{avg_loss:.4f}"})



Training: 100%|███████████████████████████████████| 100/100 [00:39<00:00,  2.51epoch/s, Loss=0.0031]
