In [None]:
import torch
import yaml
from tqdm import tqdm
import numpy as np
from data import BPRSampleGenerator, SeqBPRDataset
from torch.utils.data import DataLoader
from model import SeqLearn

In [None]:
# Read the experiment configuration from the YAML file into `args`; later cells
# index it by string keys such as args['data'], args['model'] and args['topk'].
# NOTE(review): yaml.unsafe_load can construct arbitrary Python objects from
# tagged YAML. That is acceptable only for a trusted local file; prefer
# yaml.safe_load if the config contains plain scalars/lists/dicts only.
with open("/graduation_design/bpr/config/bpr.yaml", 'r', encoding='utf-8') as config_file:
    args = yaml.unsafe_load(config_file)
# Echo the parsed configuration as the cell output.
args

In [None]:
# Load the array stored as acf.npy in the base-model output directory.
# NOTE(review): `acf` is not read by any cell visible in this notebook section.
acf_path = args['data']['base_model_path'] + "/acf.npy"
acf = np.load(acf_path)

In [None]:
# Build the sample generator and produce the sequence samples used for
# training and evaluation.
generator = BPRSampleGenerator(args['data'])
seq_samples = generator.generate_seq_samples(
    seq_len=args['data']['maxlen'],
    num_negatives=args['data']['num_negatives']
)

# Wrap the samples in a dataset and split it into train / held-out parts.
dataset = SeqBPRDataset(seq_samples, args['model']['device'])
train_size = int(args['data']['train_valid_split'] * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so that the held-out set (and therefore every NDCG number
# reported below) is the same after a kernel restart. Without an explicit
# generator, random_split draws from the global RNG and the split changes on
# every run.
# NOTE(review): generate_seq_samples may have its own source of randomness
# (e.g. negative sampling); seeding the split does not cover that — confirm.
SPLIT_SEED = 42
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Training loader (shuffled every epoch).
train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)

In [None]:
# Evaluation loader over the held-out split, iterated in a fixed order.
# drop_last=True discards the final batch when it is smaller than batch_size,
# so those samples are not included in the metrics computed from this loader.
test_loader = DataLoader(
    test_dataset,
    batch_size=args['batch_size'],
    shuffle=False,
    drop_last=True,
)

In [None]:
# Baseline evaluation: NDCG@k of the recommendations stored in sasrec.npy.
# Note that `model` here is the loaded NumPy array, not a torch module. Each row
# is addressed by an interaction index, and the entries from column 2 onward
# are read as the ranked recommended items (the first two columns are skipped;
# presumably identifiers — confirm against the code that wrote the file).
model = np.load(args['data']['base_model_path'] + "/sasrec.npy")

topk = args['topk']

# Discount for each rank position: 1 / log2(rank + 2) for rank = 0..topk-1.
rank_discounts = [1 / np.log2(rank + 2) for rank in range(topk)]

# ideal_dcg_prefix[n] is the DCG of a ranking whose first n positions are all hits.
ideal_dcg_prefix = [0]
for discount in rank_discounts:
    ideal_dcg_prefix.append(ideal_dcg_prefix[-1] + discount)

with torch.no_grad():
    ndcg_scores = []
    for batch in tqdm(test_loader, desc="计算NDCG@10..."):
        users, user_seq, pos_items, neg_items, base_model_preds = batch

        # Score every sample of the batch individually.
        for user_tensor, pos_tensor in zip(users, pos_items):
            user_id = user_tensor.item()
            pos_item_id = pos_tensor.item()

            # Locate the row of the baseline array for this (user, item) pair;
            # samples without a matching row are skipped.
            interaction_idx = generator.get_interaction_index(user_id, pos_item_id)
            if interaction_idx is None:
                continue

            # Ranked recommendations for this interaction.
            top_items = model[interaction_idx][2:2 + topk]

            # Items the user has actually interacted with.
            true_items = generator.user_interacted_items[user_id]

            # DCG: add the rank discount of every recommended item that is a hit.
            dcg = sum(
                discount
                for discount, item_idx in zip(rank_discounts, top_items)
                if item_idx in true_items
            )

            # IDCG: best achievable DCG given how many relevant items exist.
            idcg = ideal_dcg_prefix[min(len(true_items), topk)]

            # NDCG, defined as 0 when the user has no relevant items.
            ndcg_scores.append(dcg / idcg if idcg > 0 else 0)

np.mean(ndcg_scores)

## 集成模型预测结果

In [None]:
# Ensemble evaluation: NDCG@k of the SeqLearn model restored from a checkpoint.

# Rebuild the evaluation loader so this cell can be re-run on its own.
# drop_last=True discards the final partial batch, so those samples are not scored.
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False, drop_last=True)

device = args['model']['device']
topk = args['topk']

# NOTE(review): 6033 is a hard-coded constructor argument (presumably the number
# of users — confirm) and should come from the config or the generator.
model = SeqLearn(args['model'], args['data'], 6033, generator.n_item)

# Load the checkpoint. map_location lets a checkpoint that was saved on a
# different device (e.g. GPU) be deserialised on the configured device.
# NOTE(review): hard-coded absolute path; consider moving it into the config.
ckpt = torch.load("/graduation_design/bpr/ckpt/bpr_epoch1_batch60.pth", map_location=device)

# Drop the weights of layers that must not be restored from the checkpoint.
filtered_ckpt = {
    key: value
    for key, value in ckpt.items()
    if not key.startswith('item_tower.cex')
}

# strict=False is required because the filtered keys are missing on purpose,
# but it also hides every other mismatch. Report anything that is not one of
# the intentionally skipped 'item_tower.cex' entries instead of silently
# evaluating a partially initialised model.
load_result = model.load_state_dict(filtered_ckpt, strict=False)
unexpected_keys = list(load_result.unexpected_keys)
missing_keys = [
    key for key in load_result.missing_keys
    if not key.startswith('item_tower.cex')
]
if unexpected_keys or missing_keys:
    print(f"WARNING: checkpoint/model key mismatch. "
          f"missing={missing_keys} unexpected={unexpected_keys}")

# Discount for each rank position: 1 / log2(rank + 2) for rank = 0..topk-1.
rank_discounts = [1 / np.log2(rank + 2) for rank in range(topk)]

# ideal_dcg_prefix[n] is the DCG of a ranking whose first n positions are all hits.
ideal_dcg_prefix = [0]
for discount in rank_discounts:
    ideal_dcg_prefix.append(ideal_dcg_prefix[-1] + discount)

# Candidate item ids are the same for every batch, so build the tensor once.
# NOTE(review): the model is constructed with generator.n_item while candidates
# span len(generator.item_to_id); this assumes both describe the same id range.
all_items = torch.arange(len(generator.item_to_id)).to(device)

model.eval()
with torch.no_grad():
    ndcg_scores = []
    for batch in tqdm(test_loader, desc="计算测试集NDCG"):
        users, user_seq, pos_items, neg_items, base_model_preds = batch

        # Score every candidate item for every sample in the batch.
        all_scores = model.predict(users, user_seq, all_items, base_model_preds)

        _, indices = torch.topk(all_scores, topk)

        # Convert once per batch instead of calling .item() on every element,
        # which would force a device synchronisation for each ranked item.
        ranked_lists = indices.tolist()
        user_ids = users.reshape(-1).tolist()

        for user_id, ranked_items in zip(user_ids, ranked_lists):
            # Items the user has actually interacted with.
            true_items = generator.user_interacted_items[user_id]

            # DCG: add the rank discount of every recommended item that is a hit.
            dcg = sum(
                discount
                for discount, item_idx in zip(rank_discounts, ranked_items)
                if item_idx in true_items
            )

            # IDCG: best achievable DCG given how many relevant items exist.
            idcg = ideal_dcg_prefix[min(len(true_items), topk)]

            # NDCG, defined as 0 when the user has no relevant items.
            ndcg_scores.append(dcg / idcg if idcg > 0 else 0)

np.mean(ndcg_scores)