In [21]:
import yaml
import torch
import numpy as np
from tqdm import tqdm
from model.ensrec import EnsRec
from torch.utils.data import DataLoader
from data import Data, SeqBPRDataset

In [None]:
with open("config/bert_config.yaml", 'r', encoding='utf-8') as f:
    args = yaml.unsafe_load(f)
data = Data(args['data'])

In [4]:
np.save(f'datasets/{args["data"]["name"]}/train_samples.npy', data.train_samples)
np.save(f'datasets/{args["data"]["name"]}/test_samples.npy', data.test_samples)

In [17]:
train_samples = np.load(f'datasets/{args["data"]["name"]}/train_samples.npy', allow_pickle=True)
test_samples = np.load(f'datasets/{args["data"]["name"]}/test_samples.npy', allow_pickle=True)
train_dataset = SeqBPRDataset(train_samples, args['data']['device'])
test_dataset = SeqBPRDataset(test_samples, args['data']['device'], is_test=True)
train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)

## 评估指标

- nDCG

In [4]:
def nDCG(rec_items, test_set):
    DCG = lambda x: np.sum(x / np.log(np.arange(2, len(x) + 2)))
    def get_implict_matrix(rec_items, test_set):
        rel_matrix = [[0] * rec_items.shape[1] for _ in range(rec_items.shape[0])]
        for user in range(len(test_set)):
            for index, item in enumerate(rec_items[user]):
                if item in test_set[user]:
                    rel_matrix[user][index] = 1
        return np.array(rel_matrix)
    rel_matrix = get_implict_matrix(rec_items, test_set)
    ndcgs = []
    for user in range(len(test_set)):
        rels = rel_matrix[user]
        dcg = DCG(rels)
        idcg = DCG(sorted(rels, reverse=True))
        ndcg = dcg / idcg if idcg != 0 else 0
        ndcgs.append(ndcg)
    return ndcgs

In [5]:
def map(recommended_items, interacted_items):
    interacted_set = set(interacted_items)
    
    hits, precisions = [], []
    relevant_count = 0
    for i, item in enumerate(recommended_items):
        position = i + 1  # 位置从1开始计数

        is_relevant = item in interacted_set
        hits.append(1 if is_relevant else 0)
        if is_relevant:
            relevant_count += 1
            precision_at_k = relevant_count / position
            precisions.append(precision_at_k)
    
    if not precisions:
        return 0.0
    return sum(precisions) / len(precisions)

In [6]:
def mrr(results, relevant_docs):
    relevant_set = set(relevant_docs)
    rank = 0
    for i, doc_id in enumerate(results):
        if doc_id in relevant_set:
            rank = i + 1  # 排名从1开始
            break
    if rank > 0:
        reciprocal_rank = 1.0 / rank
        return reciprocal_rank
    else:
        return 0.0

In [7]:
def infer(model, topk, metric):
    with torch.no_grad():
        metrics = []
        for batch in tqdm(test_loader, desc="计算测试集指标"):
            all_scores = model(batch, is_test=True)
            scores, indices = torch.topk(all_scores, topk)

            for i in range(len(batch['user_id'])):
                user_id = batch['user_id'][i].item()
                pos_item = batch['pos_item'][i].item()

                true_item_ids = data.user_interacted_item_ids[user_id]
                true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

                predicted_item_ids = np.array([indices[i].cpu().numpy().tolist()])
                if metric == 'map':
                    score = map(predicted_item_ids[0], true_item_ids)
                elif metric == 'ndcg':
                    score = nDCG(np.array(predicted_item_ids), [true_item_ids])
                elif metric == 'mrr':
                    score = mrr(predicted_item_ids[0], true_item_ids)
                metrics.append(score)
    return np.mean(metrics)

## EnsRec

In [15]:
from model.old_ensrec import OldEnsRec
ensrec = OldEnsRec(args['model'], args['data'], data.n_user)
ensrec.load_state_dict(torch.load("D:\Code\graduation_design/bpr/ckpt/kuairec/ensrec_0.463.pth"), strict=True)
ensrec.eval()

0it [00:00, ?it/s]

10733it [00:00, 421357.98it/s]

>>>> 加载预计算的物品嵌入...





OldEnsRec(
  (dien): DIEN(
    (gru_cell): GRUCell(64, 64)
    (attention): AttentionLayer(
      (q_proj): Linear(in_features=64, out_features=64, bias=True)
      (k_proj): Linear(in_features=64, out_features=64, bias=True)
      (v_proj): Linear(in_features=64, out_features=64, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (augru_cell): GRUCell(128, 64)
    (dropout): Dropout(p=0.1, inplace=False)
    (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (output_layer): Linear(in_features=64, out_features=1, bias=True)
  )
  (user_embeddings): Embedding(1411, 64)
  (item_tower): ItemTower(
    (item_transform): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (preference_alignment): PreferenceAlignmentModule(
      (content_adaptor): Linear(in_features=64, out_features=64, bias=True)
      (position_embeddings): Embedding(20, 64)
 

In [10]:
from model.ensrec_ablation.no_user import EnsRec
ensrec = EnsRec(args['model'], args['data'], data.n_user)
ensrec.load_state_dict(torch.load("D:\Code\graduation_design/bpr/ckpt/ml-1m/ensrec_no_user_0.3939.pth"), strict=True)
ensrec.eval()

>>>> 加载预计算的物品嵌入...


EnsRec(
  (dien): DIEN(
    (gru_cell): GRUCell(64, 64)
    (attention): AttentionLayer(
      (q_proj): Linear(in_features=64, out_features=64, bias=True)
      (k_proj): Linear(in_features=64, out_features=64, bias=True)
      (v_proj): Linear(in_features=64, out_features=64, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (augru_cell): GRUCell(128, 64)
    (dropout): Dropout(p=0.1, inplace=False)
    (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (output_layer): Linear(in_features=64, out_features=1, bias=True)
  )
  (user_embeddings): Embedding(6033, 64)
  (item_tower): ItemTower(
    (item_transform): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (preference_alignment): PreferenceAlignmentModule(
      (content_adaptor): Linear(in_features=64, out_features=64, bias=True)
      (position_embeddings): Embedding(20, 64)
    

In [19]:
from model.ensrec_ablation.no_item import EnsRec
ensrec = EnsRec(args['model'], args['data'], data.n_user, args['data'][args['data']['name']]['n_item'])
ensrec.load_state_dict(torch.load("D:\Code\graduation_design/bpr/ckpt/ml-1m/ensrec_no_item_0.4169.pth"), strict=True)
ensrec.eval()

>>>> 加载预计算的物品嵌入...


EnsRec(
  (dien): DIEN(
    (gru_cell): GRUCell(64, 64)
    (attention): AttentionLayer(
      (q_proj): Linear(in_features=64, out_features=64, bias=True)
      (k_proj): Linear(in_features=64, out_features=64, bias=True)
      (v_proj): Linear(in_features=64, out_features=64, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (augru_cell): GRUCell(128, 64)
    (dropout): Dropout(p=0.1, inplace=False)
    (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (output_layer): Linear(in_features=64, out_features=1, bias=True)
  )
  (user_embeddings): Embedding(6033, 64)
  (item_embeddings): Embedding(3953, 64)
  (item_tower): ItemTower(
    (item_transform): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (preference_alignment): PreferenceAlignmentModule(
      (content_adaptor): Linear(in_features=64, out_features=64, bias=True)
      (pos

In [13]:
ensrec = EnsRec(args['model'], args['data'], data.n_user)
ensrec.load_state_dict(torch.load("D:\Code\graduation_design/bpr/ckpt/kuairec/ensrec_0.463.pth"), strict=False)
ensrec.eval()

>>>> 加载预计算的物品嵌入...


EnsRec(
  (dien): DIEN(
    (gru_cell): GRUCell(64, 64)
    (attention): AttentionLayer(
      (q_proj): Linear(in_features=64, out_features=64, bias=True)
      (k_proj): Linear(in_features=64, out_features=64, bias=True)
      (v_proj): Linear(in_features=64, out_features=64, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (augru_cell): GRUCell(128, 64)
    (dropout): Dropout(p=0.1, inplace=False)
    (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (output_layer): Linear(in_features=64, out_features=1, bias=True)
  )
  (user_embeddings): Embedding(1411, 64)
  (item_tower): ItemTower(
    (item_transform): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (preference_alignment): PreferenceAlignmentModule(
      (content_adaptor): Linear(in_features=64, out_features=64, bias=True)
      (position_embeddings): Embedding(20, 64)
    

In [20]:
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=True)
infer(ensrec, 10, 'ndcg')

计算测试集指标:   0%|          | 0/17 [00:00<?, ?it/s]

计算测试集指标: 100%|██████████| 17/17 [00:16<00:00,  1.02it/s]


0.41685563177143087

## 其他集成方法

### CombSum

In [13]:
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    scores, indices = torch.topk(all_scores, 10)

    for i in range(len(batch['user_id'])):
        user_id = batch['user_id'][i].item()
        pos_item = batch['pos_item'][i].item()

        true_item_ids = data.user_interacted_item_ids[user_id]
        true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

        predicted_item_ids = np.array([indices[i].cpu().numpy().tolist()])
        # score = nDCG(np.array(predicted_item_ids), [true_item_ids])
        # score = map(predicted_item_ids[0], true_item_ids)
        score = mrr(predicted_item_ids[0], true_item_ids)
        metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 14904/14904 [00:22<00:00, 666.08it/s]


0.15995455571743816

### CombMNZ

In [16]:
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    item_ids_np = batch['base_model_preds'].cpu().numpy().reshape(-1)
    item_indices = torch.tensor(np.array([data.item_to_id[id] for id in item_ids_np]), device="cuda")
    all_item_counts = torch.bincount(item_indices, minlength=data.n_item)
    all_scores = all_scores * all_item_counts.unsqueeze(0)
    scores, indices = torch.topk(all_scores, 10)

    user_id = batch['user_id'].item()
    pos_item = batch['pos_item'].item()

    true_item_ids = data.user_interacted_item_ids[user_id]
    true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

    predicted_item_ids = np.array([indices[0].cpu().numpy().tolist()])
    score = nDCG(np.array(predicted_item_ids), [true_item_ids])
    # score = map(predicted_item_ids[0], true_item_ids)
    # score = mrr(predicted_item_ids[0], true_item_ids)
    metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 14904/14904 [00:29<00:00, 502.75it/s]


0.22983398313794107

### ​CombANZ

In [19]:
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    item_ids_np = batch['base_model_preds'].cpu().numpy().reshape(-1)
    item_indices = torch.tensor(np.array([data.item_to_id[id] for id in item_ids_np]), device="cuda")
    all_item_counts = torch.bincount(item_indices, minlength=data.n_item)
    # 将计数为0的位置替换为无穷大
    all_item_counts[all_item_counts == 0] = 1_000_000_000
    all_scores = all_scores / all_item_counts.unsqueeze(0)
    scores, indices = torch.topk(all_scores, 10)

    user_id = batch['user_id'].item()
    pos_item = batch['pos_item'].item()

    true_item_ids = data.user_interacted_item_ids[user_id]
    true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

    predicted_item_ids = np.array([indices[0].cpu().numpy().tolist()])
    score = nDCG(np.array(predicted_item_ids), [true_item_ids])
    # score = map(predicted_item_ids[0], true_item_ids)
    # score = mrr(predicted_item_ids[0], true_item_ids)
    metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 14904/14904 [00:39<00:00, 377.92it/s]


0.12974145658493563

### AEM

In [15]:
from model.aem import AEM
aem = AEM(args['model'], args['data'], data.n_user, 10737)
aem.load_state_dict(torch.load("D:\Code\graduation_design\\bpr\ckpt\kuairec\\aem_0.4313.pth"))
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=True)
infer(aem, 10, 'mrr')

计算测试集指标:   0%|          | 0/50 [00:00<?, ?it/s]

计算测试集指标: 100%|██████████| 50/50 [00:59<00:00,  1.18s/it]


0.3356730373965977

### Stack

In [23]:
from model.stack import StackingModel
stack_model = StackingModel(args['model'], args['data'], data.n_user)
stack_model.load_state_dict(torch.load("D:\Code\graduation_design\\bpr\ckpt\kuairec\stack_0.4256.pth"))
stack_model.eval()
infer(stack_model, 10, 'mrr')

计算测试集指标:   0%|          | 0/50 [00:00<?, ?it/s]

计算测试集指标: 100%|██████████| 50/50 [00:12<00:00,  3.98it/s]


0.3310864184321948

## 基础模型

In [7]:
base_model_results = {}
for base_model in args['data']['base_model']:
    model = np.load(args['data']['base_model_path'] + f"/{base_model}.npy")
    print(model.shape)
    scores = []
    phar = tqdm(test_samples, desc=f"计算{base_model}测试集指标...")
    for case in phar:
        user_id = case['user_id']
        pos_item = case['pos_item']

        interaction_idx = data.get_interaction_index(user_id, data.item_to_id[pos_item])
        assert interaction_idx != -1
        print(interaction_idx)

        predicted_item_ids = model[interaction_idx][2:2+10]
        true_item_ids = data.user_interacted_item_ids[user_id]
        true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

        predicted_items = np.array([predicted_item_ids])
        score = nDCG(np.array(predicted_item_ids), [true_item_ids])
        # score = map(predicted_items[0], true_item_ids)
        # score = mrr(predicted_items[0], true_item_ids)
        scores.append(score)

        phar.set_postfix(score=score)
    print(f"{base_model}: {np.mean(scores)}")

(2536880, 102)


计算acf测试集指标...:   0%|          | 0/25369 [00:00<?, ?it/s]

4597068





IndexError: index 4597068 is out of bounds for axis 0 with size 2536880