In [1]:
import yaml
import torch
import numpy as np
from tqdm import tqdm
from model.sem import Sem
from model.ensrec import EnsRec
from torch.utils.data import DataLoader
from data import Data, SeqBPRDataset

In [2]:
# Load the experiment configuration and build the dataset wrapper from its 'data' section.
# NOTE(review): yaml.unsafe_load can construct arbitrary Python objects from the file,
# so it must only be used on trusted configs. If the config contains no
# Python-specific tags, yaml.safe_load would be the safer choice — confirm before switching.
with open("config/bert_config.yaml", 'r', encoding='utf-8') as f:
    args = yaml.unsafe_load(f)
data = Data(args['data'])

>>>> 数据加载完成: 2536880 条交互, 1411 个用户, 3325 个物品


In [3]:
# Cache the train/test samples exposed by `data` as .npy files under datasets/<name>/,
# where <name> comes from the 'data' section of the config.
np.save(f'datasets/{args["data"]["name"]}/train_samples.npy', data.train_samples)
np.save(f'datasets/{args["data"]["name"]}/test_samples.npy', data.test_samples)

In [3]:
# Reload the cached samples from datasets/<name>/.
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code — only load files produced by this project.
train_samples = np.load(f'datasets/{args["data"]["name"]}/train_samples.npy', allow_pickle=True)
test_samples = np.load(f'datasets/{args["data"]["name"]}/test_samples.npy', allow_pickle=True)
# Wrap the samples in datasets bound to the configured device.
train_dataset = SeqBPRDataset(train_samples, args['data']['device'])
test_dataset = SeqBPRDataset(test_samples, args['data']['device'], is_test=True)
# Training batches are shuffled; test batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)

## 评估指标

- nDCG

In [4]:
def nDCG(rec_items, test_set):
    """Compute a binary-relevance nDCG for each user's recommendation list.

    Args:
        rec_items: 2-D array-like indexed as ``rec_items[user][rank]`` that holds
            the recommended item ids, best first.
        test_set: Sequence whose ``user``-th entry is the collection of relevant
            (hashable) item ids for that user. Only the first ``len(test_set)``
            rows of ``rec_items`` are scored.

    Returns:
        list: One nDCG value per user; ``0`` when the list contains no relevant item.

    Note:
        The ideal DCG is obtained by re-sorting the hit/miss labels of the
        recommended list itself, not from the size of the ground-truth set. A
        list whose hits are all ranked first therefore scores 1.0 even if it
        misses other relevant items.
    """
    def dcg(rels):
        # Natural log is fine here: the log base cancels in the DCG / IDCG ratio.
        return np.sum(rels / np.log(np.arange(2, len(rels) + 2)))

    ndcgs = []
    for user in range(len(test_set)):
        # A set gives O(1) membership tests instead of scanning the list per item.
        relevant = set(test_set[user])
        rels = np.array([1 if item in relevant else 0 for item in rec_items[user]])
        actual = dcg(rels)
        ideal = dcg(np.sort(rels)[::-1])
        ndcgs.append(actual / ideal if ideal != 0 else 0)
    return ndcgs

- map

In [None]:
def map(recommended_items, interacted_items):
    """Average precision of a single ranked recommendation list.

    Note: this intentionally keeps the name ``map`` used by the rest of the
    notebook, which shadows Python's built-in ``map`` once the cell is run.

    Args:
        recommended_items: Iterable of recommended item ids, best first.
        interacted_items: Iterable of relevant (hashable) item ids.

    Returns:
        float: Mean of precision@k taken at every rank k where a relevant item
        appears; ``0.0`` when the list contains no relevant item. The average is
        over the hits found in the list, not over all relevant items.
    """
    interacted_set = set(interacted_items)

    precisions = []
    relevant_count = 0
    # Positions are 1-based.
    for position, item in enumerate(recommended_items, start=1):
        if item in interacted_set:
            relevant_count += 1
            precisions.append(relevant_count / position)

    if not precisions:
        return 0.0
    return sum(precisions) / len(precisions)

- mrr

In [22]:
def mrr(results, relevant_docs):
    """Reciprocal rank of the first relevant entry in a ranked list.

    Args:
        results: Iterable of ranked ids, best first.
        relevant_docs: Iterable of relevant (hashable) ids.

    Returns:
        float: ``1 / rank`` of the first relevant id (ranks start at 1), or
        ``0.0`` if none of the results is relevant.
    """
    wanted = set(relevant_docs)
    first_hit = next(
        (pos for pos, doc_id in enumerate(results, start=1) if doc_id in wanted),
        None,
    )
    return 0.0 if first_hit is None else 1.0 / first_hit

In [7]:
def auc1(y_prob, y_label):
    """Pairwise AUC: share of (positive, negative) pairs ranked correctly.

    Args:
        y_prob: Sequence of predicted scores.
        y_label: Sequence of labels aligned with ``y_prob``. Entries equal to 1
            are positives and entries equal to 0 are negatives; any other label
            is ignored.

    Returns:
        float: Fraction of positive/negative pairs in which the positive has the
        higher score, with ties counted as half a pair.

    Raises:
        ZeroDivisionError: If there is no positive or no negative sample, in
            which case the AUC is undefined.
    """
    pos_prob = []
    neg_prob = []
    for i in range(len(y_prob)):
        if y_label[i] == 1:
            pos_prob.append(y_prob[i])
        elif y_label[i] == 0:
            neg_prob.append(y_prob[i])

    n_pairs = len(pos_prob) * len(neg_prob)
    if n_pairs == 0:
        # Same exception type as the bare division, but with an explicit reason.
        raise ZeroDivisionError(
            "AUC is undefined without at least one positive and one negative sample"
        )

    # Count pairs where the positive outranks the negative; ties count as half.
    count = 0
    for pos in pos_prob:
        for neg in neg_prob:
            if pos > neg:
                count += 1
            elif pos == neg:
                count += 0.5
    return count / n_pairs

In [5]:
def infer(model, topk, metric):
    """Evaluate ``model`` on the notebook-level ``test_loader`` with a ranking metric.

    Reads the globals ``test_loader`` and ``data``. For every test case the
    ground truth is the part of the user's interaction list that follows the
    first occurrence of the positive item.

    Args:
        model: Callable invoked as ``model(batch, is_test=True)`` that returns
            per-item scores (assumed to be (batch, n_items) — TODO confirm).
        topk: Number of highest-scoring items to keep per test case.
        metric: One of ``'map'``, ``'ndcg'`` or ``'mrr'``.

    Returns:
        The mean of the chosen metric over all test cases (``np.mean``).

    Raises:
        ValueError: If ``metric`` is not one of the supported names.
    """
    if metric not in ('map', 'ndcg', 'mrr'):
        # Fail before running the model instead of hitting an unbound `score` later.
        raise ValueError(f"unsupported metric {metric!r}; expected 'map', 'ndcg' or 'mrr'")

    metrics = []
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="计算测试集指标"):
            all_scores = model(batch, is_test=True)
            # One device-to-host copy per batch instead of one per test case.
            top_items = torch.topk(all_scores, topk).indices.cpu().numpy()

            for i in range(len(batch['user_id'])):
                user_id = batch['user_id'][i].item()
                pos_item = batch['pos_item'][i].item()

                # Keep only the interactions that come after the positive item.
                true_item_ids = data.user_interacted_item_ids[user_id]
                true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

                predicted_item_ids = top_items[i]
                if metric == 'map':
                    score = map(predicted_item_ids, true_item_ids)
                elif metric == 'ndcg':
                    # nDCG works on a batch of lists and returns one value per list.
                    score = nDCG(predicted_item_ids[np.newaxis, :], [true_item_ids])[0]
                else:
                    score = mrr(predicted_item_ids, true_item_ids)
                metrics.append(score)
    return np.mean(metrics)

## EnsRec

In [5]:
# Build the EnsRec model from the 'model' and 'data' config sections and the user count.
ensrec = EnsRec(args['model'], args['data'], data.n_user)

>>>> 加载预计算的物品嵌入...


In [7]:
# Restore the trained EnsRec weights and switch the model to evaluation mode.
# The path is a raw string because "\w" and "\e" are not valid escape sequences;
# recent Python versions warn about them in ordinary string literals.
# NOTE(review): this is a machine-specific absolute path — consider moving it to the config.
ckpt = torch.load(r"D:\work_space\epoch102_0.4187.pth")
# Alternative kept for reference: drop the 'item_tower.cex' entries before loading.
# filtered_ckpt = {k: v for k, v in ckpt.items() if not k.startswith('item_tower.cex')}
# strict=False tolerates checkpoint keys that are missing or unexpected instead of raising.
ensrec.load_state_dict(ckpt, strict=False)
ensrec.eval()

EnsRec(
  (dien): DIEN(
    (gru_cell): GRUCell(64, 64)
    (attention): AttentionLayer(
      (q_proj): Linear(in_features=64, out_features=64, bias=True)
      (k_proj): Linear(in_features=64, out_features=64, bias=True)
      (v_proj): Linear(in_features=64, out_features=64, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (augru_cell): GRUCell(128, 64)
    (dropout): Dropout(p=0.1, inplace=False)
    (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (output_layer): Linear(in_features=64, out_features=1, bias=True)
  )
  (user_embeddings): Embedding(6033, 64)
  (item_tower): ItemTower(
    (item_transform): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (preference_alignment): PreferenceAlignmentModule(
      (content_adaptor): Linear(in_features=64, out_features=64, bias=True)
      (position_embeddings): Embedding(20, 64)
    

In [9]:
# Rebuild the global test loader with the configured batch size (infer reads it as a
# global), then report EnsRec's nDCG over the top-10 recommendations.
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)
infer(ensrec, 10, 'ndcg')

计算测试集指标: 100%|██████████| 17/17 [00:13<00:00,  1.29it/s]


0.41873072378852494

## SEM

In [None]:
# Build the SEM model, restore its checkpoint and switch it to evaluation mode.
# NOTE(review): 3952 is hard-coded (presumably an item count) and differs from the
# item count printed by the data-loading cell — confirm it matches the checkpoint.
sem = Sem(args['model'], args['data'], data.n_user, 3952)
# Plain string: the original f-string had no placeholders.
ckpt = torch.load("../bpr/ckpt_sem/sem_epoch3.pth")
# strict=False tolerates checkpoint keys that are missing or unexpected instead of raising.
sem.load_state_dict(ckpt, strict=False)
sem.eval()

In [None]:
# Report SEM's MRR over the top-10 recommendations, using whichever test_loader is currently bound.
infer(sem, 10, 'mrr')

## 其他集成方法

### CombSum

In [5]:
# CombSum: rank items by the sum of the scores stored in 'all_item_scores'.
# batch_size=1, so every batch holds exactly one test case.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    # Drop the batch dimension, then sum over the first remaining dimension
    # (assumed to index the base models — TODO confirm); keepdim leaves a single row.
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    indices = torch.topk(all_scores, 10).indices

    user_id = batch['user_id'].item()
    pos_item = batch['pos_item'].item()

    # Ground truth: the user's interactions that follow the positive item.
    true_item_ids = data.user_interacted_item_ids[user_id]
    true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

    predicted_item_ids = indices[0].cpu().numpy()[np.newaxis, :]
    # nDCG returns one value per list; there is a single list here.
    metrics.append(nDCG(predicted_item_ids, [true_item_ids])[0])
np.mean(metrics)

计算测试集指标: 100%|██████████| 25369/25369 [00:45<00:00, 559.04it/s]


0.4071636437917846

### CombMNZ

In [6]:
# CombMNZ: summed score multiplied by how often each item occurs in the base models' predictions.
# batch_size=1, so every batch holds exactly one test case.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    item_ids_np = batch['base_model_preds'].cpu().numpy().reshape(-1)
    # Map the predicted ids through data.item_to_id; create the tensor on the same
    # device as the scores so the product below works on CPU and GPU alike.
    item_indices = torch.tensor(
        np.array([data.item_to_id[item_id] for item_id in item_ids_np]),
        device=all_scores.device,
    )
    # Number of occurrences of every item index among the predictions.
    all_item_counts = torch.bincount(item_indices, minlength=data.n_item)
    all_scores = all_scores * all_item_counts.unsqueeze(0)
    scores, indices = torch.topk(all_scores, 10)

    user_id = batch['user_id'].item()
    pos_item = batch['pos_item'].item()

    # Ground truth: the user's interactions that follow the positive item.
    true_item_ids = data.user_interacted_item_ids[user_id]
    true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

    predicted_item_ids = np.array([indices[0].cpu().numpy().tolist()])
    score = nDCG(predicted_item_ids, [true_item_ids])
    metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 25369/25369 [00:58<00:00, 436.05it/s]


0.41741408094559723

### CombANZ

In [7]:
# CombANZ: summed score divided by how often each item occurs in the base models' predictions.
# batch_size=1, so every batch holds exactly one test case.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    item_ids_np = batch['base_model_preds'].cpu().numpy().reshape(-1)
    # Map the predicted ids through data.item_to_id; create the tensor on the same
    # device as the scores so the division below works on CPU and GPU alike.
    item_indices = torch.tensor(
        np.array([data.item_to_id[item_id] for item_id in item_ids_np]),
        device=all_scores.device,
    )
    # Number of occurrences of every item index among the predictions.
    all_item_counts = torch.bincount(item_indices, minlength=data.n_item)
    # Replace zero counts with a very large divisor (a stand-in for infinity) to
    # avoid dividing by zero and to shrink the scores of items nobody predicted.
    # NOTE(review): if summed scores can be negative, this moves them towards 0 and
    # may rank unpredicted items above predicted ones — confirm scores are non-negative.
    all_item_counts[all_item_counts == 0] = 1_000_000_000
    all_scores = all_scores / all_item_counts.unsqueeze(0)
    scores, indices = torch.topk(all_scores, 10)

    user_id = batch['user_id'].item()
    pos_item = batch['pos_item'].item()

    # Ground truth: the user's interactions that follow the positive item.
    true_item_ids = data.user_interacted_item_ids[user_id]
    true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

    predicted_item_ids = np.array([indices[0].cpu().numpy().tolist()])
    score = nDCG(predicted_item_ids, [true_item_ids])
    metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 25369/25369 [00:59<00:00, 424.62it/s]


0.4353325855614328

## 基模型预测

In [11]:
# Debug helper: peek at one random training batch.
# A dedicated loader name is used so the global `test_loader` that `infer` reads is
# not silently replaced by a shuffled training loader.
# NOTE(review): the recorded output of this cell is
# "ValueError: too many dimensions 'str'", raised while fetching the batch; the cause
# is not visible in this notebook — check how SeqBPRDataset builds non-test samples.
train_debug_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
batch = next(iter(train_debug_loader))
batch

ValueError: too many dimensions 'str'

In [10]:
# Score every base model's own top-10 predictions with nDCG on the test samples
# and collect the per-model means in `base_model_results`.
base_model_results = {}
for base_model in args['data']['base_model']:
    # Prediction table of this base model, indexed by interaction index.
    model = np.load(args['data']['base_model_path'] + f"/{base_model}.npy")
    ndcg_scores = []
    phar = tqdm(test_samples, desc=f"计算{base_model}测试集指标...")
    for case in phar:
        user_id = case['user_id']
        pos_item = case['pos_item']

        interaction_idx = data.get_interaction_index(user_id, data.item_to_id[pos_item])
        assert interaction_idx != -1

        # Ten predicted item ids starting at column 2
        # (presumably the first two columns are identifiers — TODO confirm).
        predicted_item_ids = model[interaction_idx][2:2+10]
        # Ground truth: the user's interactions that follow the positive item.
        true_item_ids = data.user_interacted_item_ids[user_id]
        true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

        # nDCG works on a batch of lists and returns one value per list.
        ndcg = nDCG(np.array([predicted_item_ids]), [true_item_ids])[0]
        ndcg_scores.append(ndcg)

        phar.set_postfix(ndcg=ndcg)
    mean_ndcg = np.mean(ndcg_scores)
    # Keep the result so it can be used after the loop, not only printed.
    base_model_results[base_model] = mean_ndcg
    print(f"{base_model}: {mean_ndcg}")

计算acf测试集指标...: 100%|██████████| 25369/25369 [00:46<00:00, 541.11it/s, ndcg=[0]]                  


acf: 0.4407644595632349


计算fdsa测试集指标...: 100%|██████████| 25369/25369 [00:41<00:00, 618.68it/s, ndcg=[0]]                  


fdsa: 0.36706860929264074


计算harnn测试集指标...: 100%|██████████| 25369/25369 [00:38<00:00, 655.89it/s, ndcg=[0]]                  


harnn: 0.46209890972906853


计算caser测试集指标...: 100%|██████████| 25369/25369 [00:37<00:00, 673.25it/s, ndcg=[0]]                  


caser: 0.4352232341441272


计算pfmc测试集指标...: 100%|██████████| 25369/25369 [00:40<00:00, 628.41it/s, ndcg=[0]]                  


pfmc: 0.47393773746545487


计算sasrec测试集指标...: 100%|██████████| 25369/25369 [00:40<00:00, 622.14it/s, ndcg=[0]]                  


sasrec: 0.3920804733319215


计算anam测试集指标...: 100%|██████████| 25369/25369 [00:39<00:00, 637.46it/s, ndcg=[0]]                  


anam: 0.4484931343282081
