In [1]:
import yaml
import torch
import numpy as np
from tqdm import tqdm
from model.sem import Sem
from model.ensrec import EnsRec
from torch.utils.data import DataLoader
from data import Data, SeqBPRDataset

In [2]:
# Load the experiment configuration.
# SECURITY: yaml.unsafe_load can construct arbitrary Python objects from the
# file, so it must only be used on trusted configs.
# NOTE(review): if the config holds only plain scalars/lists/dicts,
# yaml.safe_load would be the safer choice — confirm before switching.
with open("config/bert_config.yaml", 'r', encoding='utf-8') as f:
    args = yaml.unsafe_load(f)
data = Data(args['data'])
# SECURITY: allow_pickle=True lets np.load unpickle Python objects stored in
# the .npy files, which can execute arbitrary code — only load trusted files.
train_samples = np.load('datasets/new_train_samples.npy', allow_pickle=True)
test_samples = np.load('datasets/new_test_samples.npy', allow_pickle=True)
# Both datasets are built with the device named in the config.
train_dataset = SeqBPRDataset(train_samples, args['data']['device'])
test_dataset = SeqBPRDataset(test_samples, args['data']['device'], is_test=True)
train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)

>>>> 数据加载完成: 834449 条交互, 6033 个用户, 3123 个物品
>>>> 基模型的预测结果加载完成: (834449, 7, 102)


In [3]:
# Batched, unshuffled loader over the test split. This is a module-level name
# that is rebound further down with batch_size=1, so its value depends on which
# cells have been executed.
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)

## 评估指标

- nDCG

In [4]:
def nDCG(rec_items, test_set):
    """Compute a per-user nDCG with binary (implicit-feedback) relevance.

    Args:
        rec_items: 2-D array-like of shape (n_users, k). Row ``u`` holds the
            ranked recommendations for user ``u``, best first.
        test_set: Sequence of per-user collections of relevant (held-out)
            items; ``test_set[u]`` is matched against row ``u`` of
            ``rec_items``. Items must be hashable.

    Returns:
        list[float]: One nDCG value per entry of ``test_set``. The value is
        0.0 when none of the user's recommendations is relevant.

    Note:
        The ideal DCG is computed from the hits that actually occur in the
        recommended list (the same hits moved to the top), not from the size
        of the full relevant set. The score therefore rewards ranking the
        retrieved hits early but does not penalise relevant items that were
        never retrieved.
    """
    rec_items = np.asarray(rec_items)
    # Positional discount 1 / log(rank + 1) for ranks 1..k. The log base
    # cancels out in the DCG / IDCG ratio.
    discounts = np.log(np.arange(2, rec_items.shape[1] + 2))

    ndcgs = []
    for user in range(len(test_set)):
        # A set gives O(1) membership tests instead of scanning a list for
        # every recommended item.
        relevant = set(test_set[user])
        rels = np.array([1 if item in relevant else 0 for item in rec_items[user]])

        idcg = np.sum(np.sort(rels)[::-1] / discounts)
        if idcg == 0:
            # No hit in the list: nDCG is defined as 0 to avoid 0 / 0.
            ndcgs.append(0.0)
            continue
        dcg = np.sum(rels / discounts)
        ndcgs.append(float(dcg / idcg))
    return ndcgs

- MAP

In [None]:
def map(recommended_items, interacted_items):
    """Average precision of a single ranked list (the per-user term of MAP).

    Args:
        recommended_items: Iterable of recommended items, best first.
        interacted_items: Iterable of relevant (ground-truth) items; must be
            hashable.

    Returns:
        float: Mean of precision@k taken at every rank ``k`` where a relevant
        item occurs, or 0.0 when the list contains no relevant item.

    Note:
        * The sum is normalised by the number of hits found in the list, not
          by the total number of relevant items.
        * This function shadows the builtin ``map`` for the rest of the
          notebook; the name is kept because other cells call it.
    """
    interacted_set = set(interacted_items)

    precisions = []
    relevant_count = 0
    # Ranks are 1-based.
    for position, item in enumerate(recommended_items, start=1):
        if item in interacted_set:
            relevant_count += 1
            precisions.append(relevant_count / position)

    if not precisions:
        return 0.0
    return sum(precisions) / len(precisions)

- MRR

In [6]:
def mrr(results, relevant_docs):
    """Reciprocal rank of the first relevant entry in a ranked list.

    Args:
        results: Iterable of ranked items, best first.
        relevant_docs: Iterable of relevant items; must be hashable.

    Returns:
        float: ``1 / rank`` of the first item of ``results`` found in
        ``relevant_docs`` (ranks start at 1), or 0.0 if there is none.
    """
    wanted = set(relevant_docs)
    # 1-based rank of the first hit, or None when nothing relevant is listed.
    first_hit = next(
        (pos for pos, doc_id in enumerate(results, start=1) if doc_id in wanted),
        None,
    )
    return 0.0 if first_hit is None else 1.0 / first_hit

In [7]:
def auc1(y_prob, y_label):
    """Pairwise AUC: share of (positive, negative) pairs ranked correctly.

    Args:
        y_prob: Indexable sequence of predicted scores.
        y_label: Indexable sequence of labels aligned with ``y_prob``; entries
            equal to 1 are positives, entries equal to 0 are negatives, and
            any other value is ignored.

    Returns:
        float: Fraction of positive/negative pairs in which the positive has
        the higher score; a tied pair counts as half.

    Raises:
        ZeroDivisionError: If there are no positives or no negatives.
    """
    positions = range(len(y_prob))
    positives = [y_prob[k] for k in positions if y_label[k] == 1]
    negatives = [y_prob[k] for k in positions if y_label[k] == 0]

    # Score every positive against every negative: 1 for a win, 0.5 for a tie.
    wins = sum(
        1 if pos_score > neg_score else 0.5 if pos_score == neg_score else 0
        for pos_score in positives
        for neg_score in negatives
    )
    return wins / (len(positives) * len(negatives))

In [5]:
def infer(model, topk, metric, loader=None):
    """Evaluate ``model`` on the test set and return the mean ranking metric.

    For every test sample the model's top-``topk`` item indices are compared
    with the items the user interacted with *after* the sample's positive
    item, as recorded in the module-level ``data`` object.

    Args:
        model: Callable invoked as ``model(batch, is_test=True)`` that returns
            one row of item scores per sample in the batch.
        topk: Number of highest-scoring items to keep per sample.
        metric: One of ``'map'``, ``'ndcg'`` or ``'mrr'``.
        loader: Iterable of batches to evaluate. Defaults to the module-level
            ``test_loader`` as bound at call time; pass a loader explicitly to
            avoid depending on which cell rebound ``test_loader`` last.

    Returns:
        The mean of the chosen metric over all evaluated samples
        (``np.mean``, so NaN when the loader yields nothing).

    Raises:
        ValueError: If ``metric`` is not a supported name. Previously an
            unknown metric failed later with an unbound ``score``.
    """
    if metric not in ('map', 'ndcg', 'mrr'):
        raise ValueError(
            f"unknown metric {metric!r}; expected 'map', 'ndcg' or 'mrr'"
        )
    if loader is None:
        loader = test_loader

    metrics = []
    with torch.no_grad():
        for batch in tqdm(loader, desc="计算测试集指标"):
            all_scores = model(batch, is_test=True)
            _, indices = torch.topk(all_scores, topk)
            # Move the whole batch of indices to host memory once instead of
            # once per sample.
            top_items = indices.cpu().numpy()

            for i in range(len(batch['user_id'])):
                user_id = batch['user_id'][i].item()
                pos_item = batch['pos_item'][i].item()

                # Ground truth: everything the user interacted with after the
                # current positive item. `.index` raises if the positive item
                # is missing from the user's history.
                history = data.user_interacted_item_ids[user_id]
                true_item_ids = history[history.index(data.item_to_id[pos_item]) + 1:]

                predicted = top_items[i]
                if metric == 'map':
                    score = map(predicted, true_item_ids)
                elif metric == 'ndcg':
                    # nDCG expects a (n_users, k) matrix and returns a list
                    # with one value per user.
                    score = nDCG(predicted[np.newaxis, :], [true_item_ids])[0]
                else:
                    score = mrr(predicted, true_item_ids)
                metrics.append(score)
    return np.mean(metrics)

## EnsRec

In [6]:
# Build the EnsRec model from the model/data sections of the config and the
# user count reported by the loaded data.
ensrec = EnsRec(args['model'], args['data'], data.n_user)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


>>>> 加载预计算的物品嵌入...


In [7]:
# NOTE: torch.load unpickles the checkpoint file — only load checkpoints that
# come from a trusted source.
ckpt = torch.load("ckpt_fixbug/epoch1_0.3932.pth")
# filtered_ckpt = {k: v for k, v in ckpt.items() if not k.startswith('item_tower.cex')}
# strict=False skips parameters whose names do not match instead of raising.
# Report what was skipped so that a partially restored model is not evaluated
# unnoticed.
load_result = ensrec.load_state_dict(ckpt, strict=False)
print(
    f"missing keys: {len(load_result.missing_keys)}, "
    f"unexpected keys: {len(load_result.unexpected_keys)}"
)
# Switch to inference mode (disables dropout); the module repr is the cell output.
ensrec.eval()

EnsRec(
  (cem): ContentExtractionModule(
    (llm): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-23): 24 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_features=1024, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(i

In [8]:
# Mean nDCG of the loaded EnsRec checkpoint using the top-10 items per sample.
infer(ensrec, 10, 'ndcg')

计算测试集指标: 100%|██████████| 17/17 [00:09<00:00,  1.83it/s]


0.39324313141491474

## SEM

In [26]:
# NOTE(review): the saved output of this cell is `KeyError: 'lamda'`, so it did
# not run to completion. Which lookup raised is not visible here — presumably a
# config key read inside Sem; verify against the model code and the YAML file.
# NOTE(review): 3952 is a hard-coded fourth argument — presumably an item
# count. It differs from the 3123 items reported when the data was loaded;
# confirm which value Sem expects.
sem = Sem(args['model'], args['data'], data.n_user, 3952)
# NOTE: torch.load unpickles the file — only load trusted checkpoints.
ckpt = torch.load(f"../bpr/ckpt_sem/sem_epoch3.pth")
# strict=False skips non-matching keys without raising; the returned report of
# missing/unexpected keys is discarded here.
sem.load_state_dict(ckpt, strict=False)
sem.eval()

KeyError: 'lamda'

In [None]:
# Mean reciprocal rank of the SEM model using the top-10 items per sample.
infer(sem, 10, 'mrr')

## 其他集成方法

### CombSum

In [67]:
# CombSum baseline: rank items by the sum of the base models' scores.
# This rebinds the module-level `test_loader` to a batch_size=1 loader, so any
# later code that reads `test_loader` iterates one sample per batch.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    # Drop the leading batch dimension, then add up the scores along dim 0,
    # keeping a leading dimension of size 1 for topk.
    # assumes batch['all_item_scores'] is (1, n_base_models, n_items) — TODO confirm
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    scores, indices = torch.topk(all_scores, 10)

    for i in range(len(batch['user_id'])):
        user_id = batch['user_id'][i].item()
        pos_item = batch['pos_item'][i].item()

        # Ground truth: the items the user interacted with after the current
        # positive item.
        true_item_ids = data.user_interacted_item_ids[user_id]
        true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

        predicted_item_ids = np.array([indices[i].cpu().numpy().tolist()])
        # nDCG returns a list with one value per row, so `metrics` becomes a
        # list of one-element lists; np.mean below averages over all of them.
        score = nDCG(np.array(predicted_item_ids), [true_item_ids])
        metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 8345/8345 [00:12<00:00, 677.68it/s]


0.39389031190741475

### CombMNZ

In [47]:
# CombMNZ baseline: summed base-model scores multiplied by the number of times
# each item appears in the base models' prediction lists.
# Rebinds the module-level `test_loader` to a batch_size=1 loader.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    # Sum the scores along dim 0 after dropping the batch dimension; the result
    # keeps a leading dimension of size 1.
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    item_ids_np = batch['base_model_preds'].cpu().numpy().reshape(-1)
    # Map predicted item ids to internal indices. The tensor is created on the
    # same device as the scores rather than on a hard-coded "cuda".
    item_indices = torch.tensor(
        np.array([data.item_to_id[item_id] for item_id in item_ids_np]),
        device=all_scores.device,
    )
    # How often each item occurs across the flattened prediction lists.
    all_item_counts = torch.bincount(item_indices, minlength=data.n_item)
    all_scores = all_scores * all_item_counts.unsqueeze(0)
    scores, indices = torch.topk(all_scores, 10)

    user_id = batch['user_id'].item()
    pos_item = batch['pos_item'].item()

    # Ground truth: the items the user interacted with after the current
    # positive item.
    true_item_ids = data.user_interacted_item_ids[user_id]
    true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

    # batch_size is 1, so the only row of `indices` is row 0. The previous
    # `indices[i]` used an `i` that this cell never defines.
    predicted_item_ids = np.array([indices[0].cpu().numpy().tolist()])
    score = nDCG(np.array(predicted_item_ids), [true_item_ids])
    metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 8345/8345 [00:18<00:00, 451.36it/s]


0.39958100283838155

### CombANZ

In [82]:
# CombANZ baseline: summed base-model scores divided by the number of times
# each item appears in the base models' prediction lists.
# Rebinds the module-level `test_loader` to a batch_size=1 loader.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    # Sum the scores along dim 0 after dropping the batch dimension; the result
    # keeps a leading dimension of size 1.
    all_scores = batch['all_item_scores'].squeeze(0).sum(dim=0, keepdim=True)
    item_ids_np = batch['base_model_preds'].cpu().numpy().reshape(-1)
    # Map predicted item ids to internal indices. The tensor is created on the
    # same device as the scores rather than on a hard-coded "cuda".
    item_indices = torch.tensor(
        np.array([data.item_to_id[item_id] for item_id in item_ids_np]),
        device=all_scores.device,
    )
    # How often each item occurs across the flattened prediction lists.
    all_item_counts = torch.bincount(item_indices, minlength=data.n_item)
    # Replace zero counts with a very large divisor (a stand-in for infinity,
    # since the counts are integers). This avoids division by zero and pushes
    # the score of never-predicted items to roughly 0.
    # NOTE(review): if summed scores can be negative, ~0 would outrank them —
    # confirm the scores are non-negative.
    all_item_counts[all_item_counts == 0] = 1_000_000_000
    all_scores = all_scores / all_item_counts.unsqueeze(0)
    scores, indices = torch.topk(all_scores, 10)

    user_id = batch['user_id'].item()
    pos_item = batch['pos_item'].item()

    # Ground truth: the items the user interacted with after the current
    # positive item.
    true_item_ids = data.user_interacted_item_ids[user_id]
    true_item_ids = true_item_ids[true_item_ids.index(data.item_to_id[pos_item]) + 1:]

    # batch_size is 1, so the only row of `indices` is row 0. The previous
    # `indices[i]` used an `i` that this cell never defines.
    predicted_item_ids = np.array([indices[0].cpu().numpy().tolist()])
    score = nDCG(np.array(predicted_item_ids), [true_item_ids])
    metrics.append(score)
np.mean(metrics)

计算测试集指标: 100%|██████████| 8345/8345 [00:17<00:00, 464.56it/s]


0.36816377854490245

## 基模型预测

In [None]:
# Evaluate each base model on its own: mean nDCG of its stored top-10 list.
# Rebinds the module-level `test_loader` to a batch_size=1 loader. Evaluation
# does not need shuffling, so the order is kept deterministic.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

base_model_results = {}
for base_model in args['data']['base_model']:
    print(f"加载{base_model}...")
    # Despite the name, `model` is the array of predictions saved for this
    # base model, not a trainable model.
    model = np.load(args['data']['base_model_path'] + f"/{base_model}.npy")
    ndcg_scores = []
    phar = tqdm(test_loader, desc="计算测试集指标...")
    for batch in phar:
        user_id = batch['user_id'].item()
        pos_item = batch['pos_item'].item()
        # Locate the row of the prediction array that belongs to this
        # (user, positive item) interaction.
        interaction_idx = data.get_interaction_index(data.id_to_user[user_id], pos_item)
        assert interaction_idx != -1

        # Columns 2..11 of the row are used as the top-10 list.
        # NOTE(review): presumably the first two columns are identifiers and
        # the rest are ranked items — confirm against the file layout.
        # NOTE(review): these values are used as stored, while the ground truth
        # below is mapped through data.item_to_id. If the .npy holds raw item
        # ids, the predictions need the same mapping — confirm.
        predicted_items = model[interaction_idx][2:2+10]

        # Ground truth: the user's items after the positive item, converted to
        # internal ids. The slice is a copy, so the stored history is untouched.
        true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
        true_items = true_items[true_items.index(pos_item) + 1:]
        true_items = [data.item_to_id[item] for item in true_items]

        # nDCG takes a (n_users, k) matrix and returns one value per row.
        ndcg = nDCG(np.array([predicted_items]), [true_items])[0]
        ndcg_scores.append(ndcg)

        phar.set_postfix(ndcg=ndcg)
    base_model_results[base_model] = np.mean(ndcg_scores)
base_model_results