In [1]:
import yaml
import torch
import numpy as np
from tqdm import tqdm
from model.sem import Sem
from model.ensrec import EnsRec
from torch.utils.data import DataLoader
from data import Data, SeqBPRDataset

In [None]:
# Load the experiment configuration, then build the train/test datasets and the
# training DataLoader from the pre-generated sample files.
# NOTE(review): yaml.unsafe_load can construct arbitrary Python objects (and so
# run code) from the YAML file; only use it on a trusted config. If the config
# holds nothing but plain scalars/lists/dicts, yaml.safe_load should suffice —
# TODO confirm before switching.
with open("config/general.yaml", 'r', encoding='utf-8') as f:
    args = yaml.unsafe_load(f)
data = Data(args['data'])
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code; these .npy files must come from a trusted source.
train_samples = np.load('datasets/train_samples.npy', allow_pickle=True)
test_samples = np.load('datasets/test_samples.npy', allow_pickle=True)
train_dataset = SeqBPRDataset(train_samples, args['data']['device'])
test_dataset = SeqBPRDataset(test_samples, args['data']['device'], is_test=True)
# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)

In [3]:
# Test batches are served in dataset order (shuffle=False). `infer` reads this
# module-level name, and later cells rebind it with a different batch size.
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)

## 评估指标

- nDCG

In [5]:
def nDCG(rec_items, test_set):
    """Compute per-user nDCG@k with binary relevance.

    Args:
        rec_items: 2-D array-like of shape (n_users, k). Row ``u`` is the
            ranked recommendation list for user ``u``, best item first.
        test_set: Sequence aligned with the rows of ``rec_items``; entry ``u``
            is the collection of relevant (held-out) items of user ``u``.
            Items must be hashable.

    Returns:
        A list with one nDCG value per entry of ``test_set``. A user with no
        relevant items (or k == 0) scores 0.0.

    Notes:
        The ideal DCG is the DCG of the best achievable top-k list, i.e. one
        whose first ``min(k, |relevant|)`` positions are all hits. Deriving
        the ideal by re-sorting only the hits that were actually recommended
        would give 1.0 to any list whose hits sit at the top, no matter how
        many relevant items it missed.
    """
    rec_items = np.asarray(rec_items)
    k = rec_items.shape[1]

    # Position discounts 1 / log2(rank + 1) for ranks 1..k.
    discounts = 1.0 / np.log2(np.arange(2, k + 2))
    # best_dcg[m] is the DCG of a list whose first m positions are hits.
    best_dcg = np.concatenate(([0.0], np.cumsum(discounts)))

    ndcgs = []
    for user, relevant in enumerate(test_set):
        relevant = set(relevant)  # O(1) membership instead of a list scan
        hits = np.array(
            [item in relevant for item in rec_items[user].tolist()],
            dtype=float,
        )
        dcg = float(np.dot(hits, discounts))
        idcg = float(best_dcg[min(k, len(relevant))])
        ndcgs.append(dcg / idcg if idcg > 0 else 0.0)
    return ndcgs

- map

In [48]:
def map(recommended_items, interacted_items):
    """Average precision of a single ranked recommendation list.

    Walks the list from the top and, at every position holding a relevant
    item, records precision@position; the result is the mean of those values.

    Args:
        recommended_items: Ranked iterable of recommended items, best first.
        interacted_items: Iterable of relevant (ground-truth) items; must be
            hashable.

    Returns:
        The average precision as a float, or 0.0 when the list contains no
        relevant item. The average is taken over the hits found in the list,
        not over the total number of relevant items.

    Note:
        The function name shadows the builtin ``map`` for the rest of the
        notebook; it is kept because callers use this name.
    """
    relevant = set(interacted_items)

    precisions = []
    for position, item in enumerate(recommended_items, start=1):  # ranks are 1-based
        if item in relevant:
            # Hits so far (including this one) divided by the current rank.
            precisions.append((len(precisions) + 1) / position)

    if not precisions:
        return 0.0
    return sum(precisions) / len(precisions)

- mrr

In [82]:
def mrr(results, relevant_docs):
    """Reciprocal rank of the first relevant entry in a ranked list.

    Args:
        results: Ranked iterable of document/item ids, best first.
        relevant_docs: Iterable of relevant ids; must be hashable.

    Returns:
        ``1.0 / rank`` of the first relevant id (ranks start at 1), or 0.0
        when none of ``results`` is relevant.
    """
    wanted = set(relevant_docs)
    for position, candidate in enumerate(results, start=1):
        if candidate in wanted:
            # The first hit decides the score; later hits are irrelevant.
            return 1.0 / position
    return 0.0

In [61]:
def auc1(y_prob, y_label):
    """Compute ROC AUC by exhaustive positive/negative pair comparison.

    AUC is the fraction of (positive, negative) pairs in which the positive
    sample has the higher predicted score; tied pairs count as half.

    Args:
        y_prob: Sequence of predicted scores.
        y_label: Sequence of labels aligned with ``y_prob``. Entries equal to
            1 are positives, entries equal to 0 are negatives; any other
            label is ignored.

    Returns:
        The AUC as a float in [0, 1].

    Raises:
        ValueError: If the inputs differ in length, or if there is no
            positive or no negative sample (AUC is undefined in that case).
    """
    if len(y_prob) != len(y_label):
        raise ValueError(
            f"y_prob and y_label must have the same length, "
            f"got {len(y_prob)} and {len(y_label)}"
        )

    pos_prob = []
    neg_prob = []
    for prob, label in zip(y_prob, y_label):
        if label == 1:
            pos_prob.append(prob)
        elif label == 0:
            neg_prob.append(prob)

    if not pos_prob or not neg_prob:
        raise ValueError(
            "AUC is undefined: need at least one positive and one negative sample"
        )

    # Share of pairs where the positive outranks the negative.
    count = 0.0
    for pos in pos_prob:
        for neg in neg_prob:
            if pos > neg:
                count += 1.0
            elif pos == neg:
                count += 0.5
    return count / (len(pos_prob) * len(neg_prob))

In [8]:
def infer(model, topk, metric, loader=None):
    """Evaluate a model on a test loader and return the mean of one metric.

    For every sample in every batch, the model's top-``topk`` item ids are
    compared with the items that follow ``pos_item`` in that user's
    interaction history (taken from the module-level ``data`` object and
    mapped through ``data.item_to_id``).

    Args:
        model: Object exposing ``predict(batch)`` that returns a score tensor
            with one row per sample in the batch.
        topk: Number of highest-scoring items kept per sample.
        metric: One of ``'map'``, ``'ndcg'`` or ``'mrr'``.
        loader: Iterable of batches, each a mapping with at least the keys
            ``'user_id'`` and ``'pos_item'``. Defaults to the module-level
            ``test_loader`` as bound at call time.

    Returns:
        The mean metric value over all evaluated samples.

    Raises:
        ValueError: If ``metric`` is not a supported name, or (from
            ``list.index``) if ``pos_item`` is absent from the user's history.
    """
    # Reject unknown metrics up front; otherwise `score` would be unbound on
    # the first sample.
    if metric not in ('map', 'ndcg', 'mrr'):
        raise ValueError(
            f"unknown metric {metric!r}; expected 'map', 'ndcg' or 'mrr'"
        )
    if loader is None:
        # Later notebook cells rebind `test_loader`; pass `loader` explicitly
        # to make the evaluation independent of cell execution order.
        loader = test_loader

    metrics = []
    with torch.no_grad():
        for batch in tqdm(loader, desc="计算测试集指标"):
            all_scores = model.predict(batch)
            _, indices = torch.topk(all_scores, topk)
            # One device-to-host transfer per batch instead of one per sample.
            top_items = indices.cpu().numpy()

            for i in range(len(batch['user_id'])):
                user_id = batch['user_id'][i].item()
                pos_item = batch['pos_item'][i].item()

                # Ground truth: everything the user interacted with after pos_item.
                history = data.user_interacted_items[data.id_to_user[user_id].item()]
                future = history[history.index(pos_item) + 1:]
                true_items = [data.item_to_id[item] for item in future]

                predicted = top_items[i]
                if metric == 'map':
                    score = map(predicted, true_items)
                elif metric == 'ndcg':
                    # nDCG works on a (n_users, k) matrix and returns a list.
                    score = nDCG(predicted[np.newaxis, :], [true_items])[0]
                else:
                    score = mrr(predicted, true_items)
                metrics.append(score)
    return np.mean(metrics)

## EnsRec

In [None]:
# Build the EnsRec model from the model/data sections of the config.
# NOTE(review): 3952 is a hard-coded fourth argument — presumably the number of
# items in the dataset; confirm, and consider deriving it from `data` instead.
ensrec = EnsRec(args['model'], args['data'], data.n_user, 3952)

In [None]:
# Restore EnsRec weights from the epoch-6 checkpoint and switch to eval mode.
# NOTE(review): depending on the installed torch version's `weights_only`
# default, torch.load may unpickle arbitrary objects; load trusted files only.
ckpt = torch.load(f"ckpt_ensrec/epoch6.pth")
# Disabled alternative: drop the `item_tower.cex*` entries before loading.
# filtered_ckpt = {k: v for k, v in ckpt.items() if not k.startswith('item_tower.cex')}
# strict=False tolerates missing/unexpected keys, so a mismatched checkpoint
# loads without raising; check the returned key lists when in doubt.
ensrec.load_state_dict(ckpt, strict=False)
ensrec.eval()

In [None]:
# Mean nDCG over the top-10 predictions of EnsRec, using the current `test_loader`.
infer(ensrec, 10, 'ndcg')

## SEM

In [None]:
# Build the SEM model and restore its weights from the epoch-3 checkpoint.
# NOTE(review): 3952 is a hard-coded fourth argument — presumably the number of
# items in the dataset; confirm, and consider deriving it from `data` instead.
sem = Sem(args['model'], args['data'], data.n_user, 3952)
# NOTE(review): the path points outside this directory (../bpr), so it depends
# on the notebook's working directory. Depending on the installed torch
# version's `weights_only` default, torch.load may unpickle arbitrary objects.
ckpt = torch.load(f"../bpr/ckpt_sem/sem_epoch3.pth")
# strict=False tolerates missing/unexpected keys, so a mismatched checkpoint
# loads without raising; check the returned key lists when in doubt.
sem.load_state_dict(ckpt, strict=False)
sem.eval()

In [None]:
# Mean reciprocal rank over the top-10 predictions of SEM, using the current `test_loader`.
infer(sem, 10, 'mrr')

## 基模型预测值取平均

In [None]:
# Score the precomputed predictions held in `model` with nDCG, one test
# interaction at a time.
# NOTE(review): `model` is not defined in this cell; in file order it is only
# assigned by the base-model cell further down, so this cell depends on
# out-of-order execution. Load the intended prediction array explicitly.
# NOTE(review): this rebinds the module-level `test_loader` that `infer` uses.
test_loader = DataLoader(data.test_dataset, batch_size=1, shuffle=False)

ndcg_scores = []
# NOTE(review): the label says NDCG@10 but 20 predictions are scored below.
phar = tqdm(test_loader, desc="计算NDCG@10...")
for batch in phar:
    # NOTE(review): tuple unpacking assumes data.test_dataset yields 6-tuples;
    # the other cells in this notebook index batches by key — confirm.
    user_ids, user_seq, pos_items, neg_items, all_item_scores, base_model_preds = batch

    user_id = user_ids.item()
    pos_item = pos_items.item()
    interaction_idx = data.get_interaction_index(data.id_to_user[user_id], pos_item)
    assert interaction_idx != -1

    # Out-of-place add: the slice is a view into `model` when it is a NumPy
    # array, so `+= 1` would shift the stored predictions again on every
    # revisit or cell re-run.
    # Presumably the +1 maps 0-based predictions onto the raw item ids stored
    # in user_interacted_items — TODO confirm.
    predicted_items = model[interaction_idx][2:2+20] + 1

    # Ground truth: items the user interacted with after pos_item.
    true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
    true_items = true_items[true_items.index(pos_item) + 1:]

    # nDCG takes a (n_users, k) matrix and returns one value per user.
    ndcg = nDCG(np.array([predicted_items]), [true_items])[0]
    ndcg_scores.append(ndcg)

    phar.set_postfix(ndcg=ndcg)

np.mean(ndcg_scores)

## 基模型预测

In [None]:
# Evaluate every base model's precomputed top-10 predictions with MRR.
# Evaluation does not need shuffling; a fixed order keeps runs reproducible.
# NOTE(review): this rebinds the module-level `test_loader` that `infer` uses.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

base_model_results = {}
for base_model in args['data']['base_model']:
    print(f"加载{base_model}...")
    # One row per interaction; columns 2..11 are read as the top-10 predictions.
    # Presumably the first two columns identify the interaction — TODO confirm.
    model = np.load(args['data']['base_model_path'] + f"/{base_model}.npy")
    mrr_scores = []
    phar = tqdm(test_loader, desc="计算测试集指标...")
    for batch in phar:
        user_id = batch['user_id'].item()
        pos_item = batch['pos_item'].item()
        interaction_idx = data.get_interaction_index(data.id_to_user[user_id], pos_item)
        assert interaction_idx != -1

        predicted_items = model[interaction_idx][2:2+10]

        # Ground truth: items the user interacted with after pos_item, mapped
        # through item_to_id (the same mapping `infer` applies).
        history = data.user_interacted_items[data.id_to_user[user_id].item()]
        future = history[history.index(pos_item) + 1:]
        true_items = [data.item_to_id[item] for item in future]

        score = mrr(predicted_items, true_items)
        mrr_scores.append(score)

        # Label the running value with the metric that is actually computed.
        phar.set_postfix(mrr=score)
    base_model_results[base_model] = np.mean(mrr_scores)
base_model_results