In [1]:
import yaml
import torch
import numpy as np
from tqdm import tqdm
from model.sem import Sem
from model.ensrec import EnsRec
from torch.utils.data import DataLoader
from data import Data, SeqBPRDataset

In [36]:
# Load the Qwen experiment configuration into `args`.
# NOTE(review): yaml.unsafe_load can construct arbitrary Python objects, so it
# must only ever be pointed at trusted config files.
# NOTE(review): the following cell rebinds `args` from config/bert_config.yaml,
# so in a top-to-bottom run the value loaded here is overwritten.
with open("config/qwen_config.yaml", 'r', encoding='utf-8') as f:
    args = yaml.unsafe_load(f)

In [2]:
# Load the BERT experiment configuration into `args`.
# NOTE(review): yaml.unsafe_load can construct arbitrary Python objects; use it
# on trusted config files only.
with open("config/bert_config.yaml", 'r', encoding='utf-8') as f:
    args = yaml.unsafe_load(f)
# Interaction data wrapper built from the `data` section of the config.
data = Data(args['data'])
# NOTE(review): allow_pickle=True unpickles objects stored in the .npy files,
# which can execute arbitrary code; only load sample files you produced.
train_samples = np.load('datasets/train_samples.npy', allow_pickle=True)
test_samples = np.load('datasets/test_samples.npy', allow_pickle=True)
# Both datasets receive the device named in the data config.
train_dataset = SeqBPRDataset(train_samples, args['data']['device'])
test_dataset = SeqBPRDataset(test_samples, args['data']['device'], is_test=True)
# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)

>>>> 数据加载完成: 834449 条交互, 6033 个用户, 3123 个物品


In [3]:
# Evaluation loader: same batch size as training, fixed (unshuffled) order.
# `infer` reads this global, and later cells rebind it with other batch sizes.
test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)

## 评估指标

- nDCG

In [None]:
def nDCG(rec_items, test_set):
    """Compute a per-user nDCG score with binary relevance.

    Args:
        rec_items (`np.ndarray`): 2-D array of ranked recommendations, indexed
            as ``rec_items[user][rank]`` (``rec_items.shape[1]`` is read).
        test_set (sequence of collections): ``test_set[user]`` holds the
            hashable item ids counted as relevant for that user.

    Returns:
        list: one score per entry of ``test_set``; 0 when none of the user's
        recommended items is relevant.

    Note:
        The ideal DCG is computed from the hits found inside the recommended
        list (moved to the front), not from the size of the relevant set, so a
        list whose only hit sits at rank 1 scores 1.0. The natural logarithm is
        used for the discount; the base cancels in the DCG / IDCG ratio.
    """
    # The rank discount is identical for every user, so build it once.
    discounts = np.log(np.arange(2, rec_items.shape[1] + 2))

    ndcgs = []
    for user, relevant in enumerate(test_set):
        # Set lookup avoids rescanning the ground-truth list for every
        # recommended item (same approach as `map` and `mrr`).
        relevant = set(relevant)
        rels = np.array([1 if item in relevant else 0 for item in rec_items[user]])
        dcg = np.sum(rels / discounts)
        # Ideal ordering: every hit first, i.e. relevance sorted descending.
        idcg = np.sum(np.sort(rels)[::-1] / discounts)
        ndcgs.append(dcg / idcg if idcg != 0 else 0)
    return ndcgs

- MAP (mean average precision)

In [None]:
def map(recommended_items, interacted_items):
    """Average precision of a single ranked recommendation list.

    NOTE: this function shadows Python's built-in ``map`` for the rest of the
    notebook; the name is kept because ``infer`` calls it as ``map``.

    Args:
        recommended_items: ranked iterable of recommended item ids.
        interacted_items: iterable of hashable item ids counted as relevant.

    Returns:
        float: mean of precision@k taken at every rank k (1-based) where a
        relevant item appears; 0.0 when the list contains no relevant item.
        The average is over the hits found, not over all relevant items.
    """
    interacted_set = set(interacted_items)

    precisions = []
    relevant_count = 0
    # Ranks are counted from 1.
    for position, item in enumerate(recommended_items, start=1):
        if item in interacted_set:
            relevant_count += 1
            precisions.append(relevant_count / position)

    if not precisions:
        return 0.0
    return sum(precisions) / len(precisions)

- MRR (mean reciprocal rank)

In [6]:
def mrr(results, relevant_docs):
    """Reciprocal rank of the first relevant entry in a ranked list.

    Args:
        results: ranked iterable of ids.
        relevant_docs: iterable of hashable ids counted as relevant.

    Returns:
        float: ``1.0 / rank`` of the first relevant id (ranks start at 1),
        or 0.0 when no id in ``results`` is relevant.
    """
    relevant_set = set(relevant_docs)
    for position, doc_id in enumerate(results, start=1):
        if doc_id in relevant_set:
            # Only the first hit matters, so stop at once.
            return 1.0 / position
    return 0.0

In [7]:
def auc1(y_prob, y_label):
    """AUC computed by comparing every positive/negative score pair.

    Args:
        y_prob: indexable sequence of predicted scores.
        y_label: indexable sequence of labels aligned with ``y_prob``; entries
            equal to 1 are positives, entries equal to 0 are negatives, and
            any other value is ignored.

    Returns:
        float: share of (positive, negative) pairs in which the positive
        scores higher, with ties counting as half a pair.

    Raises:
        ZeroDivisionError: if there is no positive or no negative sample, in
            which case the AUC is undefined.
    """
    pos_prob = []
    neg_prob = []
    for i in range(len(y_prob)):
        if y_label[i] == 1:
            pos_prob.append(y_prob[i])
        elif y_label[i] == 0:
            neg_prob.append(y_prob[i])

    n_pairs = len(pos_prob) * len(neg_prob)
    if n_pairs == 0:
        # Same exception type as the bare division would raise, but with a
        # message that explains the cause.
        raise ZeroDivisionError(
            "auc1 is undefined: need at least one positive and one negative label"
        )

    # Count the pairs where the positive sample outranks the negative one.
    count = 0
    for pos_score in pos_prob:
        for neg_score in neg_prob:
            if pos_score > neg_score:
                count += 1
            elif pos_score == neg_score:
                count += 0.5
    return count / n_pairs

In [40]:
def infer(model, topk, metric):
    """Score `model` on the global `test_loader` and return the mean metric.

    Relies on notebook globals: `test_loader`, `data`, `all_item_score` and the
    metric functions `map`, `nDCG` and `mrr`. All of them must be defined
    before this function is called.

    Args:
        model: object exposing ``predict(batch)``; its output is passed to
            ``torch.topk``.
        topk (int): number of highest-scored items kept per sample.
        metric (str): one of ``'map'``, ``'ndcg'`` or ``'mrr'``.

    Returns:
        The ``np.mean`` of the per-sample scores.

    Raises:
        ValueError: if ``metric`` is not one of the supported names.
    """
    supported = ('map', 'ndcg', 'mrr')
    if metric not in supported:
        # Fail before doing any work; otherwise `score` would be unbound (or
        # stale from an earlier sample) inside the loop below.
        raise ValueError(f"unknown metric {metric!r}; expected one of {supported}")

    metrics = []
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="计算测试集指标"):
            # Rebuild the dense rank-based scores from the base models' ranked
            # predictions and hand them to the model through the batch dict.
            batch['all_item_scores'] = torch.Tensor(all_item_score((batch['base_model_preds']).cpu().numpy())).cuda()
            all_scores = model.predict(batch)
            _, indices = torch.topk(all_scores, topk)
            # Move the top-k indices to the host once per batch, not per sample.
            indices = indices.cpu().numpy()

            for i in range(len(batch['user_id'])):
                user_id = batch['user_id'][i].item()
                pos_item = batch['pos_item'][i].item()

                # Ground truth: everything the user interacted with after
                # `pos_item` (list.index raises ValueError if it is absent).
                true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
                true_items = true_items[true_items.index(pos_item) + 1:]
                # The slice above is a fresh list, so translating it in place
                # through `data.item_to_id` leaves the stored history untouched.
                for j in range(len(true_items)):
                    true_items[j] = data.item_to_id[true_items[j]]

                predicted_items = np.array([indices[i].tolist()])
                if metric == 'map':
                    score = map(predicted_items[0], true_items)
                elif metric == 'ndcg':
                    # nDCG returns a one-element list; np.mean flattens it.
                    score = nDCG(np.array(predicted_items), [true_items])
                else:  # 'mrr'
                    score = mrr(predicted_items[0], true_items)
                metrics.append(score)
    return np.mean(metrics)

## EnsRec

In [37]:
# Build the ensemble model from the model/data sections of the config.
# NOTE(review): 3952 is a hard-coded fourth argument (presumably an item count;
# confirm against EnsRec's signature). It differs from the 3123 used by
# all_item_score.
ensrec = EnsRec(args['model'], args['data'], data.n_user, 3952)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


>>>> 加载预计算的物品嵌入...


In [42]:
# Restore trained weights and switch the model to evaluation mode.
ckpt = torch.load(f"ckpt_ensrec_newdien_qwen/epoch8_0.3786.pth")
# Disabled alternative: drop checkpoint entries whose key starts with 'item_tower.cex'.
# filtered_ckpt = {k: v for k, v in ckpt.items() if not k.startswith('item_tower.cex')}
# NOTE(review): strict=False accepts missing and unexpected keys, so a
# mismatched checkpoint loads without raising; the returned key lists are the
# only sign of it.
ensrec.load_state_dict(ckpt, strict=False)
ensrec.eval()

EnsRec(
  (cem): ContentExtractionModule(
    (llm): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-23): 24 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_features=1024, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(i

In [43]:
# Mean nDCG of EnsRec over the top-10 recommendations on the test loader.
infer(ensrec, 10, 'ndcg')

计算测试集指标: 100%|██████████| 17/17 [00:02<00:00,  6.30it/s]


0.3790913834834136

## SEM

In [26]:
# Build the SEM model, restore its checkpoint and switch to evaluation mode.
# NOTE(review): the recorded output of this cell is `KeyError: 'lamda'`, so it
# did not complete with the configuration loaded at the time.
# NOTE(review): 3952 is hard-coded here as well; strict=False accepts
# missing and unexpected checkpoint keys without raising.
sem = Sem(args['model'], args['data'], data.n_user, 3952)
ckpt = torch.load(f"../bpr/ckpt_sem/sem_epoch3.pth")
sem.load_state_dict(ckpt, strict=False)
sem.eval()

KeyError: 'lamda'

In [None]:
# Mean reciprocal rank of SEM over the top-10 recommendations; needs `sem`
# from the previous cell.
infer(sem, 10, 'mrr')

## 其他集成方法

### CombSum

In [None]:
# CombSum baseline: add up the stored item scores and rank items by the total.
# One sample per batch, so the leading batch axis can be squeezed away below.
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    # Drop the batch axis, sum over the first remaining axis, and keep a
    # single row so that indices[i] below still works.
    all_scores = torch.sum(batch['all_item_scores'].squeeze(0), dim=0, keepdim=True)
    scores, indices = all_scores.topk(10)

    for i in range(len(batch['user_id'])):
        user_id, pos_item = batch['user_id'][i].item(), batch['pos_item'][i].item()

        # Ground truth: the user's interactions after `pos_item`, translated
        # through data.item_to_id.
        true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
        true_items = [
            data.item_to_id[item]
            for item in true_items[true_items.index(pos_item) + 1:]
        ]

        predicted_items = np.array([indices[i].tolist()])
        score = nDCG(predicted_items, [true_items])
        metrics.append(score)
np.mean(metrics)

### CombMNZ

In [25]:
def all_item_score(dataset, n_item=3123):
    """
    Turn ranked item-id lists into dense rank-based item scores.

    Args:
        dataset (`np.ndarray`): integer item ids, [n_samples, k, topk]. The ids
            are used directly as column indices of the score matrix.
        n_item (`int`, optional): width of the score axis. Defaults to 3123,
            the value that was previously hard-coded here.

    Returns:
        u_k_i (`np.ndarray`): float32 scores, [n_samples, k, n_item]. The id at
            position i of a list is scored 1 / (i + 10), so earlier positions
            score higher; ids that do not appear in a list score 0.
    """
    rank_chunk = np.asarray(dataset)
    n_samples, k, topk = rank_chunk.shape
    # Flatten the first two axes so each row is one ranked list.
    flat_ranks = rank_chunk.reshape(-1, topk)

    u_k_i = np.zeros([n_samples * k, n_item], dtype=np.float32)  # [n_samples * k, n_item]
    rows = np.arange(len(u_k_i))
    for i in range(topk):
        # If an id repeats within a list, the later (lower) score overwrites
        # the earlier one.
        u_k_i[rows, flat_ranks[:, i]] = 1 / (i + 10)
    return u_k_i.reshape(n_samples, k, n_item)

In [None]:
# Largest item id that any base model predicts anywhere in the test set.
mx = 0
for batch in test_loader:
    # Keep a running maximum. Taking min(...) against a value that starts at 0
    # can never rise above 0, so it would not report the largest id.
    mx = max(mx, torch.max(batch['base_model_preds']).item())
mx

In [77]:
# Rebind the global test loader with a batch size of 32, take its first batch
# and display the base models' predicted ids.
# NOTE: `infer` reads the global `test_loader`, so this rebinding affects it.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
batch = next(iter(test_loader))
batch['base_model_preds']

tensor([[[ 325,    9, 1088,  ...,  707,   11, 1122],
         [ 425,  611, 1447,  ..., 1119,  584,  609],
         [1837, 1100, 1443,  ..., 1278,  218,   84],
         ...,
         [ 425, 1238, 1269,  ...,  368,  527,  672],
         [1447,  580, 1445,  ..., 1838, 1269,  681],
         [ 325,  517,  719,  ...,   63,   65,  533]],

        [[ 325,    9, 1088,  ...,  707,   11, 1122],
         [ 425, 1447,  674,  ...,  918, 1868,  609],
         [  11,   76,   62,  ...,   64,  116,  707],
         ...,
         [ 218,  708,  527,  ...,  864, 1295,  204],
         [2140, 1018, 2745,  ...,  897, 2421, 2620],
         [ 325,  517,  719,  ...,   63,   65,  533]],

        [[ 325,    9,  234,  ...,   41,  680,  667],
         [1447,  674,  188,  ...,  517,  123,  727],
         [  28,  123,   16,  ...,   71,   62,  533],
         ...,
         [ 218,  527,  708,  ...,  705, 1133,  812],
         [2140,  899, 1018,  ..., 2315,  897,  908],
         [ 325,  517,  719,  ...,   63,   65,  533]],

In [80]:
# Spot check: the data.id_to_item entry for id 325.
data.id_to_item[325]

912

In [71]:
# Spot check: one prediction entry (first sample, first list, index 3).
batch['base_model_preds'][0][0][3]

tensor(533, device='cuda:0', dtype=torch.int32)

In [66]:
# Largest predicted id within the current batch only.
torch.max(batch['base_model_preds'])

tensor(3932, device='cuda:0', dtype=torch.int32)

In [78]:
# 对最后一维应用 data.id_to_item
for i in range(len(batch['base_model_preds'])):
    for j in range(len(batch['base_model_preds'][i])):
        for k in range(len(batch['base_model_preds'][i][j])):
            batch['base_model_preds'][i][j][k] = data.id_to_item[batch['base_model_preds'][i][j][k].item()]

In [79]:
# Display the predictions after the in-place data.id_to_item mapping.
batch['base_model_preds']

tensor([[[ 912, 1193,  910,  ...,  750,  260,  903],
         [ 345, 2124, 1081,  ...,   45, 1032, 2133],
         [3545, 3435, 2303,  ...,  922,  913, 3735],
         ...,
         [ 345,   11, 3614,  ..., 1569, 1269,  339],
         [1081, 2746, 3549,  ..., 1951, 3614, 2407],
         [ 912,  926,  923,  ..., 1945, 3095, 1230]],

        [[ 912, 1193,  910,  ...,  750,  260,  903],
         [ 345, 1081, 1127,  ...,  610,  410, 2133],
         [ 260, 1196, 2858,  ...,  593, 2396,  750],
         ...,
         [ 913, 1252, 1269,  ..., 1284, 3548, 2770],
         [1924, 1334, 2362,  ..., 1301, 2656, 2651],
         [ 912,  926,  923,  ..., 1945, 3095, 1230]],

        [[ 912, 1193, 1617,  ...,  527,  924,  969],
         [1081, 1127, 3552,  ...,  926, 2571, 2710],
         [2762, 2571, 2028,  ...,  110, 2858, 1230],
         ...,
         [ 913, 1269, 1252,  ...,  904, 1958,  141],
         [1924, 2554, 1334,  ...,  426, 1301, 3375],
         [ 912,  926,  923,  ..., 1945, 3095, 1230]],

In [52]:
# Value range of the decoded predictions.
# NOTE(review): `decoded_preds` is not assigned in any cell visible here; on a
# fresh kernel this cell fails unless it is defined elsewhere.
torch.max(decoded_preds), torch.min(decoded_preds)

(tensor(3735), tensor(11))

In [None]:
# Exploration: dense rank-based scores for the current `batch`, summed over the
# first axis left after squeeze(0). squeeze(0) only removes the leading axis
# when its size is 1.
all_scores = torch.Tensor(all_item_score(batch['base_model_preds'].cpu().numpy())).cuda().squeeze(0).sum(dim=0, keepdim=True)
# Preview the first 20 entries of the first row.
all_scores[0][:20]

In [None]:
# Exploration: how often each id occurs across all predictions in `batch`.
# minlength=3123 is the same literal width that all_item_score uses.
all_item_counts = torch.bincount(batch['base_model_preds'].squeeze(0).flatten(), minlength=3123)
# unsqueeze(0)[0] is the count vector itself; preview its first 20 entries.
all_item_counts.unsqueeze(0)[0][:20]

In [None]:
# Exploration: CombMNZ-style score, i.e. the summed score multiplied by the
# occurrence count. Uses `all_scores` and `all_item_counts` from the two
# preceding cells.
ans = all_scores * all_item_counts.unsqueeze(0)
ans[0][:20]

In [None]:
# CombMNZ baseline: summed rank-based score of each item multiplied by the
# number of times the item occurs in the base models' predictions.
# One sample per batch, so the leading batch axis can be squeezed away below.
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1)
metrics = []
for batch in tqdm(test_loader, desc="计算测试集指标"):
    # Dense scores from the ranked predictions, summed into a single row.
    all_scores = (
        torch.Tensor(all_item_score(batch['base_model_preds'].cpu().numpy()))
        .cuda()
        .squeeze(0)
        .sum(dim=0, keepdim=True)
    )
    # Occurrence count of every id in this sample's predictions.
    all_item_counts = torch.bincount(
        batch['base_model_preds'].squeeze(0).reshape(-1),
        minlength=data.n_item,
    )
    all_scores = all_scores * all_item_counts.unsqueeze(0)
    scores, indices = all_scores.topk(10)

    for i in range(len(batch['user_id'])):
        user_id, pos_item = batch['user_id'][i].item(), batch['pos_item'][i].item()

        # Ground truth: the user's interactions after `pos_item`, translated
        # through data.item_to_id.
        true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
        true_items = [
            data.item_to_id[item]
            for item in true_items[true_items.index(pos_item) + 1:]
        ]

        predicted_items = np.array([indices[i].tolist()])
        score = nDCG(predicted_items, [true_items])
        metrics.append(score)
np.mean(metrics)

## 基模型预测

In [None]:
# Evaluate every base model on its own: mean nDCG of its stored predictions.
# The mean does not depend on sample order, so the loader is kept unshuffled
# (deterministic), like the other evaluation cells.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

base_model_results = {}
for base_model in args['data']['base_model']:
    print(f"加载{base_model}...")
    # Stored predictions of this base model, indexed by interaction.
    model = np.load(args['data']['base_model_path'] + f"/{base_model}.npy")
    ndcg_scores = []
    phar = tqdm(test_loader, desc="计算测试集指标...")
    for batch in phar:
        # batch_size=1, so .item() extracts the single user / positive item.
        user_id = batch['user_id'].item()
        pos_item = batch['pos_item'].item()
        interaction_idx = data.get_interaction_index(data.id_to_user[user_id], pos_item)
        assert interaction_idx != -1

        # Columns 2..11 of the stored row are taken as the 10 predicted items
        # (presumably the first two columns are identifiers; confirm).
        predicted_items = model[interaction_idx][2:2+10]

        # Ground truth: the user's interactions after `pos_item`. The history
        # is looked up once, keyed through data.id_to_user; the slice is a new
        # list, so translating it in place leaves the stored history untouched.
        true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
        true_items = true_items[true_items.index(pos_item) + 1:]
        for j in range(len(true_items)):
            true_items[j] = data.item_to_id[true_items[j]]

        predicted_items = np.array([predicted_items])
        ndcg = nDCG(np.array(predicted_items), [true_items])
        ndcg_scores.append(ndcg)

        phar.set_postfix(ndcg=ndcg)
    base_model_results[base_model] = np.mean(ndcg_scores)
base_model_results