In [1]:
import yaml
import torch
import numpy as np
from tqdm import tqdm
from model.sem import Sem
from model.bpr_rec import BPRSeqLearn
from torch.utils.data import DataLoader
from data import Data

In [3]:
with open("config/bpr.yaml", 'r', encoding='utf-8') as f:
    args = yaml.unsafe_load(f)
args

{'base_path': 'D:/Code/graduation_design/',
 'topk': 10,
 'data': {'device': 'cuda:0',
  'train_test_split': 0.99,
  'base_model_topk': 100,
  'maxlen': 20,
  'name': 'ml-1m',
  'sep': '::',
  'item_path': 'D:/Code/graduation_design/data\\ml-1m\\movies.dat',
  'item_emb_path': 'D:/Code/graduation_design/data\\ml-1m\\item_embeddings.npy',
  'path': 'D:/Code/graduation_design/data\\ml-1m\\ratings.dat',
  'num_negatives': 1,
  'user_threshold': 10,
  'item_threshold': 10,
  'rating_threshold': 2,
  'base_model': ['acf', 'fdsa', 'harnn', 'caser', 'pfmc', 'sasrec', 'anam'],
  'base_model_path': 'D:/Code/graduation_design/base_model_results\\ml-1m'},
 'model': {'lr': 0.001,
  'type': 'SASEM',
  'lamda': 1e-05,
  'hidden_dim': 32,
  'device': 'cuda:0',
  'optimizer': 'AdamOptimizer',
  'tradeoff': 2,
  'div_module': 'cov',
  'pretrain_llm': 'bert-base-uncased',
  'path': 'D:/Code/graduation_design/bpr/ckpt_sigmoid'},
 'epoch': 10,
 'batch_size': 512}

In [4]:
data = Data(args['data'])
train_loader = DataLoader(data.train_dataset, batch_size=args['batch_size'], shuffle=True)

>>>> 数据加载完成: 834449 条交互, 6033 个用户, 3123 个物品
>>>> 基模型的预测结果加载完成: (834449, 7, 102)


>>>> 采样负样本: 100%|██████████| 6033/6033 [00:32<00:00, 184.13it/s]
>>>> 构建训练集: 100%|██████████| 826104/826104 [00:03<00:00, 234626.18it/s]


>>>> 生成了 826104 个训练样本


>>>> 构建测试集: 100%|██████████| 8345/8345 [00:00<00:00, 839041.78it/s]


>>>> 生成了 8345 个测试样本


In [27]:
test_loader = DataLoader(data.test_dataset, batch_size=1, shuffle=False)

In [34]:
batch = next(iter(train_loader))

In [45]:
for batch in test_loader:
    if torch.max(batch['all_item_scores']) > 0:
        print(batch['all_item_scores'])
        break

In [46]:
data.base_model_preds.shape

(834449, 7, 102)

In [47]:
test_size = 8345
base_model_preds_test = data.base_model_preds[-test_size:]
base_model_preds_test.shape

(8345, 7, 102)

In [84]:
rank_chunk = base_model_preds_test[:,:,2:2+100]  # [batch, k, rank]
n_samples, k, topk = rank_chunk.shape  # [batch, k, rank]
rank_chunk_reshape = np.reshape(rank_chunk, [-1, topk])
print(n_samples, k, topk, rank_chunk_reshape.shape)

8345 7 100 (58415, 100)


In [74]:
rank_chunk_reshape

array([[ 325,    9, 1088, ...,  100,  822,  543],
       [ 425,  611, 1447, ..., 1356,  548, 1671],
       [1837, 1100, 1443, ...,  501, 2252, 1735],
       ...,
       [  80,  982,  763, ...,  481,  265, 1944],
       [  87,  418,  293, ...,  378, 1032,   15],
       [  59, 1141,  854, ..., 1747, 2281,   14]], dtype=int64)

In [99]:
u_k_i = np.zeros([n_samples * k, data.n_item], dtype=np.float32)  # [batch, k, n_item]
u_k_i.shape, u_k_i

((58415, 3123),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32))

In [58]:
np.arange(len(u_k_i))

array([    0,     1,     2, ..., 58412, 58413, 58414])

In [62]:
rank_chunk_reshape[:,0], np.max(rank_chunk_reshape[:,0])

(array([ 325,  425, 1837, ...,   80,   87,   59], dtype=int64), 3095)

In [64]:
u_k_i[0, rank_chunk_reshape[:,0]] = 1 / 10
u_k_i[0, 320:330]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [100]:
for i in range(topk):
    u_k_i[np.arange(len(u_k_i)), rank_chunk_reshape[:, i]] = 1 / (i + 10)
np.max(u_k_i), topk

(0.1, 100)

In [None]:
u_k_i = np.zeros((n_samples * k, data.n_item))
print("\n初始评分矩阵:")
print(u_k_i.shape)

# 逐位置填充评分
for i in range(topk):
    u_k_i[np.arange(len(u_k_i)), rank_chunk_reshape[:, i]] = 1 / (i + 10)
    print(u_k_i)
np.max(u_k_i)


初始评分矩阵:
(58415, 3123)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0

0

In [72]:
nums = list(data.user_interacted_items.values())
mx = 0
for i in range(len(nums)):
    for j in range(len(nums[i])):
        mx = max(mx, data.item_to_id[nums[i][j]])
mx


3122

In [None]:
sem.item_embeddings = sem.item_embeddings.to(sem.device)
seq_emb = sem.item_embeddings(user_seq)
user_emb = sem.user_embeddings(user_ids)  # [batch_size, hidden_dim]

# 添加位置编码
positions = torch.arange(sem.seq_max_len, device=sem.device).expand(user_seq.size(0), -1)
seq_emb = seq_emb + sem.pos_embedding(positions)

# 创建注意力掩码
mask = (user_seq == -1)
output = sem.user_encoder(seq_emb.transpose(0,1), src_key_padding_mask=mask).transpose(0,1)
preference = output[:,-1,:] + user_emb

base_model_emb = sem.item_embeddings(base_model_preds)  # [batch_size, n_base_model, seq_len, hidden_dim]

# 时间衰减权重
time_weights = 1.0 / torch.log2(torch.arange(sem.seq_max_len, device=sem.device) + 2)
time_weights = time_weights.view(1, 1, -1, 1)

basemodel_emb = sem.base_model_embeddings + torch.sum(time_weights * base_model_emb, dim=2)

# 计算基模型权重
wgts_org = torch.sum(preference.unsqueeze(1) * basemodel_emb, dim=-1)  # [batch_size, n_base_model]
import torch.nn.functional as F
wgts = F.softmax(wgts_org, dim=-1)

all_scores = torch.sum(wgts.unsqueeze(2) * all_item_scores, dim=1)  # bc

scores, indices = torch.topk(all_scores, 10)
scores, indices

## 改进模型

In [5]:
model = BPRSeqLearn(args['model'], args['data'], data.n_user, data.n_item)
ckpt = torch.load(f"../bpr/ckpt_score_sum/bpr_epoch3.pth")
filtered_ckpt = {k: v for k, v in ckpt.items() if not k.startswith('item_tower.cex')}
model.load_state_dict(filtered_ckpt, strict=False)
model.eval()

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


>>>> 加载预计算的物品嵌入...


BPRSeqLearn(
  (cem): ContentExtractionModule(
    (llm): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-23): 24 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_features=1024, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Lin

In [7]:
test_loader = DataLoader(data.test_dataset, batch_size=args['batch_size'], shuffle=False)
with torch.no_grad():
    ndcg_scores = []
    for batch in tqdm(test_loader, desc="计算测试集NDCG"):
        user_ids = batch['user_id']
        user_seq = batch['user_seq']
        pos_items = batch['pos_item']
        neg_items = batch['neg_item']
        all_item_scores = batch['all_item_scores']
        base_model_preds = batch['base_model_preds']

        all_scores = model.predict(user_ids, user_seq, pos_items, neg_items, all_item_scores, base_model_preds)
        scores, indices = torch.topk(all_scores, 10)
        indices += 1

        for i in range(len(user_ids)):
            user_id = user_ids[i].item()
            pos_item = pos_items[i].item()

            true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
            true_items = true_items[true_items.index(pos_items[i]) + 1:]
            for j in range(len(true_items)):
                true_items[j] = data.item_to_id[true_items[j]]

            predicted_items = np.array([indices[i].cpu().numpy().tolist()])
            ndcg = nDCG(np.array(predicted_items), [true_items])
            ndcg_scores.append(ndcg)

np.mean(ndcg_scores)

计算测试集NDCG:   0%|          | 0/17 [00:01<?, ?it/s]


NameError: name 'nDCG' is not defined

In [None]:
for batch in tqdm(test_loader, desc="计算测试集NDCG"):
    user_ids = batch['user_id']
    user_seq = batch['user_seq']
    pos_items = batch['pos_item']
    neg_items = batch['neg_item']
    all_item_scores = batch['all_item_scores']
    base_model_preds = batch['base_model_preds']
    if user_seq.shape[0] != 

In [13]:
with torch.no_grad():
    ndcg_scores = []
    for batch in tqdm(test_loader, desc="计算测试集NDCG"):
        user_ids = batch['user_id']
        user_seq = batch['user_seq']
        pos_items = batch['pos_item']
        neg_items = batch['neg_item']
        all_item_scores = batch['all_item_scores']
        base_model_preds = batch['base_model_preds']

        user_emb = model.user_embeddings(user_ids)  # batch_size, hidden_dim

        # user 侧
        user_interaction = model.item_tower(user_seq)  # bc, seq_len, hidden_dim
        preference = model.dien_with_self_attention(user_interaction)[:, -1, :] + user_emb  # bc, hidden_dim

        # item 侧
        base_model_focus_llm = model._convert_focus_to_llm_embeddings(base_model_preds)  # bc, n_base_model, seq_len, hidden_dim
        each_model_emb = model.llm_projection(base_model_focus_llm)  # bc, n_base_model, seq_len, hidden_dim
        # basemodel_emb = each_model_emb.mean(dim=2)  # bc, n_base_model, hidden_dim

        # 时间衰减权重
        time_weights = 1.0 / torch.log2(torch.arange(model.seq_max_len, device=model.device) + 2)
        time_weights = time_weights.view(1, 1, -1, 1)

        basemodel_emb = torch.sum(time_weights * each_model_emb, dim=2)  # [bc, n_base_model, hidden_dim]

        # [bc, n_base_model, hidden_dim] @ [bc, 1, hidden_dim] -> [bc, n_base_model, 1]
        preference = preference.unsqueeze(1).transpose(-2, -1)  # [batch_size, hidden_dim, 1]
        wgts_org = torch.matmul(basemodel_emb, preference).squeeze(-1)
        
        # 计算基模型权重
        # wgts_org = torch.sum(preference.unsqueeze(1) * basemodel_emb, dim=-1)  # bc, n_base_model
        from torch.nn import functional as F
        wgts = F.softmax(wgts_org, dim=-1)  # bc, n_base_model
        all_scores = torch.matmul(wgts.unsqueeze(1), all_item_scores).squeeze(1)

        # 计算所有物品得分
        # pred_all_item_scores = torch.sum(wgts * all_item_scores, dim=1)  # bc
        scores, indices = torch.topk(all_scores, 10)
        indices += 1

        for i in range(len(user_ids)):
            user_id = user_ids[i].item()
            pos_item = pos_items[i].item()

            true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
            true_items = true_items[true_items.index(pos_items[i]) + 1:]
            for j in range(len(true_items)):
                true_items[j] = data.item_to_id[true_items[j]]

            predicted_items = np.array([indices[i].cpu().numpy().tolist()])
            ndcg = nDCG(np.array(predicted_items), [true_items])
            ndcg_scores.append(ndcg)

np.mean(ndcg_scores)

计算测试集NDCG: 100%|██████████| 17/17 [02:06<00:00,  7.43s/it]


0.33310487298851466

## SEM

In [14]:
sem = Sem(args['model'], args['data'], data.n_user, 3952)
ckpt = torch.load(f"../bpr/ckpt_sem/sem_epoch3.pth")
sem.load_state_dict(ckpt, strict=False)
sem.eval()

model.device: cuda:0




Sem(
  (user_embeddings): Embedding(6033, 32)
  (item_embeddings): Embedding(3953, 32)
  (user_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
        )
        (linear1): Linear(in_features=32, out_features=32, bias=True)
        (dropout): Dropout(p=0.5, inplace=False)
        (linear2): Linear(in_features=32, out_features=32, bias=True)
        (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.5, inplace=False)
        (dropout2): Dropout(p=0.5, inplace=False)
      )
    )
  )
  (pos_embedding): Embedding(20, 32)
)

In [16]:
test_loader = DataLoader(data.test_dataset, batch_size=args['batch_size'], shuffle=False)

with torch.no_grad():
    ndcg_scores = []
    for batch in tqdm(test_loader, desc="计算测试集NDCG"):
        user_ids = batch['user_id']
        user_seq = batch['user_seq']
        pos_items = batch['pos_item']
        neg_items = batch['neg_item']
        all_item_scores = batch['all_item_scores']
        base_model_preds = batch['base_model_preds']

        sem.item_embeddings = sem.item_embeddings.to(sem.device)
        seq_emb = sem.item_embeddings(user_seq)
        user_emb = sem.user_embeddings(user_ids)  # [batch_size, hidden_dim]

        # 添加位置编码
        positions = torch.arange(sem.seq_max_len, device=sem.device).expand(user_seq.size(0), -1)
        seq_emb = seq_emb + sem.pos_embedding(positions)

        # 创建注意力掩码
        mask = (user_seq == -1)
        output = sem.user_encoder(seq_emb.transpose(0,1), src_key_padding_mask=mask).transpose(0,1)
        preference = output[:,-1,:] + user_emb  # [bc, hidden_dim]

        base_model_emb = sem.item_embeddings(base_model_preds)  # [bc, n_base_model, seq_len, hidden_dim]

        # 时间衰减权重
        time_weights = 1.0 / torch.log2(torch.arange(sem.seq_max_len, device=sem.device) + 2)
        time_weights = time_weights.view(1, 1, -1, 1)  # [1, 1, seq_len, 1]

        basemodel_emb = sem.base_model_embeddings + torch.sum(time_weights * base_model_emb, dim=2)

        # 计算基模型权重
        wgts_org = torch.sum(preference.unsqueeze(1) * basemodel_emb, dim=-1)  # [batch_size, n_base_model]
        import torch.nn.functional as F
        wgts = F.softmax(wgts_org, dim=-1)

        all_scores = torch.sum(wgts.unsqueeze(2) * all_item_scores, dim=1)  # bc

        # all_scores = model.predict(user_ids, user_seq, all_item_scores, base_model_preds)
        scores, indices = torch.topk(all_scores, 10)
        indices += 1

        for i in range(len(user_ids)):
            user_id = user_ids[i].item()
            pos_item = pos_items[i].item()

            true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
            true_items = true_items[true_items.index(pos_items[i]) + 1:]
            for j in range(len(true_items)):
                true_items[j] = data.item_to_id[true_items[j]]

            predicted_items = np.array([indices[i].cpu().numpy().tolist()])
            ndcg = nDCG(np.array(predicted_items), [true_items])
            ndcg_scores.append(ndcg)

np.mean(ndcg_scores)

计算测试集NDCG: 100%|██████████| 17/17 [02:05<00:00,  7.41s/it]


0.3270357652490906

## 基模型预测值取平均

In [10]:
test_loader = DataLoader(data.test_dataset, batch_size=1, shuffle=False)

ndcg_scores = []
phar = tqdm(test_loader, desc="计算NDCG@10...")
for batch in phar:
    user_ids, user_seq, pos_items, neg_items, all_item_scores, base_model_preds = batch

    user_id = user_ids.item()
    pos_item = pos_items.item()
    interaction_idx = data.get_interaction_index(data.id_to_user[user_id], pos_item)
    assert interaction_idx != -1

    predicted_items = model[interaction_idx][2:2+20]

    predicted_items += 1

    # 获取用户的实际交互物品
    true_items = data.user_interacted_items[user_id]
    true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
    true_items = true_items[true_items.index(pos_item) + 1:]

    ndcg = nDCG(np.array(np.array([predicted_items])), [true_items])
    ndcg_scores.append(ndcg)

    phar.set_postfix(ndcg=ndcg)

np.mean(ndcg_scores)

计算NDCG@10...:   0%|          | 0/8345 [00:00<?, ?it/s]


AttributeError: 'str' object has no attribute 'item'

## 基模型预测

In [17]:
acf = np.load(args['data']['base_model_path'] + f"/acf.npy")

In [25]:
test_loader = DataLoader(data.test_dataset, batch_size=1, shuffle=False)
model = np.load(args['data']['base_model_path'] + f"/pfmc.npy")

ndcg_scores = []
phar = tqdm(test_loader, desc="计算NDCG@10...")
for batch in phar:
    user_ids = batch['user_id']
    user_seq = batch['user_seq']
    pos_items = batch['pos_item']
    neg_items = batch['neg_item']
    all_item_scores = batch['all_item_scores']
    base_model_preds = batch['base_model_preds']

    user_id = user_ids.item()
    pos_item = pos_items.item()
    interaction_idx = data.get_interaction_index(data.id_to_user[user_id], pos_item)
    assert interaction_idx != -1

    predicted_items = model[interaction_idx][2:2+10]

    predicted_items += 1

    # 获取用户的实际交互物品
    true_items = data.user_interacted_items[user_id]
    true_items = data.user_interacted_items[data.id_to_user[user_id].item()]
    true_items = true_items[true_items.index(pos_item) + 1:]
    for j in range(len(true_items)):
        true_items[j] = data.item_to_id[true_items[j]]

    ndcg = nDCG(np.array(np.array([predicted_items])), [true_items])
    ndcg_scores.append(ndcg)

    phar.set_postfix(ndcg=ndcg)

np.mean(ndcg_scores)

计算NDCG@10...: 100%|██████████| 8345/8345 [00:26<00:00, 319.76it/s, ndcg=[0]]                  


0.34919322397522734

In [None]:
all_scores = model.predict(user_ids, user_seq, pos_items, base_model_preds)
_, indices = torch.topk(all_scores, 10)
indices

In [None]:
scores, indices = torch.topk(all_scores, 10)
scores, indices + 1

In [None]:
true_items = generator.user_interacted_items[generator.id_to_user[user_ids.item()].item()]
len(true_items), true_items[:5], pos_items

In [None]:
true_items = generator.user_interacted_items[generator.id_to_user[user_ids.item()].item()]
len(true_items), true_items[:5], pos_items

In [None]:
true_items_clip = true_items[true_items.index(pos_items.item()) + 1:]
len(true_items_clip), true_items_clip[:10]

In [None]:
x = torch.tensor(2863).unsqueeze(0).to(model.device)
y = torch.tensor(1).unsqueeze(0).to(model.device)

pos_score, neg_score = model(user_ids, user_seq, pos_items, neg_items, base_model_preds)
pos_score, neg_score

In [None]:
x = torch.tensor(2863).unsqueeze(0).to(model.device)
y = torch.tensor(1).unsqueeze(0).to(model.device)

pos_score, neg_score = model(user_ids, user_seq, pos_items, neg_items, base_model_preds)
pos_score, neg_score

In [None]:
my_data = DataLoader(train_dataset, batch_size=1, shuffle=True)
user_ids, user_seq, pos_items, neg_items, base_model_preds = next(iter(my_data))
user_ids, user_seq, pos_items, neg_items

In [None]:
from data import BPRLoss
loss = BPRLoss()

In [8]:
import numpy as np
np.random.seed(2021)
 
class Model:
    def __init__(self, k):
        self.k = k
        self.item_size = 50
 
    def __call__(self, users):
        # 模型随机返回 k 个 item,模拟推荐结果
        res = np.random.randint(0, self.item_size, users.shape[0] * self.k)
        return res.reshape((users.shape[0], -1))
 
 
def get_implict_matrix(rec_items, test_set):
    rel_matrix = [[0] * rec_items.shape[1] for _ in range(rec_items.shape[0])]
    for user in range(len(test_set)):
        for index, item in enumerate(rec_items[user]):
            if item in test_set[user]:
                rel_matrix[user][index] = 1
    return np.array(rel_matrix)
 
 
def nDCG(rec_items, test_set):
    DCG = lambda x: np.sum(x / np.log(np.arange(2, len(x) + 2)))
    def get_implict_matrix(rec_items, test_set):
        rel_matrix = [[0] * rec_items.shape[1] for _ in range(rec_items.shape[0])]
        for user in range(len(test_set)):
            for index, item in enumerate(rec_items[user]):
                if item in test_set[user]:
                    rel_matrix[user][index] = 1
        return np.array(rel_matrix)
    rel_matrix = get_implict_matrix(rec_items, test_set)
    ndcgs = []
    for user in range(len(test_set)):
        rels = rel_matrix[user]
        dcg = DCG(rels)
        idcg = DCG(sorted(rels, reverse=True))
        ndcg = dcg / idcg if idcg != 0 else 0
        ndcgs.append(ndcg)
    return ndcgs
 
 
# 假设 top-20 推荐,一共 5 个 user, 50 个 item ,隐式反馈数据集.
users = np.array([0])
# test_set 表示 5 个用户在测试集中分表交互过那些 item
test_set = [
    [0, 21, 31, 41, 49]
]
rec_items=np.array([
    [0,  9,  5,  6, 7, 50, 8, 31, 21, 1]
])
# model = Model(20)
# rec_items = model(users)
print("truth click", test_set)
print("rec_items", rec_items)
ndcgs = nDCG(rec_items, test_set)
print(ndcgs)
 
print('-'*10)
 
dcg=1/np.log(2)+1/np.log(9)+1/np.log(10)
idcg=1/np.log(2)+1/np.log(3)+1/np.log(4)
ndcg=(1/np.log(2)+1/np.log(9)+1/np.log(10))/(1/np.log(2)+1/np.log(3)+1/np.log(4))
print(dcg, idcg, ndcg)

truth click [[0, 21, 31, 41, 49]]
rec_items [[ 0  9  5  6  7 50  8 31 21  1]]
[0.758586654365518]
----------
2.332109136105634 3.074281787960283 0.758586654365518


In [None]:
rank_chunk = np.array([
    [[0, 1, 2], [2, 3, 4]],  # 第一个样本的排名结果
    [[1, 2, 3], [3, 4, 0]]   # 第二个样本的排名结果
])

In [None]:
n_samples, k, topk = rank_chunk.shape  # [batch, k, rank]
rank_chunk_reshape = np.reshape(rank_chunk, [-1, topk])
rank_chunk_reshape

In [None]:
u_k_i = np.zeros([n_samples * k, 5])
u_k_i

In [None]:
np.arange(len(u_k_i))

In [None]:
for i in range(topk):
    u_k_i[np.arange(len(u_k_i)), rank_chunk_reshape[:, i]] = 1 / (i + 10)
u_k_i