In [1]:
from NN_evaluate import *

model = Model(config).to(device)
model.eval()

NAML(
  (news_encoder): NewsEncoder(
    (text_encoders): ModuleDict(
      (abstract): TextEncoder(
        (word_embedding): Embedding(70976, 300, padding_idx=0)
        (CNN): Conv2d(1, 300, kernel_size=(3, 300), stride=(1, 1), padding=(1, 0))
        (additive_attention): AdditiveAttention(
          (linear): Linear(in_features=300, out_features=200, bias=True)
        )
      )
      (title): TextEncoder(
        (word_embedding): Embedding(70976, 300, padding_idx=0)
        (CNN): Conv2d(1, 300, kernel_size=(3, 300), stride=(1, 1), padding=(1, 0))
        (additive_attention): AdditiveAttention(
          (linear): Linear(in_features=300, out_features=200, bias=True)
        )
      )
    )
    (element_encoders): ModuleDict(
      (category): ElementEncoder(
        (embedding): Embedding(275, 100, padding_idx=0)
        (linear): Linear(in_features=100, out_features=300, bias=True)
      )
    )
    (final_attention): AdditiveAttention(
      (linear): Linear(in_features=300, 

In [2]:
directory = '../data/NewsNudge'
num_workers = config.num_workers
max_count = sys.maxsize

news_dataset = NewsDataset(path.join(directory, 'NN_news_parsed.tsv'))
news_dataloader = DataLoader(news_dataset,
                                batch_size=config.batch_size * 16,
                                shuffle=False,
                                num_workers=config.num_workers,
                                drop_last=False,
                                pin_memory=True)

news2vector = {}
for minibatch in tqdm(news_dataloader,
                        desc="Calculating vectors for news"):
    news_ids = minibatch["id"]
    if any(id not in news2vector for id in news_ids):
        news_vector = model.get_news_vector(minibatch)
        for id, vector in zip(news_ids, news_vector):
            if id not in news2vector:
                news2vector[id] = vector

news2vector['PADDED_NEWS'] = torch.zeros(
    list(news2vector.values())[0].size())

Calculating vectors for news: 100%|██████████| 1/1 [00:02<00:00,  2.35s/it]


In [3]:
user_dataset = UserDataset(path.join(directory, 'NN_behaviors.tsv'),
                            '../data/NewsNudge/NN_user2int.tsv')
user_dataloader = DataLoader(user_dataset,
                                batch_size=config.batch_size * 16,
                                shuffle=False,
                                num_workers=config.num_workers,
                                drop_last=False,
                                pin_memory=True)

user2vector = {}
for minibatch in tqdm(user_dataloader,
                        desc="Calculating vectors for users"):
    user_strings = minibatch["clicked_news_string"]
    if any(user_string not in user2vector for user_string in user_strings):
        clicked_news_vector = torch.stack([
            torch.stack([news2vector[x].to(device) for x in news_list],
                        dim=0) for news_list in minibatch["clicked_news"]
        ],
                                            dim=0).transpose(0, 1)
        if model_name == 'LSTUR':
            user_vector = model.get_user_vector(
                minibatch['user'], minibatch['clicked_news_length'],
                clicked_news_vector)
        else:
            user_vector = model.get_user_vector(clicked_news_vector)
        for user, vector in zip(user_strings, user_vector):
            if user not in user2vector:

                user2vector[user] = vector

Calculating vectors for users: 100%|██████████| 1/1 [00:06<00:00,  6.78s/it]


In [4]:
behaviors_dataset = BehaviorsDataset(path.join(directory, 'NN_behaviors.tsv'))
behaviors_dataloader = DataLoader(behaviors_dataset,
                                    batch_size=1,
                                    shuffle=False,
                                    num_workers=config.num_workers)

In [15]:
candidate_news_vector = news2vector['N4']
user_vector = user2vector['N0 N1 N2 N3']

In [17]:
from model.general.click_predictor.dot_product import DotProductClickPredictor
click_predictor = DotProductClickPredictor()

In [19]:
candidate_news_vector.unsqueeze(dim=0).shape

torch.Size([1, 300])

In [24]:
torch.dot(news2vector['N5'], user_vector)

tensor(1.0572, grad_fn=<DotBackward0>)

In [25]:
torch.dot(news2vector['N4'], user_vector)

tensor(1.1733, grad_fn=<DotBackward0>)

In [18]:
click_predictor(candidate_news_vector.unsqueeze(dim=0), user_vector.unsqueeze(dim=0))

RuntimeError: batch1 must be a 3D tensor

AttributeError: 'tuple' object has no attribute 'squeeze'

In [10]:
model.get_prediction(candidate_news_vector, user_vector)

RuntimeError: batch1 must be a 3D tensor

In [None]:





    count = 0

    tasks = []

    for minibatch in tqdm(behaviors_dataloader,
                          desc="Calculating probabilities"):
        count += 1
        if count == max_count:
            break
        print('before ---------------')
        candidate_news_vector = torch.stack([
            news2vector[news[0].split('-')[0]]
            for news in minibatch['impressions']
        ],
                                            dim=0)
        print('after -------------')
        user_vector = user2vector[minibatch['clicked_news_string'][0]]
        click_probability = model.get_prediction(candidate_news_vector,
                                                 user_vector)

        y_pred = click_probability.tolist()
        y_true = [
            int(news[0].split('-')[1]) for news in minibatch['impressions']
        ]

        tasks.append((y_true, y_pred))

    with Pool(processes=num_workers) as pool:
        results = pool.map(calculate_single_user_metric, tasks)

    aucs, mrrs, ndcg5s, ndcg10s = np.array(results).T
    return np.nanmean(aucs), np.nanmean(mrrs), np.nanmean(ndcg5s), np.nanmean(
        ndcg10s)

Calculating vectors for news: 100%|██████████| 1/1 [00:02<00:00,  2.23s/it]
Calculating vectors for users: 100%|██████████| 1/1 [00:06<00:00,  6.84s/it]


{'N0': tensor([0.1376, 0.1147, 0.1934, 0.2844, 0.2133, 0.5514, 0.1780, 0.0857, 0.1166,
        0.0814, 0.1260, 0.2140, 0.1292, 0.3674, 0.0950, 0.1165, 0.5374, 0.1074,
        0.1394, 0.1172, 0.1989, 0.1393, 0.1076, 0.3027, 0.1198, 0.1364, 0.1157,
        0.1090, 0.1381, 0.3676, 0.3471, 0.0715, 0.2496, 0.1741, 0.1697, 0.1456,
        0.1080, 0.1889, 0.1161, 0.1321, 0.1018, 0.1299, 0.1056, 0.1279, 0.1305,
        0.1454, 0.1614, 0.2847, 0.1031, 0.4811, 0.1027, 0.1602, 0.1827, 0.1241,
        0.1129, 0.1921, 0.2036, 0.1209, 0.2658, 0.1053, 0.1741, 0.1526, 0.2308,
        0.1867, 0.1637, 0.2980, 0.1073, 0.1937, 0.3102, 0.0566, 0.0890, 0.3033,
        0.1145, 0.3117, 0.0752, 0.1714, 0.1790, 0.0826, 0.5822, 0.2779, 0.1193,
        0.0987, 0.1657, 0.1468, 0.4255, 0.1246, 0.1702, 0.2166, 0.1565, 0.1349,
        0.1595, 0.1613, 0.1052, 0.3013, 0.1490, 0.1406, 0.1506, 0.2642, 0.0884,
        0.2365, 0.4697, 0.1373, 0.2473, 0.1641, 0.3317, 0.1349, 0.2886, 0.1278,
        0.1632, 0.1174, 0.2296, 0

Calculating probabilities:   0%|          | 1/73152 [00:04<86:03:28,  4.24s/it]

before ---------------
after -------------
before ---------------


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x1638e20c0>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 1442, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/connection

IndexError: invalid index of a 0-dim tensor. Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number