In [2]:
from data_loader import *
from model import *

  from .autonotebook import tqdm as notebook_tqdm


Dataset: ebnerd_demo
Loading GloVe vectors from saved file: ../Data/glove_vectors.pt


Statistics for Train Dataset:
Browsed News - Min: 5, Max: 1000, Mean: 182.61, Std: 180.37
Candidate News - Min: 5, Max: 100, Mean: 11.24, Std: 7.92



Statistics for Validation Dataset:
Browsed News - Min: 5, Max: 1000, Mean: 276.52, Std: 219.08
Candidate News - Min: 5, Max: 93, Mean: 12.03, Std: 9.32



In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence


# Model test

In [4]:
print("\nbrowsed news: ", browsed_news_train.shape
      , "\ncandidate news: ", candidate_news_train.shape
      , "\nclicked news: ", clicked_news_train.shape)



browsed news:  torch.Size([2554, 20, 114]) 
candidate news:  torch.Size([2554, 20, 114]) 
clicked news:  torch.Size([2554, 20])


In [5]:

batch_size = 16

### Test news encoder
print("\n ------NEWS ENCODER------")

word_embedding_matrix = glove_vectors  # Assume glove_vectors are loaded and of correct size
attention_dim = 200
# Instantiate the model
news_encoder = NewsEncoder(embed_size=300, heads=15, word_embedding_matrix=word_embedding_matrix, attention_dim=attention_dim)

# Random input

x = browsed_news_train[:batch_size, 1, :] #[Batch size, 1 news, 26 words]

output = news_encoder(x)

print("input shape:", x.shape)
print("output shape:", output.shape) # News encoder works fine




 ------NEWS ENCODER------
input shape: torch.Size([16, 114])
output shape: torch.Size([16, 300])


In [6]:
### Test user encoder
print('\n ------USER ENCODER------')

user_encoder = UserEncoder(embed_size=300, heads=15, attention_dim=200)

x = browsed_news_train[:batch_size, :, :] #[Batch size, all news, 26 words]

e = [news_encoder(news) for news in x] # Apply the news encoder to each news article
e = torch.stack(e, dim=0)

output = user_encoder(e)

print("input shape:", e.shape)
print("output shape:", output.shape) # User encoder works fine



 ------USER ENCODER------
input shape: torch.Size([16, 20, 300])
output shape: torch.Size([16, 300])


In [7]:

### Test full model
print('\n -----COMPLETE MODEL------') 

batch_size = 3

model_final = NRMS(embed_size=300, heads=15, word_embedding_matrix=glove_vectors, attention_dim=200)

browsed_news_batch = browsed_news_train[:batch_size, :, :] #[Batch size, all news, 26 words]
candidate_news_batch = candidate_news_train[:batch_size, :, :] #[Batch size, all news, 26 words]
clicked_news_batch = clicked_news_train[:batch_size, :] #[Batch size, all news]

# Forward pass for the entire batch
click = model_final(browsed_news_batch, candidate_news_batch)

print(f"\nInput shape: Browsed = {browsed_news_batch.shape}, Candidate = {candidate_news_batch.shape}")
print(f"Output shape: Click = {click.shape}") # Full model works fine

print("\nSum of probabilities:", torch.sum(click, dim=1)) # Probabilities sum to 1
print(f"\nClicked indices: True = {torch.argmax(click, dim=1)}, Predicted = {torch.argmax(clicked_news_batch, dim=1)}") # Predicted indices match true indices



 -----COMPLETE MODEL------

Input shape: Browsed = torch.Size([3, 20, 114]), Candidate = torch.Size([3, 20, 114])
Output shape: Click = torch.Size([3, 20])

Sum of probabilities: tensor([1.0000, 1.0000, 1.0000], grad_fn=<SumBackward1>)

Clicked indices: True = tensor([ 4, 15,  0]), Predicted = tensor([0, 0, 0])


# Test training

In [8]:
#print length of test_loader
print("\nLength of train_loader: ", len(train_loader)) # Length of test_loader is 1


#print one batch of data from test_loader
print("\nOne batch of data from train_loader: ")
print(train_loader.dataset[0]) # One batch of data from test_loader is printed

print(train_loader.dataset[0]["browsed_news"].size()) # Shape of browsed_news_train
print(train_loader.dataset[0]["candidate_news"].size()) # Shape of candidate_news_train
print(train_loader.dataset[0]["clicked_idx"].size()) # Shape of clicked_news_train




Length of train_loader:  40

One batch of data from train_loader: 
{'browsed_news': tensor([[271792,  21932,     48,  ...,      0,      0,      0],
        [     2,      2,     48,  ...,      0,      0,      0],
        [     2,      2,  32085,  ...,      0,      0,      0],
        ...,
        [398853,      2,      2,  ...,      0,      0,      0],
        [     2,  12601, 354434,  ...,      0,      0,      0],
        [398853,      2,      2,  ...,      0,      0,      0]]), 'candidate_news': tensor([[ 72313,   3883,      5,  ...,      0,      0,      0],
        [     2,  42973,  37972,  ...,      0,      0,      0],
        [ 93327, 314424,  67929,  ...,      0,      0,      0],
        ...,
        [     2,  10533,      2,  ...,      0,      0,      0],
        [     2,     48,      2,  ...,      0,      0,      0],
        [ 96666,     48,     74,  ...,      0,      0,      0]]), 'clicked_idx': tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
    

In [9]:
nrms_model = NRMS(embed_size=300, heads=4, word_embedding_matrix=glove_vectors, attention_dim=128)
# Optimizer for model
optimizer = torch.optim.Adam(nrms_model.parameters(), lr=0.001)


In [10]:
nrms_model.train()


NRMS(
  (news_encoder): NewsEncoder(
    (embedding): Embedding(400004, 300, padding_idx=0)
    (multi_head_attention): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=300, out_features=300, bias=True)
    )
    (additive_attention): AdditiveAttention(
      (V_w): Linear(in_features=300, out_features=128, bias=True)
    )
  )
  (user_encoder): UserEncoder(
    (multi_head_attention): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=300, out_features=300, bias=True)
    )
    (additive_attention): AdditiveAttention(
      (V_w): Linear(in_features=300, out_features=128, bias=True)
    )
  )
)

In [32]:

# Create an iterator from the dataloader
data_iter = iter(train_loader)

# Get the first batch from the iterator
batch = next(data_iter)

user_histories = batch['browsed_news']
candidate_news = batch['candidate_news']
labels = batch['clicked_idx']

print("\nUser histories shape: ", user_histories.shape)
print("Candidate news shape: ", candidate_news.shape)
print("Labels shape: ", labels.shape)
print("Labels: ", labels)


User histories shape:  torch.Size([64, 20, 114])
Candidate news shape:  torch.Size([64, 20, 114])
Labels shape:  torch.Size([64, 20])
Labels:  tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])


In [12]:
click_prob = nrms_model(user_histories, candidate_news)

In [20]:
sum(click_prob[0]) # Probabilities sum to 1
no_batches, no_candidate_news = click_prob.size()
positive_index = torch.arange(no_batches), torch.argmax(labels, dim=1)
positive_sample = click_prob[positive_index]

In [31]:
print(positive_index[0].size())
print(positive_index[0])
print(positive_index[1])

torch.Size([64])
tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61, 62, 63])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [36]:
mask = torch.ones_like(click_prob, dtype=torch.bool)
mask[positive_index] = False
negative_samples = click_prob[mask].view(no_batches, -1)

print(negative_samples.size())
print(click_prob)
print(negative_samples)

torch.Size([64, 19])
tensor([[0.0542, 0.0550, 0.0529,  ..., 0.0533, 0.0493, 0.0499],
        [0.0461, 0.0492, 0.0525,  ..., 0.0551, 0.0484, 0.0516],
        [0.0476, 0.0530, 0.0475,  ..., 0.0467, 0.0541, 0.0521],
        ...,
        [0.0538, 0.0456, 0.0528,  ..., 0.0531, 0.0509, 0.0540],
        [0.0460, 0.0443, 0.0476,  ..., 0.0525, 0.0510, 0.0545],
        [0.0536, 0.0471, 0.0488,  ..., 0.0510, 0.0523, 0.0431]],
       grad_fn=<SoftmaxBackward0>)
tensor([[0.0550, 0.0529, 0.0475,  ..., 0.0533, 0.0493, 0.0499],
        [0.0492, 0.0525, 0.0511,  ..., 0.0551, 0.0484, 0.0516],
        [0.0530, 0.0475, 0.0466,  ..., 0.0467, 0.0541, 0.0521],
        ...,
        [0.0456, 0.0528, 0.0479,  ..., 0.0531, 0.0509, 0.0540],
        [0.0443, 0.0476, 0.0523,  ..., 0.0525, 0.0510, 0.0545],
        [0.0471, 0.0488, 0.0503,  ..., 0.0510, 0.0523, 0.0431]],
       grad_fn=<ViewBackward0>)


In [40]:
K=4
# Use randperm instead of randint so that we don't have repetitions
random_negative_indices = torch.randperm(no_candidate_news)[:K] 
# Neg samples for all users (using the same indexes)
negative_samples = click_prob[:, random_negative_indices]  # [batch_size, K]

print(random_negative_indices)
print(negative_samples)

tensor([ 3, 15,  5,  2])
tensor([[0.0475, 0.0488, 0.0476, 0.0529],
        [0.0511, 0.0514, 0.0481, 0.0525],
        [0.0466, 0.0475, 0.0480, 0.0475],
        [0.0542, 0.0502, 0.0476, 0.0480],
        [0.0493, 0.0528, 0.0520, 0.0540],
        [0.0475, 0.0504, 0.0467, 0.0495],
        [0.0451, 0.0506, 0.0522, 0.0538],
        [0.0532, 0.0536, 0.0487, 0.0532],
        [0.0481, 0.0476, 0.0520, 0.0472],
        [0.0540, 0.0481, 0.0475, 0.0496],
        [0.0531, 0.0484, 0.0535, 0.0514],
        [0.0467, 0.0523, 0.0472, 0.0405],
        [0.0488, 0.0505, 0.0476, 0.0551],
        [0.0457, 0.0492, 0.0476, 0.0576],
        [0.0484, 0.0543, 0.0547, 0.0475],
        [0.0502, 0.0532, 0.0484, 0.0553],
        [0.0442, 0.0482, 0.0505, 0.0504],
        [0.0517, 0.0473, 0.0548, 0.0536],
        [0.0466, 0.0528, 0.0518, 0.0516],
        [0.0536, 0.0487, 0.0467, 0.0524],
        [0.0483, 0.0514, 0.0535, 0.0460],
        [0.0477, 0.0497, 0.0518, 0.0466],
        [0.0534, 0.0462, 0.0471, 0.0519],
        [

In [55]:
# Compute posterior prob for the positive sample
exp_pos = torch.exp(positive_sample)  # [batch_size]
exp_neg = torch.exp(negative_samples)  # [batch_size, K]
sum_exp_neg = torch.sum(exp_neg, dim=1)  # [batch_size]
pi_positive = exp_pos / (exp_pos + sum_exp_neg)  # [batch_size]

print(pi_positive)

tensor([0.2008, 0.1992, 0.2000, 0.1995, 0.1986, 0.1995, 0.2005, 0.1994, 0.2007,
        0.1989, 0.1998, 0.2006, 0.1995, 0.2003, 0.1992, 0.1983, 0.2006, 0.2000,
        0.1991, 0.1991, 0.1992, 0.1999, 0.1996, 0.1994, 0.1986, 0.1998, 0.1999,
        0.1993, 0.2006, 0.2006, 0.1996, 0.1996, 0.1997, 0.2003, 0.2002, 0.2000,
        0.2003, 0.2005, 0.1992, 0.1993, 0.1996, 0.2004, 0.1987, 0.1994, 0.2005,
        0.2001, 0.2000, 0.1987, 0.2002, 0.2010, 0.1996, 0.1992, 0.1990, 0.1996,
        0.2008, 0.2008, 0.1993, 0.1994, 0.2001, 0.1999, 0.1998, 0.2007, 0.1992,
        0.2006], grad_fn=<DivBackward0>)


In [51]:
loss = -torch.log(pi_positive).mean()
print(loss)

tensor(1.6106, grad_fn=<NegBackward0>)
