In [8]:
import os
import sys
from pathlib import Path


# Toggle for the environment whose workspace root is stored in the
# $BLACKHOLE environment variable; leave False to read data from the
# home directory instead.
BLACKHOLE = False

if not BLACKHOLE:
    # Default location: ~/ebnerd_data with the "~" expanded.
    DATAPATH = Path("~/ebnerd_data").expanduser()
else:
    # Resolve the workspace root from the environment, make the project
    # sources importable, and point the data path inside the workspace.
    workspace_path = os.path.expandvars('$BLACKHOLE')
    sys.path.append(f"{workspace_path}/DeepLearning/02456_news_project/src")
    DATAPATH = Path(f"{workspace_path}/DeepLearning/ebnerd_data").expanduser()

# Name of the dataset folder/split to use; the alternative is kept below.
DATASET = "ebnerd_demo"
# DATASET = "ebnerd_small"

Packages:
- torch (PyTorch)
- transformers (Huggingface)

In [9]:
import torch

print("torch version:", torch.__version__)

# Pick the compute device: CUDA when torch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Optional manual check that a tensor can actually be placed on the GPU:
#print(torch.zeros(1).cuda())

torch version: 2.5.1+cu124
cuda


TODO: from here on down

In [10]:
from utils.data_handler import NewsDataset
import from_ebrec._constants as cs

# Values forwarded to NewsDataset.setup_df below as seed, fraction and
# history_size respectively.
SEED = 42
FRACTION = 0.01
HISTORY_SIZE = 20

# Columns requested from the loader (passed as `columns=`).
COLS = [
    cs.DEFAULT_USER_COL,
    cs.DEFAULT_IMPRESSION_ID_COL,
    cs.DEFAULT_IMPRESSION_TIMESTAMP_COL,
    cs.DEFAULT_HISTORY_ARTICLE_ID_COL,
    cs.DEFAULT_CLICKED_ARTICLES_COL,
    cs.DEFAULT_INVIEW_ARTICLES_COL,
]

# test
dataset = NewsDataset(dataset_path=DATAPATH / DATASET)

dataset.setup_df(
    dataset_path=DATAPATH,
    datasplit=DATASET,
    history_size=HISTORY_SIZE,
    columns=COLS,
    fraction=FRACTION,
    seed=SEED,
)

In [11]:
import transformers as huggingface
from from_ebrec._nlp import get_transformers_word_embeddings
from from_ebrec._polars import concat_str_columns
from from_ebrec._articles import convert_text2encoding_with_transformers
from from_ebrec._articles import create_article_id_to_value_mapping


# Build the article-side inputs: a pretrained embedding source plus a
# mapping from article id to the encoded text of that article.
# Start from the article frame held by the dataset object; `df_articles`
# is rebound twice below, so re-running this cell restarts from this line.
df_articles = dataset.df_articles

# Hugging Face checkpoint used for both the model and its tokenizer.
TRANSFORMER_MODEL_NAME = "FacebookAI/xlm-roberta-base"
# Text columns that are concatenated before tokenisation.
TEXT_COLUMNS_TO_USE = [cs.DEFAULT_SUBTITLE_COL, cs.DEFAULT_TITLE_COL]
# Passed as `max_length` to the tokenisation helper and later reused as
# the model's title size.
MAX_TITLE_LENGTH = 30

# Load the pretrained model and the matching tokenizer from Hugging Face.
transformer_model = huggingface.AutoModel.from_pretrained(TRANSFORMER_MODEL_NAME)
transformer_tokenizer = huggingface.AutoTokenizer.from_pretrained(TRANSFORMER_MODEL_NAME)

# Presumably extracts the model's input word-embedding matrix -- confirm
# against from_ebrec._nlp.
word2vec_embedding = get_transformers_word_embeddings(transformer_model)
# Returns the updated frame and (presumably) the name of the new
# concatenated column -- TODO confirm the second return value.
df_articles, cat_cal = concat_str_columns(df_articles, columns=TEXT_COLUMNS_TO_USE)
# Tokenise the concatenated text; the second return value is used below
# as the column holding the encodings.
df_articles, token_col_title = convert_text2encoding_with_transformers(
    df_articles, transformer_tokenizer, cat_cal, max_length=MAX_TITLE_LENGTH
)
# Lookup consumed by the dataloaders as `article_dict`.
article_mapping = create_article_id_to_value_mapping(
    df=df_articles, value_col=token_col_title
)

In [12]:
from dataloader import NRMSDataLoader

BATCH_SIZE = 32

# Both loaders are configured identically apart from the behaviour frame
# they read, so the common keyword arguments are declared once.
shared_loader_kwargs = dict(
    article_dict=article_mapping,
    unknown_representation="zeros",
    history_column=cs.DEFAULT_HISTORY_ARTICLE_ID_COL,
    eval_mode=False,
    batch_size=BATCH_SIZE,
)

# Training split.
train_dataloader = NRMSDataLoader(behaviors=dataset.df_train, **shared_loader_kwargs)
# Validation split.
val_dataloader = NRMSDataLoader(behaviors=dataset.df_validation, **shared_loader_kwargs)


In [None]:
from nrms import NRMSModel
from hyperparameters import hparams_nrms


# Start from the project's default NRMS hyperparameters and override the
# fields used in this experiment.
hparams = hparams_nrms()

# Optimisation settings
hparams.learning_rate = 1e-4
hparams.dropout = 0.2
hparams.loss = "cross_entropy_loss"
hparams.optimizer = "adam"

# Attention architecture
hparams.attention_hidden_dim = 200
hparams.head_dim = 20
hparams.head_num = 20

# Input sizes, tied to the constants used when preparing the data
hparams.history_size = HISTORY_SIZE
hparams.title_size = MAX_TITLE_LENGTH

# NOTE(review): debug=True appears to be what triggers the per-batch shape
# printouts during training -- confirm in nrms.NRMSModel.
model = NRMSModel(
    hparams=hparams,
    word2vec_embedding=word2vec_embedding,
    seed=SEED,
    debug=True,
)

print(model)


NRMSModel(
  (news_encoder): NewsEncoder(
    (embedding): Embedding(250002, 768)
    (dropout): Dropout(p=0.2, inplace=False)
    (self_attention): SelfAttention()
    (dense_layers): Sequential(
      (0): Linear(in_features=300, out_features=400, bias=True)
      (1): ReLU()
      (2): LayerNorm((400,), eps=1e-05, elementwise_affine=True)
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=400, out_features=400, bias=True)
      (5): ReLU()
      (6): LayerNorm((400,), eps=1e-05, elementwise_affine=True)
      (7): Dropout(p=0.2, inplace=False)
      (8): Linear(in_features=400, out_features=300, bias=True)
      (9): ReLU()
      (10): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
      (11): Dropout(p=0.2, inplace=False)
    )
    (att_layer): AttLayer2()
  )
  (user_encoder): UserEncoder(
    (title_encoder): NewsEncoder(
      (embedding): Embedding(250002, 768)
      (dropout): Dropout(p=0.2, inplace=False)
      (self_attention): SelfAttention()
    

In [None]:
# Train the model
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm  # for progress bars

# Number of full passes over the training data.
EPOCHS = 5

# Select the loss function named in the hyperparameters.
# This must be a single if/elif/else chain: with two separate `if`
# statements, a valid "cross_entropy_loss" setting fails the second test
# and falls into the `else`, raising ValueError.
if hparams.loss == "cross_entropy_loss":
    criterion = nn.CrossEntropyLoss()
elif hparams.loss == "mse_loss":
    criterion = nn.MSELoss()
else:
    raise ValueError(f"Loss function {hparams.loss} not supported")

# Select the optimizer. The learning rate is read from the configured
# `hparams` instance (the object the model was built with), not from the
# `hparams_nrms` class, so overrides made on the instance take effect.
if hparams.optimizer == "adam":
    optimizer = optim.Adam(model.parameters(), lr=hparams.learning_rate)
else:
    raise ValueError(f"Optimizer {hparams.optimizer} not supported")

# Move model to GPU if available
model.to(device)

# Per-epoch mean losses, appended at the end of every epoch.
train_loss_history, val_loss_history = [], []

for epoch in range(EPOCHS):
    model.train()  # Set the model to training mode
    train_loss = 0.0

    for batch in tqdm(train_dataloader, desc=f"Training Epoch {epoch + 1}/{EPOCHS}"):
        # Each batch is ((history titles, candidate titles), labels).
        (inputs, labels) = batch
        his_input_title, pred_input_title = inputs

        # Move data to device
        his_input_title = his_input_title.to(device)
        pred_input_title = pred_input_title.to(device)

        labels = labels.to(device)

        # Forward pass; the model takes candidates first, then history.
        outputs = model(pred_input_title, his_input_title)
        loss = criterion(outputs, labels)

        # Backward pass: clear stale gradients, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validation loop: eval mode and no gradient tracking.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(val_dataloader, desc=f"Validation Epoch {epoch + 1}/{EPOCHS}"):
            (inputs, labels) = batch
            his_input_title, pred_input_title = inputs

            his_input_title = his_input_title.to(device)
            pred_input_title = pred_input_title.to(device)
            labels = labels.to(device)

            outputs = model(pred_input_title, his_input_title)
            loss = criterion(outputs, labels)

            val_loss += loss.item()

    # Convert summed batch losses into per-batch means for this epoch.
    train_loss /= len(train_dataloader)
    val_loss /= len(val_dataloader)
    train_loss_history.append(train_loss)
    val_loss_history.append(val_loss)

    print(f"Epoch {epoch + 1}/{EPOCHS}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")


Training Epoch 1/5:   0%|          | 0/7 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 1/5:  14%|█▍        | 1/7 [00:00<00:01,  3.77it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 1/5:  43%|████▎     | 3/7 [00:00<00:00,  4.40it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 1/5:  57%|█████▋    | 4/7 [00:00<00:00,  4.50it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 1/5:  71%|███████▏  | 5/7 [00:01<00:00,  4.57it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 1/5: 100%|██████████| 7/7 [00:01<00:00,  4.63it/s]


Model: Shape of pred_input_title: torch.Size([20, 5, 30])
Model: Shape of his_input_title: torch.Size([20, 20, 30])
UE1: Shape of input: torch.Size([20, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([20, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([20, 20, 300]) . Should be (batch_s

Validation Epoch 1/5:   0%|          | 0/2 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Validation Epoch 1/5: 100%|██████████| 2/2 [00:00<00:00, 25.51it/s]


NE6: Shape after dense layers: torch.Size([5, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([5, 300]) . Should be (batch_size, head_num * head_dim)
NE0: Shape of input: torch.Size([5, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([5, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([5, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([5, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([5, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([5, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([5, 300]) . Should be (batch_size, head_num * head_dim)
NE0: Shape of input: torch.Size([5, 30]) . Should be (batch_size, title_size)
NE2: Shape 

Training Epoch 2/5:   0%|          | 0/7 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 2/5:  29%|██▊       | 2/7 [00:00<00:01,  4.53it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 2/5:  43%|████▎     | 3/7 [00:00<00:00,  4.50it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 2/5:  57%|█████▋    | 4/7 [00:00<00:00,  4.33it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 2/5:  71%|███████▏  | 5/7 [00:01<00:00,  4.37it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 2/5: 100%|██████████| 7/7 [00:01<00:00,  4.62it/s]


Model: Shape of pred_input_title: torch.Size([20, 5, 30])
Model: Shape of his_input_title: torch.Size([20, 20, 30])
UE1: Shape of input: torch.Size([20, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([20, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([20, 20, 300]) . Should be (batch_s

Validation Epoch 2/5:   0%|          | 0/2 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Validation Epoch 2/5: 100%|██████████| 2/2 [00:00<00:00, 33.32it/s]


Model: Shape of pred_input_title: torch.Size([4, 5, 30])
Model: Shape of his_input_title: torch.Size([4, 20, 30])
UE1: Shape of input: torch.Size([4, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([4, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([4, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([4, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([4, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([4, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([4, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([4, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([4, 20, 300]) . Should be (batch_size, histor

Training Epoch 3/5:   0%|          | 0/7 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 3/5:  29%|██▊       | 2/7 [00:00<00:01,  4.68it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 3/5:  43%|████▎     | 3/7 [00:00<00:00,  4.57it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 3/5:  57%|█████▋    | 4/7 [00:00<00:00,  4.62it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 3/5:  71%|███████▏  | 5/7 [00:01<00:00,  4.61it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 3/5: 100%|██████████| 7/7 [00:01<00:00,  4.76it/s]


Model: Shape of pred_input_title: torch.Size([20, 5, 30])
Model: Shape of his_input_title: torch.Size([20, 20, 30])
UE1: Shape of input: torch.Size([20, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([20, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([20, 20, 300]) . Should be (batch_s

Validation Epoch 3/5:   0%|          | 0/2 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Validation Epoch 3/5: 100%|██████████| 2/2 [00:00<00:00, 33.33it/s]


NE6: Shape after dense layers: torch.Size([5, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([5, 300]) . Should be (batch_size, head_num * head_dim)
Model: Shape of user_representation: torch.Size([4, 300])
Model: Shape of news_representations: torch.Size([5, 4, 300])
CP1: Shape of news_representation: torch.Size([5, 4, 300]) . Should be (candidate_size, batch_size, 400)
CP2: Shape of user_representation: torch.Size([4, 300]) . Should be (batch_size, attention_hidden_dim)
CP3: Reshape of news_representation: torch.Size([4, 5, 300]) . Should be (batch_size, candidate_size, attention_hidden_dim)
CP3: Shape of prob: torch.Size([4, 5]) . Should be (batch_size, candidate_size)
Epoch 3/5: Train Loss = 1.6021, Val Loss = 1.6466


Training Epoch 4/5:   0%|          | 0/7 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 4/5:  14%|█▍        | 1/7 [00:00<00:01,  4.42it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 4/5:  29%|██▊       | 2/7 [00:00<00:01,  4.46it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 4/5:  57%|█████▋    | 4/7 [00:00<00:00,  4.58it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 4/5:  71%|███████▏  | 5/7 [00:01<00:00,  4.61it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 4/5: 100%|██████████| 7/7 [00:01<00:00,  4.74it/s]


Model: Shape of pred_input_title: torch.Size([20, 5, 30])
Model: Shape of his_input_title: torch.Size([20, 20, 30])
UE1: Shape of input: torch.Size([20, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([20, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([20, 20, 300]) . Should be (batch_s

Validation Epoch 4/5:   0%|          | 0/2 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Validation Epoch 4/5: 100%|██████████| 2/2 [00:00<00:00, 35.09it/s]


Model: Shape of pred_input_title: torch.Size([4, 5, 30])
Model: Shape of his_input_title: torch.Size([4, 20, 30])
UE1: Shape of input: torch.Size([4, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([4, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([4, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([4, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([4, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([4, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([4, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([4, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([4, 20, 300]) . Should be (batch_size, histor

Training Epoch 5/5:   0%|          | 0/7 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 5/5:  14%|█▍        | 1/7 [00:00<00:01,  4.74it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 5/5:  29%|██▊       | 2/7 [00:00<00:01,  4.62it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 5/5:  43%|████▎     | 3/7 [00:00<00:00,  4.66it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 5/5:  57%|█████▋    | 4/7 [00:00<00:00,  4.59it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 5/5:  71%|███████▏  | 5/7 [00:01<00:00,  4.47it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Training Epoch 5/5: 100%|██████████| 7/7 [00:01<00:00,  4.73it/s]


Model: Shape of pred_input_title: torch.Size([20, 5, 30])
Model: Shape of his_input_title: torch.Size([20, 20, 30])
UE1: Shape of input: torch.Size([20, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([20, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([20, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([20, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([20, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([20, 20, 300]) . Should be (batch_s

Validation Epoch 5/5:   0%|          | 0/2 [00:00<?, ?it/s]

Model: Shape of pred_input_title: torch.Size([32, 5, 30])
Model: Shape of his_input_title: torch.Size([32, 20, 30])
UE1: Shape of input: torch.Size([32, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([32, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([32, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([32, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([32, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([32, 20, 300]) . Should be (batch_s

Validation Epoch 5/5: 100%|██████████| 2/2 [00:00<00:00, 38.46it/s]

Model: Shape of pred_input_title: torch.Size([4, 5, 30])
Model: Shape of his_input_title: torch.Size([4, 20, 30])
UE1: Shape of input: torch.Size([4, 20, 30]) . Should be (batch_size, history_size, title_size)
NE0: Shape of input: torch.Size([4, 30]) . Should be (batch_size, title_size)
NE2: Shape after casting to long: torch.Size([4, 30]) . Should be (batch_size, title_size)
NE3: Shape after embedding: torch.Size([4, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE4: Shape after dropout: torch.Size([4, 30, 768]) . Should be (batch_size, title_size, embedding_dim)
NE5: Shape after self attention: torch.Size([4, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE6: Shape after dense layers: torch.Size([4, 30, 300]) . Should be (batch_size, title_size, head_num * head_dim)
NE7: Shape after att layer: torch.Size([4, 300]) . Should be (batch_size, head_num * head_dim)
UE2: Shape after title encoder: torch.Size([4, 20, 300]) . Should be (batch_size, histor


