In [1]:
import torch

print("torch version:", torch.__version__)

# Check GPU availability: use CUDA when it is present, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Smoke test: allocate a tiny tensor directly on the selected device.
# An unconditional .cuda() call would raise on a CPU-only machine even though
# `device` already fell back to "cpu" above.
print(torch.zeros(1, device=device))


torch version: 2.5.1+cu124
cuda
tensor([0.], device='cuda:0')


In [2]:
# Check if the model can be loaded
from nrms import NRMSModel
from hyperparameters import HyperParameters

# Hyperparameter values used for this load check. The fields passed to
# HyperParameters are:
#   vocab_size, word_embedding_dim, dropout,
#   head_num, head_dim, attention_hidden_dim, title_size
hparams_kwargs = {
    "vocab_size": 100,
    "word_embedding_dim": 300,
    "dropout": 0.1,
    "head_num": 8,
    "head_dim": 64,
    "attention_hidden_dim": 512,
    "title_size": 20,
}
hparams = HyperParameters(**hparams_kwargs)

# Random (vocab_size x word_embedding_dim) matrix handed to the model as its
# embedding table — presumably a stand-in for real pretrained word2vec vectors.
embedding_shape = (hparams.vocab_size, hparams.word_embedding_dim)
word2vec_embedding = torch.randn(*embedding_shape)

# Instantiate the model and print its module structure.
model = NRMSModel(word2vec_embedding=word2vec_embedding, hparams=hparams)

print(model)




NRMSModel(
  (word2vec_embedding): Embedding(100, 300)
  (dropout): Dropout(p=0.1, inplace=False)
  (news_encoder): Sequential(
    (0): Embedding(100, 300)
    (1): Dropout(p=0.1, inplace=False)
    (2): SelfAttention(
      (query_layer): Linear(in_features=64, out_features=512, bias=True)
      (key_layer): Linear(in_features=64, out_features=512, bias=True)
      (value_layer): Linear(in_features=64, out_features=512, bias=True)
    )
    (3): Dropout(p=0.1, inplace=False)
    (4): AttentionLayer2(
      (attention_weight): Linear(in_features=512, out_features=1, bias=True)
    )
  )
  (user_encoder): Sequential(
    (0): Linear(in_features=20, out_features=512, bias=True)
    (1): SelfAttention(
      (query_layer): Linear(in_features=64, out_features=512, bias=True)
      (key_layer): Linear(in_features=64, out_features=512, bias=True)
      (value_layer): Linear(in_features=64, out_features=512, bias=True)
    )
    (2): AttentionLayer2(
      (attention_weight): Linear(in_featu

TODO: from here on down (the cells below are still work in progress and have not been executed yet)

In [None]:
# Import the dataset
from dataset import NewsDataset

# Build the dataset from data/train_data.csv, passing along the hyperparameters
# defined earlier in the notebook.
dataset = NewsDataset(hparams=hparams, data_path="data/train_data.csv")

# Print the first sample as a quick sanity check.
first_sample = dataset[0]
print(first_sample)

In [None]:
# Wrap the dataset in a DataLoader to iterate over it in shuffled mini-batches (no train / validation / test split is made here)
from torch.utils.data import DataLoader

# Mini-batches of two samples, drawn in shuffled order.
dataloader = DataLoader(dataset, shuffle=True, batch_size=2)

# Print only the first batch as a sanity check, then stop iterating.
batch_iterator = iter(dataloader)
for batch in batch_iterator:
    print(batch)
    break


In [None]:
# Train the model

from train import train

# Run the project's training routine with the model, dataloader, hyperparameters
# and device set up in the earlier cells.
train(model=model, dataloader=dataloader, hparams=hparams, device=device)

# Save the model: only its state_dict is written to model.pth, not the full module object.
model_state = model.state_dict()
torch.save(model_state, "model.pth")