In [1]:
import os

# This variable has to be in the environment before torch is imported,
# so the assignment deliberately sits between the two imports.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # fixes problem with graph

import torch

# Run-location flag; not referenced by any of the cells visible here.
BLACKHOLE = False

print("torch version:", torch.__version__)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)


torch version: 2.5.1+cu124
cuda


In [None]:
from nrms import NRMSModel
from hyperparameters import hparams_nrms
import numpy as np

# Seed NumPy and PyTorch so the random placeholder embedding below (and any
# torch-side random initialisation inside NRMSModel) is repeatable on
# "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

hparams = hparams_nrms()

MAX_TITLE_LENGTH = 10
HISTORY_SIZE = 30
BATCH_SIZE = 64

# Random stand-in for an embedding matrix: 1000 rows (vocabulary entries)
# by 100 columns (embedding dimension), uniform in [0, 1).
word2vec_embedding = np.random.rand(1000, 100)

# PARAMETERS
hparams.title_size = MAX_TITLE_LENGTH
hparams.history_size = HISTORY_SIZE
hparams.batch_size = BATCH_SIZE
hparams.candidate_size = 5

# MODEL ARCHITECTURE
# head_num * head_dim = 256, which matches the out_features of the attention
# projections in the printed model summary.
hparams.head_num = 16
hparams.head_dim = 16
hparams.attention_hidden_dim = 1000
hparams.linear_hidden_dim = 200
# Keep the embedding width in sync with the matrix handed to the model.
hparams.embedding_dim = word2vec_embedding.shape[1]

hparams.use_positional_encoding = False
hparams.use_learned_positions = False

# MODEL OPTIMIZER:
hparams.optimizer = "adam"
hparams.loss = "cross_entropy_loss"
hparams.dropout = 0.2
hparams.learning_rate = 1e-4

model = NRMSModel(hparams=hparams, word2vec_embedding=word2vec_embedding)

print(model)


NRMSModel(
  (news_encoder): NewsEncoder(
    (embedding): Embedding(1000, 100)
    (dropout): Dropout(p=0.2, inplace=False)
    (self_attention): SelfAttention(
      (query_proj): Linear(in_features=100, out_features=256, bias=True)
      (key_proj): Linear(in_features=100, out_features=256, bias=True)
      (value_proj): Linear(in_features=100, out_features=256, bias=True)
    )
    (dense_layers): Sequential(
      (0): Linear(in_features=256, out_features=200, bias=True)
      (1): ReLU()
      (2): LayerNorm((200,), eps=1e-05, elementwise_affine=True)
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=200, out_features=200, bias=True)
      (5): ReLU()
      (6): LayerNorm((200,), eps=1e-05, elementwise_affine=True)
      (7): Dropout(p=0.2, inplace=False)
      (8): Linear(in_features=200, out_features=256, bias=True)
      (9): ReLU()
      (10): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (11): Dropout(p=0.2, inplace=False)
    )
    (att_la

In [3]:
import torch.nn as nn
import torch.optim as optim

# Select the loss function named in the run configuration.
if hparams.loss == "cross_entropy_loss":
    criterion = nn.CrossEntropyLoss()
elif hparams.loss == "mse_loss":
    criterion = nn.MSELoss()
else:
    raise ValueError(f"Loss function {hparams.loss} not supported")

# Build the optimizer. The learning rate must come from the configured
# `hparams` instance: reading it from `hparams_nrms` (the object that was
# called to create `hparams`) would ignore the value assigned on the instance.
if hparams.optimizer == "adam":
    optimizer = optim.Adam(model.parameters(), lr=hparams.learning_rate)
else:
    raise ValueError(f"Optimizer {hparams.optimizer} not supported")

In [4]:
# Smoke test: run a single optimisation step on randomly generated inputs.
NPRATIO = 4  # number of negative candidates per positive one

# Guard against stale kernel state: the loose globals used to build the data
# must agree with the hyperparameters stored on `hparams`, otherwise the
# tensors generated below will not match what the model was configured for.
assert hparams.history_size == HISTORY_SIZE, (
    f"HISTORY_SIZE={HISTORY_SIZE} differs from hparams.history_size="
    f"{hparams.history_size}; re-run the configuration cell and rebuild the model"
)
assert hparams.title_size == MAX_TITLE_LENGTH, (
    f"MAX_TITLE_LENGTH={MAX_TITLE_LENGTH} differs from hparams.title_size="
    f"{hparams.title_size}; re-run the configuration cell and rebuild the model"
)
assert hparams.candidate_size == NPRATIO + 1, (
    f"NPRATIO + 1 = {NPRATIO + 1} differs from hparams.candidate_size="
    f"{hparams.candidate_size}"
)

# Move model to GPU if available
model.to(device)

# Per-sample shapes of the inputs and of the label vector
his_input_title_shape = (HISTORY_SIZE, MAX_TITLE_LENGTH)
pred_input_title_shape = (NPRATIO + 1, MAX_TITLE_LENGTH)
label_shape = (NPRATIO + 1,)
vocab_size = word2vec_embedding.shape[0]

# Random history titles: integer token ids in [0, vocab_size)
his_input_title = np.random.randint(0, vocab_size, (BATCH_SIZE, *his_input_title_shape))

# Random candidate titles: integer token ids in [0, vocab_size)
pred_input_title = np.random.randint(
    0, vocab_size, (BATCH_SIZE, *pred_input_title_shape)
)

# Random one-hot labels: exactly one candidate per row is marked with 1
label_data = np.zeros((BATCH_SIZE, *label_shape), dtype=int)
for row in label_data:
    row[np.random.choice(label_shape[0])] = 1

print(HISTORY_SIZE)
print(MAX_TITLE_LENGTH)
print(NPRATIO)
print(vocab_size)

# Print the shapes of the input data to verify they match the model's input layers
print(his_input_title.shape)
print(pred_input_title.shape)
print(label_data.shape)

# Convert the input data to PyTorch tensors on the selected device.
# Token ids are cast to int64; labels are cast to float, which
# nn.CrossEntropyLoss treats as per-class target probabilities.
his_input_title = torch.from_numpy(his_input_title).long().to(device)
pred_input_title = torch.from_numpy(pred_input_title).long().to(device)
label_data = torch.from_numpy(label_data).float().to(device)

# Zero the gradients
optimizer.zero_grad()

# Forward pass
# NOTE(review): the saved output of this cell ends in an AssertionError about
# an input of shape [50, 10] where (64, 10) was expected. That check is not in
# this notebook (presumably inside `nrms`); confirm the argument order and the
# expected shapes against NRMSModel.forward.
outputs = model(pred_input_title, his_input_title)

print(outputs.shape)
print(label_data.shape)

print(outputs)
print(label_data)

# Calculate the loss
loss = criterion(outputs, label_data)

# Backward pass
loss.backward()

# Update the weights
optimizer.step()


print("Done")



50
10
4
1000
(64, 50, 10)
(64, 5, 10)
(64, 5)


AssertionError: Shape of input: torch.Size([50, 10]), expected: (64, 10)