In [14]:
import torch.nn as nn
from torchmetrics.functional import retrieval_normalized_dcg
from sklearn.metrics import ndcg_score
import numpy as np
import torch
import requests
from dataclasses import dataclass
from collections import defaultdict
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

import sys
sys.path.append('BERT4Rec-VAE-Pytorch')

from models.bert import BERTModel
from models.bert_modules.bert import BERT

import warnings
warnings.filterwarnings("ignore")

### Service functions

In [22]:
def equalize_seq(seq, max_length, end_code):
  
  if len(seq) >= max_length:
    return seq[:max_length]
  else:
    return (max_length - len(seq)) * [end_code] + seq


def recall_k(y_pred, y_true, k=10):
    
    top_k = torch.topk(y_pred, k).indices.tolist()
    result = [el[0] in el[1] for el in zip(y_true.tolist(), top_k)]
    
    return np.mean(result)


def ndcg_k(y_pred, y_true, k=10):
    
    top_k = torch.topk(y_pred, k).indices.tolist()
    results = []
    for true_label, preds in zip(y_true.tolist(), top_k):
        tl = [[el == true_label for el in preds]]
        preds = [[1 for _ in preds]]
        results.append(ndcg_score(tl, preds))
    
    return np.mean(results)

### Load data

In [16]:
data = (pd.read_csv('ml-1m/ratings.dat', 
                    sep='::',
                    header=None,
                    index_col=0,
                    names=['user_id', 'movie_id', 'rating', 'ts'], 
                    engine='python')
          .reset_index(drop=False)
       )

sequences = data.sort_values(by=['user_id', 'ts']).groupby('user_id')['movie_id'].agg(lambda x: list(x)).to_dict()
sequences = {u: s for u, s in sequences.items() if len(s) > 0}

mask_code = 0
max_len = 100
end_code = data['movie_id'].max() + 1

### Train / Valid / Test Split

In [4]:
train = defaultdict(list)
val = {}
test = {}

for user, sequence in sequences.items():

    train[user] = equalize_seq(sequence[:-1], max_length=max_len, end_code=end_code)

    if np.random.choice([0, 1]):
        val[user] = equalize_seq(sequence, max_length=max_len, end_code=end_code)
      
    else:
        test[user] = equalize_seq(sequence, max_length=max_len, end_code=end_code)

test_indexes = np.array(test.keys())
val_indexes = np.array(val.keys())

### Define Model

In [5]:
@dataclass
class BertConf:
    bert_max_len: int
    num_items: int
    bert_num_blocks: int
    bert_num_heads: int
    bert_hidden_units: int
    bert_dropout: float = 0.1
    model_init_seed: int = 42

conf = BertConf(bert_max_len=max_len, 
                num_items=end_code + 1, 
                bert_num_blocks=2, 
                bert_num_heads=2, 
                bert_hidden_units=100)

model = BERTModel(conf).to('cuda:0')

### Define Dataloader

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader


class TrainMaskedDataset(Dataset):
    def __init__(self, data, end_code=-1, p=0.2):
        self.data = data
        self.p = p
        self.end_code=end_code

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # Get X and y from the initial dataset

        batch = self.data[index]
        mask = torch.bernoulli(batch, p=self.p)

        # Create train
        X = batch * (1 - mask)

        # Create target
        y = batch * mask
        y[mask == 0] = self.end_code

        return X, y

    
class ValidMaskedDataset(Dataset):
    def __init__(self, data, end_code):
        self.data = data
        self.end_code=end_code

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # Get X and y from the initial dataset

        batch = self.data[index]
        mask = torch.zeros(batch.size())
        mask[-1] = 1

        # Create train
        X = batch * (1 - mask)

        # Create target
        y = batch[-1]
        
        return X, y

### Train Model

In [24]:
dataset = TrainMaskedDataset(torch.tensor(list(train.values())), end_code)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=256, shuffle=True)

val_dataset = ValidMaskedDataset(torch.tensor(list(val.values())), end_code)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=256, shuffle=True)

# Using an Adam Optimizer with lr = 0.1
loss_function = torch.nn.CrossEntropyLoss(ignore_index=end_code, reduction='mean')
optimizer = torch.optim.Adam(model.parameters())

epochs = 5000
outputs = []
losses = []

counter = 0
recall_list = []
for epoch in range(epochs):
    for X, y in train_loader:

        optimizer.zero_grad()
        
        X = X.to('cuda:0')
        y = y.to('cuda:0')

        loss = loss_function(model.forward(X).view(-1, 3955), y.view(-1))

        losses.append(loss.item())
        loss.backward()
         
        optimizer.step()
        
        counter += 1
        if counter % 500 == 0:
            val_losses = []
            recall_batch = []
            ndcg_batch = []
            for X_val, y_val in val_loader:
                
                X_val = X_val.to('cuda:0')
                y_val = y_val.to('cuda:0')
                
                y_pred = model.forward(X_val.long())[:, -1, :]
                
                val_loss = loss_function(y_pred.view(-1, 3955), y_val.long().view(-1))
                val_losses.append(val_loss.item())
                
                recall_batch.append(recall_k(y_pred, y_val, k=10))
#                 print(y_pred.shape)
#                 print(y_val.shape)
                ndcg_batch.append(ndcg_k(y_pred, y_val))

#             print(counter, np.mean(recall_batch).round(3), np.round(loss.item(), 4), np.mean(val_losses).round(4))
            print("Epoch: ", counter)
            print("Recall@10: ", np.mean(recall_batch).round(3))
            print("NDCG: ", np.mean(ndcg_batch).round(3))
            print("Train Loss: ", np.round(loss.item(), 4))
            print("Val Loss: ", np.round(val_losses, 4))

Epoch:  500
Recall@10:  0.196
NDCG:  0.089
Train Loss:  5.1276
Val Loss:  [5.8991 5.6554 5.6385 5.6334 5.8616 5.9044 5.7201 5.6048 5.6758 5.6505
 5.6554 5.759 ]
Epoch:  1000
Recall@10:  0.21
NDCG:  0.095
Train Loss:  5.14
Val Loss:  [5.8888 5.6883 5.632  5.4712 5.7263 5.6678 5.8513 5.6294 5.7817 5.6129
 5.9256 5.472 ]
Epoch:  1500
Recall@10:  0.202
NDCG:  0.092
Train Loss:  5.0753
Val Loss:  [5.6284 5.538  5.9565 5.9021 5.668  5.6578 5.5701 5.6828 5.5238 5.843
 5.5752 5.732 ]
Epoch:  2000
Recall@10:  0.207
NDCG:  0.094
Train Loss:  5.0895
Val Loss:  [5.7912 5.6837 5.4592 5.8386 5.5511 5.8855 5.6846 5.5114 5.7323 5.7106
 5.5943 5.4645]
Epoch:  2500
Recall@10:  0.211
NDCG:  0.096
Train Loss:  4.9945
Val Loss:  [5.5091 5.7889 5.6641 5.7618 5.754  5.7075 5.5055 5.4691 5.762  5.7103
 5.6864 5.5916]
Epoch:  4000
Recall@10:  0.238
NDCG:  0.108
Train Loss:  4.9754
Val Loss:  [5.5326 5.4327 5.6186 5.6032 5.701  5.5467 5.6744 5.6192 5.7823 5.5463
 5.5649 5.5045]
Epoch:  4500
Recall@10:  0.22
NDC

KeyboardInterrupt: 

In [29]:
test_dataset = ValidMaskedDataset(torch.tensor(list(test.values())), end_code)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=True)
recall_batch = []
ndcg_batch = []
for X_val, y_val in test_loader:

    X_val = X_val.to('cuda:0')
    y_val = y_val.to('cuda:0')

    y_pred = model.forward(X_val.long())[:, -1, :]

    val_loss = loss_function(y_pred.view(-1, 3955), y_val.long().view(-1))
    val_losses.append(val_loss.item())

    recall_batch.append(recall_k(y_pred, y_val, k=10))
#                 print(y_pred.shape)
#                 print(y_val.shape)
    ndcg_batch.append(ndcg_k(y_pred, y_val))

#             print(counter, np.mean(recall_batch).round(3), np.round(loss.item(), 4), np.mean(val_losses).round(4))
print("Epoch: ", counter)
print("Recall@10: ", np.mean(recall_batch).round(3))
print("NDCG: ", np.mean(ndcg_batch).round(3))
print("Train Loss: ", np.round(loss.item(), 4))
print("Val Loss: ", np.round(val_losses, 4))

Epoch:  98318
Recall@10:  0.302
NDCG:  0.137
Train Loss:  4.4358
Val Loss:  [5.1515 5.2656 5.2731 5.3579 5.3787 5.2733 5.1833 5.1794 5.5037 5.4574
 5.4166 5.8056 5.4431 5.3379 5.2369 5.3678 5.0715 5.4771 5.1553 5.3522
 5.4999 5.376  5.2944 5.7084 5.584  5.377  5.3392 5.346  5.6916 5.3703
 5.5946 5.1375 5.3139 5.4448 5.4744 5.5418 5.3784 5.3557 5.4725 5.6016
 5.1977 5.3131 5.2406 5.5041 5.5196 5.7267 5.159  5.4073 5.3516 5.5976
 5.5631 5.1783 5.4084 5.3012 5.5095 5.2314 5.3875 5.4784 5.245  5.11
 5.4518 5.5841 5.323  5.3951 5.3579 5.4905 5.3038 5.4309 5.3803 5.538
 5.4747 5.2859]


In [None]:
epochs = 15000
for epoch in range(epochs):
    for X, y in train_loader:

        optimizer.zero_grad()
        
        X = X.to('cuda:0')
        y = y.to('cuda:0')

        loss = loss_function(model.forward(X).view(-1, 3955), y.view(-1))

        losses.append(loss.item())
        loss.backward()
         
        optimizer.step()
        
        counter += 1
        if counter % 500 == 0:
            val_losses = []
            recall_batch = []
            ndcg_batch = []
            for X_val, y_val in val_loader:
                
                X_val = X_val.to('cuda:0')
                y_val = y_val.to('cuda:0')
                
                y_pred = model.forward(X_val.long())[:, -1, :]
                
                val_loss = loss_function(y_pred.view(-1, 3955), y_val.long().view(-1))
                val_losses.append(val_loss.item())
                
                recall_batch.append(recall_k(y_pred, y_val, k=10))
#                 print(y_pred.shape)
#                 print(y_val.shape)
                ndcg_batch.append(ndcg_k(y_pred, y_val))

#             print(counter, np.mean(recall_batch).round(3), np.round(loss.item(), 4), np.mean(val_losses).round(4))
            print("Epoch: ", counter)
            print("Recall@10: ", np.mean(recall_batch).round(3))
            print("NDCG: ", np.mean(ndcg_batch).round(3))
            print("Train Loss: ", np.round(loss.item(), 4))
            print("Val Loss: ", np.round(val_losses, 4))