In [1]:
import torch.nn as nn
import numpy as np
import torch
import requests
from dataclasses import dataclass
from collections import defaultdict
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

import sys
sys.path.append('BERT4Rec-VAE-Pytorch')

from models.bert import BERTModel
from models.bert_modules.bert import BERT

import warnings
warnings.filterwarnings("ignore")

### Service functions

In [2]:
def equalize_seq(seq, max_length, end_code):
  
  if len(seq) >= max_length:
    return seq[:max_length]
  else:
    return (max_length - len(seq)) * [end_code] + seq

def recall_k(y_pred, y_true, k=10):
    
    top_k = torch.topk(y_pred, k).indices.tolist()
    result = [el[0] in el[1] for el in zip(y_true.tolist(), top_k)]
    
    return np.mean(result)

### Load data

In [3]:
data = (pd.read_csv('ml-1m/ratings.dat', 
                    sep='::',
                    header=None,
                    index_col=0,
                    names=['user_id', 'movie_id', 'rating', 'ts'], 
                    engine='python')
          .reset_index(drop=False)
       )

sequences = data.sort_values(by=['user_id', 'ts']).groupby('user_id')['movie_id'].agg(lambda x: list(x)).to_dict()
sequences = {u: s for u, s in sequences.items() if len(s) > 0}

mask_code = 0
max_len = 100
end_code = data['movie_id'].max() + 1

### Train / Valid / Test Split

In [4]:
train = defaultdict(list)
val = {}
test = {}

for user, sequence in sequences.items():

    train[user] = equalize_seq(sequence[:-1], max_length=max_len, end_code=end_code)

    if np.random.choice([0, 1]):
        val[user] = equalize_seq(sequence, max_length=max_len, end_code=end_code)
      
    else:
        test[user] = equalize_seq(sequence, max_length=max_len, end_code=end_code)

test_indexes = np.array(test.keys())
val_indexes = np.array(val.keys())

### Define Model

In [5]:
@dataclass
class BertConf:
    bert_max_len: int
    num_items: int
    bert_num_blocks: int
    bert_num_heads: int
    bert_hidden_units: int
    bert_dropout: float = 0.1
    model_init_seed: int = 42

conf = BertConf(bert_max_len=max_len, 
                num_items=end_code + 1, 
                bert_num_blocks=2, 
                bert_num_heads=2, 
                bert_hidden_units=100)

model = BERTModel(conf).to('cuda:0')

### Define Dataloader

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader

class TrainMaskedDataset(Dataset):
    def __init__(self, data, end_code=-1, p=0.2):
        self.data = data
        self.p = p
        self.end_code=end_code

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # Get X and y from the initial dataset

        batch = self.data[index]
        mask = torch.bernoulli(batch, p=self.p)

        # Create train
        X = batch * (1 - mask)

        # Create target
        y = batch * mask
        y[mask == 0] = self.end_code

        return X, y
    
class ValidMaskedDataset(Dataset):
    def __init__(self, data, end_code):
        self.data = data
        self.end_code=end_code

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # Get X and y from the initial dataset

        batch = self.data[index]
        mask = torch.zeros(batch.size())
        mask[-1] = 1

        # Create train
        X = batch * (1 - mask)

        # Create target
        y = batch[-1]
        
        return X, y

### Train Model

In [None]:
dataset = TrainMaskedDataset(torch.tensor(list(train.values())), end_code)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=256, shuffle=True)

val_dataset = ValidMaskedDataset(torch.tensor(list(val.values())), end_code)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=256, shuffle=True)

# Using an Adam Optimizer with lr = 0.1
loss_function = torch.nn.CrossEntropyLoss(ignore_index=end_code, reduction='mean')
optimizer = torch.optim.Adam(model.parameters())

epochs = 5000
outputs = []
losses = []

counter = 0
recall_list = []
for epoch in range(epochs):
    for X, y in train_loader:

        optimizer.zero_grad()
        
        X = X.to('cuda:0')
        y = y.to('cuda:0')

        loss = loss_function(model.forward(X).view(-1, 3955), y.view(-1))

        losses.append(loss.item())
        loss.backward()
         
        optimizer.step()
        
        counter += 1
        if counter % 500 == 0:
            val_losses = []
            recall_batch = []
            for X_val, y_val in val_loader:
                
                X_val = X_val.to('cuda:0')
                y_val = y_val.to('cuda:0')
                
                y_pred = model.forward(X_val.long())[:, -1, :]
                
                val_loss = loss_function(y_pred.view(-1, 3955), y_val.long().view(-1))
                val_losses.append(val_loss.item())
                
                recall_batch.append(recall_k(y_pred, y_val, k=10))

            print(counter, np.mean(recall_batch).round(3), np.round(loss.item(), 4), np.mean(val_losses).round(4))

500 0.081 6.1798 6.5893
1000 0.121 5.7555 6.1733
1500 0.157 5.4007 6.0148
2000 0.167 5.406 5.9092
2500 0.19 5.2223 5.8031
3000 0.2 5.1434 5.788
3500 0.204 5.1916 5.7396
4000 0.193 5.0306 5.739
4500 0.215 4.9869 5.6909
5000 0.209 5.113 5.6814
5500 0.208 4.9688 5.6987
6000 0.213 4.9893 5.653
6500 0.223 4.9258 5.6269
7000 0.222 4.8817 5.6322
7500 0.229 4.9849 5.5975
8000 0.223 4.8508 5.5905
8500 0.241 4.8254 5.568
9000 0.24 4.9057 5.5633
9500 0.243 4.7904 5.5364
10000 0.234 4.8733 5.6357
10500 0.24 4.8073 5.5718
11000 0.243 4.8046 5.5552
11500 0.236 4.8089 5.5365
12000 0.255 4.7918 5.5217
12500 0.246 4.8708 5.554
13000 0.245 4.7596 5.5305
