In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import SGD
from torch.nn import BCELoss
from torch.nn import CrossEntropyLoss
from torch.nn import functional as F
import matplotlib.pyplot as plt

from scipy import sparse
import os
from os import path

import pickle
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')

import time
import random

In [None]:
# Folder (sibling of the notebook's working directory) holding the CSV inputs.
DP_DIR = "Data_preprocessing"
# The notebook's working directory.
export_dir = Path.cwd()
# <parent of working directory>/Data_preprocessing
files_path = export_dir.parent / DP_DIR

In [None]:
# Read the train and test tables from the preprocessing folder.
train_data_mixed, test_data = (
    pd.read_csv(files_path / csv_name)
    for csv_name in ('train_data.csv', 'test_data.csv')
)

In [None]:
# Fixed dataset dimensions used throughout the notebook.
# NOTE(review): presumably these match the preprocessed CSVs - confirm against the data.
num_users, num_items = 19155, 9639

In [None]:
# Identity matrix: row j is the one-hot vector of item j.
items_array = torch.eye(num_items, num_items)

In [None]:
# Keep every column except the trailing 3 (train) / 2 (test) ones.
# NOTE(review): presumably the trailing columns are ids/labels rather than items - confirm.
train_array, test_array = (
    frame.iloc[:, :-n_trailing].to_numpy()
    for frame, n_trailing in ((train_data_mixed, 3), (test_data, 2))
)
# Per-row column index that is blanked out to build the evaluation input.
test_y_pos = test_data['y_positive'].to_numpy()

In [None]:
# Copy of the test matrix in which, for every row, the entry at that row's
# `test_y_pos` column is set to 0.
test_array_without_item = test_array.copy()
test_array_without_item[np.arange(len(test_array)), test_y_pos] = 0

In [None]:
class BaseModel(nn.Module):
    """
    Shared interface for the models in this notebook.

    Every method here is a placeholder that does nothing and returns None;
    subclasses provide the real behaviour.
    """
    def __init__(self):
        super().__init__()

    def forward(self, *input):
        """Placeholder forward pass; accepts any positional arguments."""
        return None

    def train_one_epoch(self, *input):
        """Placeholder for one training epoch; accepts any positional arguments."""
        return None

    def predict(self, eval_users, eval_pos, test_batch_size):
        """Placeholder prediction hook taking users, positions and a batch size."""
        return None

In [None]:
# Define VAE model

class MultVAE(BaseModel):
    """
    Variational Autoencoder with Multinomial Likelihood model class.

    The encoder maps a (row-normalised) interaction vector to the mean and
    log-variance of a Gaussian latent code; the decoder maps a latent code back
    to one logit per item.
    """
    def __init__(self, model_conf, num_users, num_items, device):
        """
        :param model_conf: model configuration; the keys 'enc_dims',
            'total_anneal_steps', 'anneal_cap' and 'dropout' are read
        :param num_users: number of users
        :param num_items: number of items
        :param device: choice of device
        """
        super(MultVAE, self).__init__()
        self.num_users = num_users
        self.num_items = num_items

        enc_dims = model_conf['enc_dims']
        if isinstance(enc_dims, str):
            # SECURITY NOTE: eval() executes arbitrary code. Only pass trusted
            # configuration strings here (e.g. "[512, 128]").
            enc_dims = eval(enc_dims)
        # Build from a copy so the caller's configuration is not mutated.
        self.enc_dims = [self.num_items] + list(enc_dims)
        self.dec_dims = self.enc_dims[::-1]
        self.dims = self.enc_dims + self.dec_dims[1:]

        self.total_anneal_steps = model_conf['total_anneal_steps']
        self.anneal_cap = model_conf['anneal_cap']

        self.dropout = model_conf['dropout']
        # self.reg = model_conf.reg

        self.eps = 1e-6
        self.anneal = 0.
        self.update_count = 0

        self.device = device

        # Encoder: Linear + ReLU per layer; the last layer has no activation
        # and is twice as wide because it emits both the mean and the
        # log-variance of the latent code.
        last_enc_layer = len(self.enc_dims) - 2
        self.encoder = nn.ModuleList()
        for i, (d_in, d_out) in enumerate(zip(self.enc_dims[:-1], self.enc_dims[1:])):
            if i == last_enc_layer:
                d_out *= 2
            self.encoder.append(nn.Linear(d_in, d_out))
            if i != last_enc_layer:
                self.encoder.append(nn.ReLU())

        # Decoder: mirror of the encoder; the last layer emits raw item logits.
        last_dec_layer = len(self.dec_dims) - 2
        self.decoder = nn.ModuleList()
        for i, (d_in, d_out) in enumerate(zip(self.dec_dims[:-1], self.dec_dims[1:])):
            self.decoder.append(nn.Linear(d_in, d_out))
            if i != last_dec_layer:
                self.decoder.append(nn.ReLU())

        self.to(self.device)

    def forward(self, rating_matrix):
        """
        Forward pass
        :param rating_matrix: rating matrix (a single 1-D row is promoted to a batch of one)
        :return: (logits, kl_loss) in training mode, logits only in eval mode
        """
        # encoder
        if len(rating_matrix.shape) == 1:
            rating_matrix = torch.unsqueeze(rating_matrix, 0)
        h = F.dropout(F.normalize(rating_matrix, dim=-1), p=self.dropout, training=self.training)
        for layer in self.encoder:
            h = layer(h)

        # sample
        latent_dim = self.enc_dims[-1]
        mu_q = h[:, :latent_dim]
        logvar_q = h[:, latent_dim:]  # log sigma^2, batch x latent_dim
        std_q = torch.exp(0.5 * logvar_q)  # sigma, batch x latent_dim

        # NOTE(review): the noise is drawn with std=0.01 rather than 1; kept
        # as in the original - confirm this is intentional.
        epsilon = torch.zeros_like(std_q).normal_(mean=0, std=0.01)
        # self.training is a bool, so the noise term vanishes in eval mode.
        sampled_z = mu_q + self.training * epsilon * std_q

        output = sampled_z
        for layer in self.decoder:
            output = layer(output)

        if self.training:
            # KL divergence between N(mu, sigma^2) and N(0, 1), summed over the
            # latent dimensions and averaged over the batch.
            kl_loss = ((0.5 * (-logvar_q + torch.exp(logvar_q) + torch.pow(mu_q, 2) - 1)).sum(1)).mean()
            return output, kl_loss
        else:
            return output

    def train_one_epoch(self, dataset, optimizer, batch_size):
        """
        Train model for one epoch
        :param dataset: given data (2-D array with one row per training example)
        :param optimizer: choice of optimizer
        :param batch_size: batch size
        :return: sum of the per-batch losses (detached from the autograd graph);
            0.0 when the dataset yields no batches
        """
        self.train()

        # user, item, rating pairs
        train_matrix = dataset

        num_training = train_matrix.shape[0]
        num_batches = int(np.ceil(num_training / batch_size))
        perm = np.random.permutation(num_training)

        loss = 0.0
        for b in range(num_batches):
            optimizer.zero_grad()

            # Slicing past the end is safe, so the last (short) batch needs no
            # special case.
            batch_idx = perm[b * batch_size: (b + 1) * batch_size]
            batch_matrix = torch.FloatTensor(train_matrix[batch_idx]).to(self.device)

            # Linearly ramp the KL weight up to anneal_cap over total_anneal_steps updates.
            if self.total_anneal_steps > 0:
                self.anneal = min(self.anneal_cap, 1. * self.update_count / self.total_anneal_steps)
            else:
                self.anneal = self.anneal_cap

            pred_matrix, kl_loss = self.forward(batch_matrix)

            # cross_entropy
            ce_loss = -(F.log_softmax(pred_matrix, 1) * batch_matrix).sum(1).mean()

            batch_loss = ce_loss + kl_loss * self.anneal

            batch_loss.backward()
            optimizer.step()

            self.update_count += 1

            # Accumulate a detached value so the running total does not keep
            # every batch's autograd graph alive.
            loss += batch_loss.detach()
            if b % 200 == 0:
                print('(%3d / %3d) loss = %.4f' % (b, num_batches, batch_loss.item()))
        return loss

    def predict(self, eval_users, test_batch_size):
        """
        Predict the model on test set
        :param eval_users: evaluation (test) users, one interaction row per user
        :param test_batch_size: batch size for test set
        :return: predictions (float32 numpy array of raw item logits, same shape as eval_users)
        """
        # forward() only returns bare logits in eval mode, so switch to it for
        # the duration of the call and restore the previous mode afterwards.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                input_matrix = torch.Tensor(eval_users).to(self.device)
                preds = np.zeros(tuple(input_matrix.shape), dtype=np.float32)

                num_data = input_matrix.shape[0]
                for start in range(0, num_data, test_batch_size):
                    stop = start + test_batch_size
                    test_batch_matrix = input_matrix[start:stop]
                    batch_pred_matrix = self.forward(test_batch_matrix)
                    # NOTE(review): the original called the out-of-place
                    # `masked_fill(seen, -inf)` here and discarded the result,
                    # so seen items were never masked. The no-op is dropped
                    # rather than made effective, because the losses computed
                    # from these predictions elsewhere in the notebook multiply
                    # log-softmax values by targets and would turn into
                    # NaN/inf with -inf logits.
                    preds[start:stop] = batch_pred_matrix.cpu().numpy()
        finally:
            self.train(was_training)
        return preds

In [None]:
# Hyper-parameters and run settings for the VAE experiment.
config = dict(
    # dataset
    data_name="ml-1m",
    train_ratio=0.8,
    # model
    enc_dims=[512, 128],
    dropout=0.5,
    anneal_cap=0.2,
    total_anneal_steps=200000,
    # optimisation
    num_epochs=500,
    batch_size=512,
    test_batch_size=512,
    learning_rate=0.01,
    early_stop=True,
    patience=50,
    # evaluation
    top_k=[100],
)


In [None]:
# Create the VAE model and its optimizer; use the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("num_users is ", num_users)
print("num_items is ", num_items)
model = MultVAE(config, num_users,num_items, device)
# Take the learning rate from the config instead of repeating the literal.
optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])

In [None]:
# Training loop for VAE
# at each epoch, save the model's parameters

# `path` in this notebook is the `os.path` module, so `path + "..."` raises a
# TypeError; checkpoints are written to a "checkpoints" folder under
# `export_dir`, created on first use.
checkpoint_dir = Path(export_dir, "checkpoints")
checkpoint_dir.mkdir(parents=True, exist_ok=True)

train_losses = []
test_losses = []
total_test_losses = []
epochs = 1
for epoch in range(epochs):
    if epoch==7:
        # An optimizer reads its learning rate from `param_groups`; assigning
        # `optimizer.lr` only creates an unused attribute.
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.001
    model.train()
    loss = model.train_one_epoch(train_array, optimizer, config["batch_size"])
    # NOTE(review): `loss` is a sum of per-batch mean losses but is divided by
    # the number of users - confirm the intended normalisation.
    train_losses.append(float(loss/num_users))
    torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
    }, checkpoint_dir / f"VAE_epoch_{epoch}.pt")


    model.eval()
    # The second argument of predict() is the evaluation batch size.
    predictions = torch.Tensor(model.predict(test_array_without_item, config["test_batch_size"])).to(device)
    # Cross-entropy on the entries that were blanked out of the model input.
    ce_losses = -(F.log_softmax(predictions, 1) * torch.Tensor(test_array-test_array_without_item).to(device)).sum(1).mean()
    total_test_losses.append(ce_losses.item())

    print(total_test_losses[-1])

In [None]:
# Training loss per epoch; the vertical line marks the epoch with the lowest value.
fig, ax = plt.subplots()
ax.plot(np.arange(len(train_losses)), train_losses, label='Train loss')
ax.axvline(x=np.argmin(train_losses))
ax.set(title='Training loss per epoch', xlabel='Epoch', ylabel='Loss')
ax.legend()
plt.show()

In [None]:
# Test loss per epoch; the vertical line marks the epoch with the lowest value.
fig, ax = plt.subplots()
ax.plot(np.arange(len(total_test_losses)), total_test_losses, label='Total test loss')
ax.axvline(x=np.argmin(total_test_losses))
ax.set(title='Test loss per epoch', xlabel='Epoch', ylabel='Loss')
ax.legend()
plt.show()

## Load best state for evaluations

In [None]:
# Load the model that had the lowest test loss

# `path` in this notebook is the `os.path` module, so `path + "..."` raises a
# TypeError; the checkpoint is read from the "checkpoints" folder under
# `export_dir`.
best_epoch = int(np.argmin(total_test_losses))
checkpoint = torch.load(
    Path(export_dir, "checkpoints", f"VAE_epoch_{best_epoch}.pt"),
    map_location=device,  # lets a checkpoint saved on GPU load on a CPU-only machine
)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

In [None]:
# freeze the model: no parameter will receive gradients from here on

for frozen_param in model.parameters():
    frozen_param.requires_grad_(False)

In [None]:
# check the mean test loss

predictions = torch.Tensor(model.predict(test_array_without_item, 20)).to(device)
ce_loss = (F.log_softmax(predictions, dim=1) * torch.Tensor(test_array).to(device)).sum(dim=1).mean().neg()
# NOTE(review): `ce_loss` is already averaged over rows by `.mean()`; dividing
# by the row count again looks like a double normalisation - confirm intent.
num_test_rows = test_array_without_item.shape[0]
print(float(ce_loss)/num_test_rows)

In [None]:
# Softmax over the last (item) dimension. Stating `dim` explicitly avoids the
# deprecated implicit choice; for the 1-D score vectors used in this notebook
# the result is unchanged.
softmax = nn.Softmax(dim=-1)

### Metrics for evaluating VAE recommender 

In [None]:
def get_top_k(user_vector, original_user_vector, num_items, model, top_k):
    """
    Rank the items a user has not interacted with by model probability.

    :param user_vector: interaction vector fed to the model
    :param original_user_vector: interaction vector used to mask out items;
        entries equal to 1 get a score of 0 and are dropped
    :param num_items: unused; kept for interface compatibility
    :param model: callable whose output's first row holds one logit per item
    :param top_k: maximum number of items to return
    :return: dict mapping item index -> score, ordered by descending score
        (ties keep ascending item order); only strictly positive scores appear
    """
    user_tensor = torch.Tensor(user_vector).to(device)
    with torch.no_grad():
        # Explicit softmax over the item axis instead of relying on a
        # module-level Softmax built without a `dim`.
        output_model = torch.softmax(model(user_tensor)[0], dim=-1).cpu().numpy()
    neg = np.ones_like(original_user_vector) - original_user_vector
    output = neg * output_model

    # Vectorised replacement for the per-item Python loop: keep positive
    # scores, then sort them in descending order. The stable sort reproduces
    # the tie order of `sorted(..., reverse=True)`.
    candidates = np.flatnonzero(output > 0)
    order = np.argsort(-output[candidates], kind='stable')
    ranked = candidates[order][:top_k]
    return {int(item): output[item] for item in ranked}

In [None]:
def get_index_in_the_list(user_vector, original_user_vector, item_id, num_items, model):
    """
    Return the 0-based position of `item_id` in the user's ranked candidate list.

    The ranking is requested from `get_top_k` with `top_k=num_items`.
    Raises ValueError when `item_id` is not among the returned candidates.
    """
    ranked_items = get_top_k(user_vector, original_user_vector, num_items, model, top_k=num_items)
    return list(ranked_items).index(item_id)

In [None]:
def get_HR_at_k(users_array, y_array, model, k):
    """
    Fraction of users whose target item is ranked within the first `k` positions.

    :param users_array: one interaction vector per user; it is used both as the
        model input and as the mask of items to exclude
    :param y_array: target item index for each user
    :param model: model handed to `get_index_in_the_list`
    :param k: rank cutoff (1-based)
    """
    total_users = len(users_array)
    hits = sum(
        1
        for row in range(total_users)
        if get_index_in_the_list(users_array[row], users_array[row], y_array[row], num_items, model) + 1 <= k
    )
    return hits / total_users

In [None]:
def VAE_hit_rate_and_MRR(test_array, num_items, k, model):
    """
    Compute HR@10, HR@50, HR@100 and the mean reciprocal rank over `test_array`.

    Each row is read as `[interaction vector..., <second-to-last>, item_id]`:
    the last entry is the target item, and the last two entries are stripped
    before the vector is used. The target item's one-hot vector (taken from the
    module-level `items_array`) is subtracted from the interaction vector
    before ranking.

    :param test_array: 2-D array laid out as described above
    :param num_items: number of items, forwarded to the ranking helper
    :param k: unused; kept for interface compatibility
    :param model: model used for ranking
    :return: (HR@10, HR@50, HR@100, MRR)
    """
    counter_10 = 0
    counter_50 = 0
    counter_100 = 0
    reciprocal_rank_sum = 0.0
    num_rows = test_array.shape[0]
    for i in range(num_rows):
        item_id = test_array[i][-1]
        item_vector = np.array(items_array[item_id])
        user_vector = test_array[i][:-2] - item_vector
        # 1-based rank of the target item.
        rank = get_index_in_the_list(user_vector, user_vector, item_id, num_items, model) + 1

        if rank <= 10:
            counter_10 += 1
        if rank <= 50:
            counter_50 += 1
        if rank <= 100:
            counter_100 += 1
        # True division: np.reciprocal on an integer performs integer
        # division and yields 0 for every rank greater than 1.
        reciprocal_rank_sum += 1.0 / rank

        if i%100 == 0:
            print(i)
    return counter_10/num_rows, counter_50/num_rows, counter_100/num_rows, reciprocal_rank_sum/num_rows

In [None]:
# Hit rate at 10 on the test users (target item removed from the input).
get_HR_at_k(users_array=test_array_without_item, y_array=test_y_pos, model=model, k=10)

### Check the recommendations distribution

In [None]:
def get_user_recommended_item(user_tensor, recommender):
    """
    Return the index of the highest-scoring item the user has not interacted with.

    :param user_tensor: 1-D interaction vector; non-zero entries mark seen items
    :param recommender: callable whose output's first row holds one score per item
    :return: 0-dim tensor holding the index of the recommended item
    """
    user_res = recommender(user_tensor)[0]
    # Mask seen items with -inf. Multiplying their scores by 0 (the previous
    # approach) lets a seen item win whenever every unseen score is negative.
    seen_items = user_tensor.bool()
    user_recommenations = user_res.masked_fill(seen_items, float('-inf'))
    return(torch.argmax(user_recommenations))

In [None]:
# Top-1 recommended item for every training row, keyed by row index.
topk_train = {
    row: int(get_user_recommended_item(torch.Tensor(history).to(device), model).item())
    for row, history in enumerate(train_array)
}

In [None]:
# Top-1 recommended item for every test row, keyed by row index.
topk_test = {
    row: int(get_user_recommended_item(torch.Tensor(history).to(device), model).item())
    for row, history in enumerate(test_array)
}

In [None]:
# Distribution of the top-1 recommended item over the training rows.
fig, ax = plt.subplots()
# Materialise the dict view as a list so hist() receives a plain sequence.
ax.hist(list(topk_train.values()), bins=40)
ax.set(title='Top-1 recommendations (train)', xlabel='Recommended item index', ylabel='Number of rows')
plt.show()

In [None]:
# Distribution of the top-1 recommended item over the test rows.
fig, ax = plt.subplots()
# Materialise the dict view as a list so hist() receives a plain sequence.
ax.hist(list(topk_test.values()), bins=40)
ax.set(title='Top-1 recommendations (test)', xlabel='Recommended item index', ylabel='Number of rows')
plt.show()