# Imports

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Softmax
from torch.nn import Module
from torch.nn import BCELoss
from torch.nn import CrossEntropyLoss
import time
from torch.nn import functional as F

from scipy import sparse
import os
from os import path

import pickle
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')

import time
import random

In [None]:
# Shared softmax module used to normalise recommender outputs.
# The dimension is given explicitly because relying on the implicit choice is
# deprecated in PyTorch; `dim=-1` matches that implicit choice for 1-D and 2-D
# inputs.
softmax = nn.Softmax(dim=-1)

### Data preprocessing

In [None]:
# Input files live in a "Data_preprocessing" directory next to the current
# working directory (i.e. under the working directory's parent).
DP_DIR = "Data_preprocessing"
export_dir = Path.cwd()
files_path = export_dir.parent / DP_DIR

In [None]:
# Read the train and test tables and keep a NumPy array version of each.
train_data_mixed = pd.read_csv(files_path / 'train_data_mixed.csv')
test_data = pd.read_csv(files_path / 'test_data.csv')
train_array, test_array = train_data_mixed.to_numpy(), test_data.to_numpy()

In [None]:
# Pickled lookup dictionaries stored alongside the preprocessed data.
# NOTE: pickle executes arbitrary code on load; only use files you produced.
items_values_dict = pickle.loads((files_path / 'items_values_dict_ML1.pkl').read_bytes())
prob_dict = pickle.loads((files_path / 'prob_dict.pkl').read_bytes())

# Item table, both as a DataFrame and as a plain NumPy array.
items_values = pd.read_csv(files_path / 'items_values.csv')
items_array = items_values.to_numpy()

In [None]:
def get_user_recommended_item(user_tensor, recommender):
    """Return the index of the highest-scoring item outside the user's history.

    The first output of ``recommender(user_tensor)`` is normalised with the
    module-level ``softmax``. Scores are then multiplied by
    ``1 - user_tensor`` so that positions set to 1 in ``user_tensor`` are
    zeroed, and the argmax of the result is returned as a tensor.
    """
    scores = softmax(recommender(user_tensor)[0])
    # 1 where the user has no interaction, 0 where they do.
    unseen_mask = torch.ones_like(user_tensor) - user_tensor
    return torch.argmax(scores * unseen_mask)

In [None]:
# Keep a single row per user for building the top-k data.
# Column -3 is the user id; np.unique reports the first row index of each id.
_, unique_indices = np.unique(train_array[:, -3], return_index=True, axis=0)

# Training array restricted to those first-occurrence rows.
train_unique_arr = train_array[unique_indices]

# VAE Model definition and loading

In [None]:
import ipynb
import importlib
from ipynb.fs.defs.VAE import MultVAE
importlib.reload(ipynb.fs.defs.VAE)
from ipynb.fs.defs.VAE import MultVAE

In [None]:
# Hyper-parameter dictionary passed to the MultVAE constructor below.
config= {
    "data_name": "ml-1m",
    "train_ratio":0.8,
  
    "enc_dims": [512,128],
    "dropout": 0.5,
    "anneal_cap": 0.2,
    "total_anneal_steps": 200000,
  
    "num_epochs": 500,
    "batch_size": 512,
    "test_batch_size": 512,
    "learning_rate": 0.01,
    "early_stop": True,
    "patience": 50,
  
    "top_k": [100]
  }


# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hard-coded dataset dimensions (presumably the ml-1m user/item counts — confirm
# against the preprocessed data).
num_users = 6040
num_items = 3706
print("num_users is ", num_users)
print("num_items is ", num_items)
VAE_recommender = MultVAE(config, num_users, num_items, device)
# NOTE(review): lr is hard-coded here and does not read config["learning_rate"];
# the optimizer state is replaced from the checkpoint when it is loaded.
optimizer = torch.optim.Adam(VAE_recommender.parameters(), lr=0.001)

### load the trained VAE recommender

In [None]:
# Restore the trained VAE weights and optimizer state from the checkpoint.
# `map_location=device` remaps the stored tensors onto the device selected
# above, so a checkpoint saved on a GPU can still be loaded on a CPU-only host.
checkpoint = torch.load(Path(files_path,"VAE_epoch_9.pt"), map_location=device)
VAE_recommender.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Switch to evaluation mode: the recommender is only used for inference here.
VAE_recommender.eval()

### Recommender Freezing

In [None]:
# Freeze the VAE recommender: turn gradient tracking off for every parameter.
for vae_param in VAE_recommender.parameters():
    vae_param.requires_grad_(False)

### Create dictionary of the top recommended item for train and test users

In [None]:
# Map each user id to the index of the item the VAE ranks first for them.
# Train rows end with three non-history columns (user id at -3); test rows end
# with two (user id at -2).
topk_train = {}
for train_row in train_array:
    history = torch.Tensor(train_row[:-3]).to(device)
    best_item = get_user_recommended_item(history, VAE_recommender)
    topk_train[train_row[-3]] = int(best_item.cpu().detach().numpy())

topk_test = {}
for test_row in test_array:
    history = torch.Tensor(test_row[:-2]).to(device)
    best_item = get_user_recommended_item(history, VAE_recommender)
    topk_test[test_row[-2]] = int(best_item.cpu().detach().numpy())

# Backbone Model definition and loading

In [None]:
class MLP_G(nn.Module):
    """Score user/item pairs as the sigmoid of projected dot products.

    Both inputs are projected from ``input_size`` to ``hidden_size`` by their
    own bias-free linear layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # Attribute names and registration order are kept as-is: they define
        # the state-dict keys and the order returned by ``children()``.
        self.linear_x = nn.Linear(input_size, hidden_size, bias=False)
        self.linear_y = nn.Linear(input_size, hidden_size, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user, item):
        """Return ``sigmoid(linear_x(user) @ linear_y(item).T)``.

        Both inputs are cast to float before being projected.
        """
        user_embedding = self.linear_x(user.float())
        item_embedding = self.linear_y(item.float())
        return self.sigmoid(user_embedding @ item_embedding.T)

In [None]:
class Recommender_G(nn.Module):
    """Thin wrapper that runs an ``MLP_G`` scorer on a fixed device.

    Args:
        num_items: Input size given to ``MLP_G``.
        hidden_size: Hidden size given to ``MLP_G``.
        device: Device the scorer is moved to and on which inputs are placed.
    """

    def __init__(self, num_items, hidden_size, device):
        super(Recommender_G, self).__init__()
        # Remember the device so forward() does not depend on a module-level
        # global of the same name.
        self.device = device
        self.mlp = MLP_G(num_items, hidden_size).to(device)

    def forward(self, user_vector, item_vector):
        """Move both inputs to ``self.device`` and return the ``mlp`` output."""
        user_vector = user_vector.to(self.device)
        item_vector = item_vector.to(self.device)
        output = self.mlp(user_vector, item_vector)
        return output.to(self.device)

In [None]:
# Build the backbone recommender and restore its trained weights.
hidden_dim=20
backbone = Recommender_G(num_items, hidden_dim, device)

# `map_location=device` remaps stored tensors onto the selected device, so
# weights saved on a GPU can still be loaded on a CPU-only host.
backbone.load_state_dict(torch.load(Path(files_path,"recommender_model.pt"), map_location=device))
# The backbone stays in training mode.
backbone.train()

In [None]:
# Enable gradient tracking on every backbone parameter.
for backbone_param in backbone.parameters():
    backbone_param.requires_grad_(True)

# Metrics help functions

In [None]:
# get top k recommendations
def get_top_k(user_vector, original_user_vector, num_items, model, top_k):
    """Return the ``top_k`` best-scoring items outside the original history.

    Args:
        user_vector: User history (array or tensor) fed to ``model``.
        original_user_vector: Array with 1 at items to exclude from the
            ranking; scores are multiplied by ``1 - original_user_vector``.
        num_items: Unused; kept for interface compatibility.
        model: Callable whose first output holds the per-item scores.
        top_k: Maximum number of items to return.

    Returns:
        dict mapping item index -> score, ordered by descending score and
        holding at most ``top_k`` entries. Items whose masked score is not
        strictly positive are left out. Ties keep ascending index order.
    """
    # as_tensor replaces the legacy torch.Tensor(...) constructor and also
    # accepts a tensor that is already on the target device.
    user_tensor = torch.as_tensor(user_vector, dtype=torch.float32, device=device)
    probabilities = softmax(model(user_tensor)[0]).cpu().detach().numpy().astype(np.float64)

    # Zero the scores of items present in the original history.
    unseen = 1 - np.asarray(original_user_vector, dtype=np.float64)
    scores = unseen * probabilities

    # Keep strictly positive scores only, sorted descending. The stable sort
    # preserves index order among equal scores.
    candidates = np.flatnonzero(scores > 0)
    ranked = candidates[np.argsort(-scores[candidates], kind="stable")]

    # Apply the cut-off (previously computed but never applied).
    top_k = min(top_k, len(ranked))
    return {int(item): scores[item] for item in ranked[:top_k]}

In [None]:
def get_index_in_the_list(user_vector, original_user_vector, item_id, num_items, model):
    """
    Return the 0-based position of `item_id` in the ranked recommendation list.

    The ranking comes from `get_top_k`, called with `num_items` as the cut-off.
    `list.index` raises ValueError when `item_id` is not in that ranking.
    """
    ranking = get_top_k(user_vector, original_user_vector, num_items, model, num_items)
    ranked_item_ids = list(ranking)
    return ranked_item_ids.index(item_id)

#### LXR based similarity

In [None]:
def find_LXR_mask(x, item_id, item_tensor, modelCombined_g):
    """Return per-item values of the explainer-masked user history.

    Args:
        x: User history tensor. It is no longer modified; a copy is used.
        item_id: Index of the item being explained; zeroed in the copy.
        item_tensor: Item representation passed through to ``modelCombined_g``.
        modelCombined_g: Callable returning a 4-tuple whose first element is
            the masked history.

    Returns:
        dict mapping position -> value of the masked history at that position.
    """
    # Work on a copy so the caller's tensor is not altered as a side effect.
    user_hist = x.clone()
    user_hist[item_id] = 0
    item_id = np.int64(item_id)
    x_masked_g, _ce_loss, _l1_loss, _total_loss = modelCombined_g(user_hist, item_tensor, item_id, device)
    # One bulk conversion instead of one .item() call per element.
    return dict(enumerate(x_masked_g.detach().tolist()))

#### Calculate the POS@20 metric value for monitoring during training

In [None]:
def calculate_pos_top_k(user_vector,  item_id, items_tensor, num_of_bins, num_items, recommender, modelCombined_g, k=20):
    """Compute POS@k hits while removing the most explanatory items step by step.

    The history is split into ``num_of_bins`` near-equal bins. Step 0 removes
    nothing. Each later step removes one more bin of the items ranked highest
    by ``find_LXR_mask``. After each step the rank of ``item_id`` under
    ``recommender`` is checked against ``k``.

    Args:
        user_vector: NumPy user history vector.
        item_id: Index of the item being explained.
        items_tensor: Item representation passed to ``find_LXR_mask``.
        num_of_bins: Number of removal steps beyond the initial one.
        num_items: Passed through to ``get_index_in_the_list``.
        recommender: Model used to rank items.
        modelCombined_g: Explainer wrapper passed to ``find_LXR_mask``.
        k: Rank threshold. It was previously ignored in favour of a
            hard-coded 20; the default keeps that behaviour.

    Returns:
        Integer array of length ``num_of_bins + 1``; entry ``i`` is 1 when the
        item's 1-based rank after step ``i`` is at most ``k``, else 0.
    """
    user_tensor = torch.FloatTensor(user_vector).to(device)
    user_tensor[item_id] = 0
    user_hist_size = np.sum(user_vector)

    # Leading 0 is the "nothing removed" step, followed by the bin sizes.
    bins = [0] + [len(chunk) for chunk in np.array_split(np.arange(user_hist_size), num_of_bins, axis=0)]

    POS_at_k = [0] * (num_of_bins+1)

    # Item indices ordered from highest to lowest explainer value.
    sim_items = find_LXR_mask(user_tensor, item_id, items_tensor, modelCombined_g)
    ranked_items = sorted(sim_items.items(), key=lambda pair: pair[1], reverse=True)
    ranked_ids = torch.tensor([idx for idx, _ in ranked_items], dtype=torch.long, device=device)

    total_items = 0
    for step, bin_size in enumerate(bins):
        total_items += bin_size
        removed = torch.zeros_like(user_tensor, dtype=torch.float32, device=device)
        removed[ranked_ids[:total_items]] = 1
        remaining_history = user_tensor - removed # remove the masked items from the user history
        rank = get_index_in_the_list(remaining_history, user_vector, item_id, num_items, recommender) + 1
        POS_at_k[step] = 1 if rank <= k else 0

    return np.array(POS_at_k)

# Explainer definition & training

In [None]:
class Explainer_G(nn.Module):
    """Produce a mask over the input dimension from a user/item pair.

    The first two sub-modules of the backbone's first child are reused as the
    user and item encoders. They are the same module objects, not copies. A
    ReLU -> Linear -> Sigmoid head maps their concatenated outputs to
    ``input_size`` values.
    """

    def __init__(self, backbone, input_size, hidden_size, device):
        super().__init__()
        self.device = device

        # First child of the backbone, then its sub-modules in order.
        first_child = list(backbone.children())[0]
        encoder_layers = list(first_child.children())

        self.slice1 = nn.Sequential(*encoder_layers[:1])
        self.slice2 = nn.Sequential(*encoder_layers[1:2])
        head = nn.Sequential(
            nn.ReLU(),
            nn.Linear(in_features=hidden_size * 2, out_features=input_size),
            nn.Sigmoid(),
        )
        self.bottleneck = head.to(self.device)

    def forward(self, user, item):
        """Return the mask computed from ``user`` and ``item``.

        Both inputs are cast to float before encoding.
        """
        user_code = self.slice1(user.float())
        item_code = self.slice2(item.float())
        joined = torch.cat((user_code, item_code), dim=-1)
        return self.bottleneck(joined).to(self.device)

In [None]:
class LossModelCombined(torch.nn.Module):
    """Combine the explainer and the recommender into one training objective.

    Args:
        alpha_parameter: Weight of the L1 term in the combined loss.
        backbone: Unused; kept for interface compatibility.
        recommender: Callable whose first output holds per-item scores.
        explainer_model_g: Callable ``(x, y_true) -> mask``.
        hidden_size: Stored as an attribute; not used in ``forward``.
        device: Stored as an attribute. ``forward`` takes its own ``device``.
    """

    def __init__(self, alpha_parameter, backbone, recommender, explainer_model_g, hidden_size, device):
        super().__init__()
        self.alpha_parameter = alpha_parameter
        self.recommender =  recommender
        self.explainer_model_g = explainer_model_g
        self.hidden_size = hidden_size
        self.device = device

    def forward(self, x, y_true, item_id, device):
        """Mask the history, score it, and return the loss terms.

        Args:
            x: User history tensor.
            y_true: Target item vector (tensor, array or list).
            item_id: Unused in the computation; kept for the interface.
            device: Device on which the computation runs.

        Returns:
            Tuple ``(x_masked, cross_entropy_loss, l1_loss, lossComb)`` where
            ``lossComb = cross_entropy_loss + alpha_parameter * l1_loss``.
        """
        self.x = x.to(device) # user history tensor

        # Copy the target without calling torch.tensor() on an existing tensor,
        # which emits a copy-construct warning.
        if torch.is_tensor(y_true):
            target = y_true.detach().clone()
        else:
            target = torch.tensor(y_true)
        self.y_true = target.float().to(device) # item one hot tensor

        # Mask produced by the explainer model.
        mask = self.explainer_model_g(self.x, self.y_true).to(device)
        # "Weakened" history.
        x_masked = self.x * mask

        # Recommender scores for the masked user vector.
        y_masked = self.recommender(x_masked)[0]

        cross_entropy_loss = -torch.sum((F.log_softmax(y_masked, -1) * self.y_true), -1).mean()

        # L1 loss on the mask.
        l1_loss = torch.mean(torch.abs(mask)).to(device)

        # Combined loss.
        lossComb = cross_entropy_loss + self.alpha_parameter * l1_loss

        return x_masked, cross_entropy_loss, l1_loss, lossComb

#### Train LXR for different alphas and monitor the POS@20 metric during training, computed on a sample of 100 test users

In [None]:
# Train one LXR explainer per alpha value and track the loss and POS@20 per epoch.
alphas = [0.00055, 0.00095, 0.00125, 0.0055]
hidden_dim = 20
torch.manual_seed(58)
np.random.seed(0)
num_of_bins = 10
num_of_rand_users = 100

# Fixed random sample of test users used for the POS@20 monitoring.
random_rows = np.random.choice(test_array.shape[0], num_of_rand_users, replace=False)
random_sampled_array = test_array[random_rows]
lxr_epochs = 40
batch_size = 64
loader = torch.utils.data.DataLoader(train_unique_arr, batch_size=batch_size, shuffle=True)
num_batches = int(np.ceil(num_users / batch_size))

# Epoch at which the learning rate changes -> new learning rate.
lr_schedule = {5: 0.005, 10: 0.001, 20: 0.0005}

# dictionaries for saving the results
train_total_losses = {}
train_alphas = {}
min_losses = {}
pos_at_20_dict = {}
min_pos_at_20 = {}


for run, alpha_parameter in enumerate(alphas):
    # NOTE(review): Explainer_G reuses the backbone's own linear layers (not
    # copies) and they are trainable, so their weights carry over from one
    # alpha run to the next — confirm this is intended.
    explainer_model_g = Explainer_G(backbone, num_items, hidden_dim, device).to(device)
    modelCombined_g = LossModelCombined(alpha_parameter, backbone, VAE_recommender, explainer_model_g, hidden_dim, device).to(device)
    lr = 0.01
    optimizer_comb_g = torch.optim.Adam(modelCombined_g.parameters(), lr=lr)

    run_losses = []
    run_pos_at_20 = []

    for epoch in range(lxr_epochs):
        if epoch in lr_schedule:
            lr = lr_schedule[epoch]
            # The learning rate lives in the optimizer's param groups; setting
            # an `lr` attribute on the optimizer object has no effect.
            for param_group in optimizer_comb_g.param_groups:
                param_group['lr'] = lr

        POS_at_20_lxr = np.zeros(num_of_bins+1)
        print(f'epoch = {epoch} alpha = {alpha_parameter}')
        # Running sums are kept as Python floats so that no autograd graph is
        # held across batches.
        train_loss = 0.0
        total_ce_loss = 0.0
        total_l1_loss = 0.0
        modelCombined_g.train()
        explainer_model_g.train()
        for batch_index, samples in enumerate(loader):
            # create data for explainer:
            histories = samples[:,:-3]
            user_ids = samples[:,-3]
            top1_item = np.array([topk_train[int(x)] for x in user_ids])
            items_vectors = items_array[top1_item]
            items_tensors = torch.Tensor(items_vectors).to(device)

            # zero grad:
            optimizer_comb_g.zero_grad()
            # forward:
            batch_masks, ce_loss, l1_loss, batch_loss = modelCombined_g(histories, items_tensors, top1_item, device)
            total_ce_loss += ce_loss.item()
            total_l1_loss += l1_loss.item()
            train_loss += batch_loss.item()

            # Every batch builds a fresh graph, so it need not be retained.
            batch_loss.backward()
            optimizer_comb_g.step()
        run_losses.append(train_loss/train_unique_arr.shape[0])
        print(f'Epoch {epoch}, CE loss = {total_ce_loss/train_unique_arr.shape[0]}, l1 loss = {total_l1_loss/train_unique_arr.shape[0]}')

        torch.save({
            'model_state_dict': modelCombined_g.state_dict(),
            'optimizer_state_dict': optimizer_comb_g.state_dict(),
            }, f"LXR_VAE_{alpha_parameter}_{epoch}.pt")

        torch.save({
                'model_state_dict': explainer_model_g.state_dict()
                }, f"LXR_VAE_explainer_{alpha_parameter}_{epoch}.pt")

        #Monitoring on POS metric after each epoch
        modelCombined_g.eval()
        explainer_model_g.eval()
        # Evaluation needs no gradients.
        with torch.no_grad():
            for j in range(random_sampled_array.shape[0]):
                user_id = random_sampled_array[j][-2]
                user_vector = random_sampled_array[j][:-2]

                #get top1 of this test user item
                top1_item_test = topk_test[user_id]

                item_vector = items_array[top1_item_test]
                items_tensor = torch.Tensor(item_vector).to(device)

                # Writes through to random_sampled_array; the same value is
                # written every epoch.
                user_vector[top1_item_test] = 0

                res = calculate_pos_top_k(user_vector, top1_item_test, items_tensor, num_of_bins, num_items, VAE_recommender, modelCombined_g, k=20)
                POS_at_20_lxr += res

        # Mean over the removal steps (bin 0 excluded), averaged over users.
        epoch_pos_at_20 = np.mean(POS_at_20_lxr[1:])/random_sampled_array.shape[0]
        run_pos_at_20.append(epoch_pos_at_20)
        print("POS@20 at epoch {:d} is {:.4f} ".format(int(epoch), epoch_pos_at_20))

    train_total_losses[run] = run_losses # list of the loss of every epoch
    train_alphas[run] = alpha_parameter # the alpha parameter that was used in the run
    min_losses[run] = (np.argmin(run_losses), min(run_losses)) # the epoch which got the lowest loss and the loss
    pos_at_20_dict[run] = run_pos_at_20 # list of the pos@20 AUC
    min_pos_at_20[run] = (np.argmin(run_pos_at_20), min(run_pos_at_20))


In [None]:
import matplotlib.pyplot as plt

# Draw one POS@20 curve per run; legend entries are the alphas, in run order.
run_indices = range(len(pos_at_20_dict))
for run_idx in run_indices:
    plt.plot(pos_at_20_dict[run_idx])

legend_labels = [f'{train_alphas[run_idx]}' for run_idx in run_indices]
plt.legend(legend_labels)
plt.show()