In [None]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import shap
export_dir = os.getcwd()
from pathlib import Path
from scipy import sparse
from os import path

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import SGD
from torch.nn import BCELoss
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F
import pickle

import warnings

In [None]:
from torch.nn import Softmax

# Module-level softmax layer. No `dim` is passed, so PyTorch falls back to its
# implicit-dimension behaviour whenever this layer is applied.
softmax = Softmax()

In [None]:
# --- Run configuration -------------------------------------------------------
data_name = "ML1M"  # one of: ML1M, Yahoo, Pinterest
recommender_name = "MLP"  # one of: MLP, VAE

# Data and checkpoints are resolved relative to the PARENT of the current
# working directory.
DP_DIR = Path("processed_data") / data_name
export_dir = Path(os.getcwd())
files_path = export_dir.parent / DP_DIR
checkpoints_path = export_dir.parent / "checkpoints"

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Lookup tables keyed by recommender name, dataset name, or (dataset, recommender).

# Output mode associated with each recommender architecture.
output_type_dict = dict(VAE="multiple", MLP="single")

# Per-dataset sizes; num_items is later used as the model's input width.
num_users_dict = dict(ML1M=6037, Yahoo=13797, Pinterest=19155)
num_items_dict = dict(ML1M=3381, Yahoo=4604, Pinterest=9362)

# Checkpoint file for every (dataset, recommender) pair, under checkpoints_path.
recommender_path_dict = {
    pair: Path(checkpoints_path, file_name)
    for pair, file_name in (
        (("ML1M", "VAE"), "VAE_ML1M_0.0007_128_10.pt"),
        (("ML1M", "MLP"), "MLP1_ML1M_0.0076_256_7.pt"),
        (("Yahoo", "VAE"), "VAE_Yahoo_0.0001_128_13.pt"),
        (("Yahoo", "MLP"), "MLP2_Yahoo_0.0083_128_1.pt"),
        (("Pinterest", "VAE"), "VAE_Pinterest_0.0002_32_12.pt"),
        (("Pinterest", "MLP"), "MLP_Pinterest_0.0062_512_21_0.pt"),
    )
}

# Hidden sizes per (dataset, recommender): a list of widths for the VAE, a
# single width for the MLP.
# NOTE(review): a later cell assigns hidden_dim_dict again (with None for the
# VAE entries), so that later table is the one actually used downstream.
hidden_dim_dict = {
    (dataset, arch): ([512, 128] if arch == "VAE" else mlp_width)
    for dataset, mlp_width in (("ML1M", 32), ("Yahoo", 32), ("Pinterest", 512))
    for arch in ("VAE", "MLP")
}

### MLP recommender

In [None]:
class MLP(nn.Module):
    """Two-tower recommender: one linear projection for users, one for items.

    Both towers take vectors of length ``num_items`` and project them to
    ``hidden_size``; the score of a (user, item) pair is the sigmoid of the
    dot product of the two projections.

    Args:
        hidden_size: Width of the shared embedding space.
        **kw: Must contain ``num_items`` (input width of both towers) and
            ``device`` (where the layers live). Other keys are ignored.
    """

    def __init__(self, hidden_size, **kw):
        super().__init__()
        # Users and items are both encoded as vectors over the item catalogue.
        n_inputs_user = kw['num_items']
        n_inputs_item = kw['num_items']
        self.device = kw['device']
        # Creation order (users first, then items) is kept so that seeded
        # initialisation and state-dict key order stay the same.
        self.users_fc = nn.Linear(n_inputs_user, hidden_size, bias=True).to(self.device)
        self.items_fc = nn.Linear(n_inputs_item, hidden_size, bias=True).to(self.device)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user_tensor, item_tensor):
        """Score every user row against every item row.

        Returns a tensor of sigmoid scores whose entry (i, j) pairs user row i
        with item row j.
        """
        target = self.device
        user_embedding = self.users_fc(user_tensor.to(target))
        item_embedding = self.items_fc(item_tensor.to(target))
        logits = torch.matmul(user_embedding, item_embedding.T).to(target)
        scores = self.sigmoid(logits)
        return scores.to(target)

### MLP Wrapper for SHAP

In [None]:
class MLPWrapper(MLP):
    """Adapter that lets SHAP call the MLP recommender on plain 2-D arrays.

    Every input row has the layout ``[item_id, c_0, ..., c_{n-1}]``: column 0
    selects a row of ``items_array`` (the item representation) and each
    remaining column is the value written to all items of the corresponding
    cluster when the user vector is rebuilt.

    Args:
        hidden_size: Embedding width, forwarded to ``MLP``.
        cluster_to_items: Mapping ``cluster index -> list of item indices``.
        device: Torch device used for all tensors.
        num_items: Length of the user/item input vectors.
        all_items_tensor: Accepted for call-site compatibility; unused here.
        items_array: Array indexed by item id to obtain the item input row.
        output_type: Accepted for call-site compatibility; unused here.
        recommender_name: Accepted for call-site compatibility; unused here.

    Note:
        Constructing the wrapper creates freshly initialised layers; trained
        weights must be loaded into it separately (e.g. ``load_state_dict``).
    """

    def __init__(self, hidden_size, cluster_to_items, device, num_items, all_items_tensor, items_array, output_type, recommender_name):
        super().__init__(hidden_size=hidden_size, device=device, num_items=num_items)
        self.cluster_to_items = cluster_to_items
        self.items_array = items_array
        self.device = device
        self.num_items = num_items

    def preprocess(self, batch):
        """Turn ``[item_id, cluster flags...]`` rows into (user_tensor, items_tensor)."""
        # Explainers may pass float arrays; item ids are used as an index and
        # therefore have to be integers.
        items = np.asarray(batch[:, 0]).astype(np.int64)
        clusters = batch[:, 1:]
        n_clusters = clusters.shape[1]

        items_tensor = torch.Tensor(self.items_array[items]).to(self.device)
        user_tensor = torch.zeros((len(batch), self.num_items), dtype=torch.float).to(self.device)

        for cluster in range(n_clusters):
            cluster_indices = torch.tensor(clusters[:, cluster], dtype=torch.float).to(self.device)
            # Broadcast the per-row cluster value over every item in the cluster.
            user_tensor[:, self.cluster_to_items[cluster]] = cluster_indices.unsqueeze(1)

        return user_tensor, items_tensor

    def forward(self, batch):
        """Return one score per input row as a 1-D NumPy array.

        The parent ``forward`` scores every user row against every item row, so
        the diagonal holds the score of each row's own (user, item) pair. Rows
        are processed in chunks so that this square matrix stays small.
        """
        batch_size = 256
        outputs = []
        # Pure inference: no autograd graph is needed for the outputs.
        with torch.no_grad():
            for i in range(0, len(batch), batch_size):
                mini_batch = batch[i:i+batch_size]
                user_tensor, items_tensor = self.preprocess(mini_batch)
                output = super().forward(user_tensor, items_tensor)
                outputs.append(torch.diag(output).detach().cpu().numpy())
        if not outputs:
            # np.concatenate rejects an empty list; an empty batch has no scores.
            return np.zeros(0, dtype=np.float32)
        return np.concatenate(outputs)

### Read data

In [None]:
# Load the train/test tables for the selected dataset (first CSV column is the
# index) and keep NumPy copies for the tensor conversions further down.
train_data, test_data = (
    pd.read_csv(Path(files_path, csv_name), index_col=0)
    for csv_name in (f'train_data_{data_name}.csv', f'test_data_{data_name}.csv')
)
train_array, test_array = train_data.to_numpy(), test_data.to_numpy()

In [None]:
# Ids of the users in the test dataset: they continue the numbering right
# after the last training row.
n_train_rows, n_test_rows = train_array.shape[0], test_array.shape[0]
row_test_indices = np.arange(n_train_rows, n_train_rows + n_test_rows)

In [None]:
# Load the pickled top-1 mapping for the test split (its values are used
# below as one target item per row).
# NOTE(review): pickle.load executes arbitrary code; only load trusted files.
top1_test_path = Path(files_path, f'top1_test_{data_name}_{recommender_name}.pkl')
with open(top1_test_path, 'rb') as top1_test_file:
    top1_test = pickle.load(top1_test_file)

In [None]:
# Load the pickled top-1 mapping for the training split (its values are used
# below as one target item per row).
# NOTE(review): pickle.load executes arbitrary code; only load trusted files.
top1_train_path = Path(files_path, f'top1_train_{data_name}_{recommender_name}.pkl')
with open(top1_train_path, 'rb') as top1_train_file:
    top1_train = pickle.load(top1_train_file)

In [None]:
# Resolve the per-run constants from the lookup tables.
# output_type is "single" or "multiple", depending on the recommender.
output_type, num_users, num_items = (
    output_type_dict[recommender_name],
    num_users_dict[data_name],
    num_items_dict[data_name],
)

In [None]:
# One-hot representation of every item: row i is the indicator vector of item i.
items_array = np.eye(num_items)
# Same matrix as a torch tensor on the working device.
items_tensor_host = torch.Tensor(items_array)
all_items_tensor = items_tensor_host.to(device)

In [None]:
# Keyword arguments shared by the model constructors below.
kw_dict = dict(
    device=device,
    num_items=num_items,
    all_items_tensor=all_items_tensor,
    items_array=items_array,
    output_type=output_type,
    recommender_name=recommender_name,
)

In [None]:
# Hidden width per (dataset, recommender). Only the MLP entries carry a value;
# the VAE entries are None. This assignment replaces the table of the same
# name defined in an earlier cell.
hidden_dim_dict = {
    (dataset, arch): (mlp_width if arch == "MLP" else None)
    for dataset, mlp_width in (("ML1M", 32), ("Yahoo", 32), ("Pinterest", 512))
    for arch in ("VAE", "MLP")
}

In [None]:
# Hidden width for the configured dataset/recommender combination.
hidden_dim = hidden_dim_dict[data_name, recommender_name]

In [None]:
# Build the recommender and restore its trained weights.
model = MLP(hidden_dim, **kw_dict)
# Not used by the cells shown here; kept so later code relying on it still works.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Pick the checkpoint that matches the configured dataset/recommender instead
# of a fixed file name, and remap storages to the active device so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
checkpoint = torch.load(
    recommender_path_dict[(data_name, recommender_name)],
    map_location=device,
)
model.load_state_dict(checkpoint)

In [None]:
# Freeze the recommender: none of its weights should receive gradients.
for frozen_weight in model.parameters():
    frozen_weight.requires_grad = False

In [None]:
# NOTE(review): K is reassigned in the SHAP section before it is used.
K = 100
# Training interactions as a float tensor (one row per training user).
u_train = torch.tensor(train_array, dtype=torch.float)
# Alias for the one-hot item tensor.
v_train = all_items_tensor

### Clustering

In [None]:
# Seed NumPy's global RNG before clustering; KMeans is created without its own
# random_state.
np.random.seed(3)
# Cluster items using k-means
from sklearn.cluster import KMeans
import numpy as np
k = 10  # number of item clusters

# Transpose so that every row handed to KMeans is an item (described by its
# column of the training matrix).
item_profiles = np.transpose(u_train)
kmeans = KMeans(n_clusters=k)
clusters = kmeans.fit_predict(item_profiles)

In [None]:
# Cluster label of every item, as predicted by the fitted k-means model.
item_clusters = kmeans.predict(np.transpose(u_train))

# Mapping from items to clusters: item index -> cluster label.
item_to_cluster = dict(enumerate(item_clusters))

# Mapping from clusters to items: cluster label -> item indices, in ascending
# item order.
cluster_to_items = {}
for item_index, cluster_label in item_to_cluster.items():
    cluster_to_items.setdefault(cluster_label, []).append(item_index)

In [None]:
# Test interactions as a float tensor (one row per test user).
u_test = torch.tensor(test_array, dtype=torch.float)

In [None]:
# One row per test user, one column per item cluster. The column count follows
# k (the number of k-means clusters) instead of repeating the literal 10.
user_to_clusters = np.zeros((u_test.shape[0], k))

In [None]:
# For every cluster, add up each test user's entries over the items that
# belong to that cluster.
u_test_by_item = u_test.cpu().detach().numpy().T  # rows: items, columns: users
for cluster_id, member_items in cluster_to_items.items():
    user_to_clusters[:, cluster_id] = u_test_by_item[member_items].sum(axis=0)

In [None]:
# 1 where the user has a positive total in the cluster, 0 otherwise.
user_to_clusters_bin = (user_to_clusters > 0).astype(int)

In [None]:
# One row per training user, one column per item cluster. The column count
# follows k (the number of k-means clusters) instead of repeating the literal 10.
user_to_clusters_train = np.zeros((u_train.shape[0], k))

In [None]:
# One row per test user, one column per item cluster. The column count follows
# k (the number of k-means clusters) instead of repeating the literal 10.
user_to_clusters_test = np.zeros((u_test.shape[0], k))

In [None]:
default_value = 0
# Values of the top-1 mappings, in the mappings' own iteration order.
target_items_test = [*top1_test.values()]
target_items_train = [*top1_train.values()]

In [None]:
# For every cluster, add up each training user's entries over the items that
# belong to that cluster.
u_train_by_item = u_train.cpu().detach().numpy().T  # rows: items, columns: users
for cluster_id, member_items in cluster_to_items.items():
    user_to_clusters_train[:, cluster_id] = u_train_by_item[member_items].sum(axis=0)

In [None]:
# 1 where the training user has a positive total in the cluster, 0 otherwise.
user_to_clusters_train_bin = (user_to_clusters_train > 0).astype(int)

In [None]:
# SHAP input for the training users: column 0 is the top-1 item, the
# remaining columns are the binary cluster indicators.
# presumably top1_train iterates in the same order as the training rows — verify
col2 = [*top1_train.values()]
train_rows_with_item = np.insert(user_to_clusters_train_bin, 0, col2, axis=1)
input_train_array = train_rows_with_item.astype(int)

In [None]:
# For every cluster, add up each test user's entries over the items that
# belong to that cluster.
u_test_item_rows = u_test.cpu().detach().numpy().T  # rows: items, columns: users
for cluster_id, member_items in cluster_to_items.items():
    user_to_clusters_test[:, cluster_id] = u_test_item_rows[member_items].sum(axis=0)

In [None]:
# 1 where the test user has a positive total in the cluster, 0 otherwise.
user_to_clusters_test_bin = (user_to_clusters_test > 0).astype(int)

In [None]:
# SHAP input for the test users: column 0 is the top-1 item, the remaining
# columns are the binary cluster indicators.
# presumably top1_test iterates in the same order as the test rows — verify
col2 = [*top1_test.values()]
test_rows_with_item = np.insert(user_to_clusters_test_bin, 0, col2, axis=1)
input_test_array = test_rows_with_item.astype(int)

In [None]:
# SHAP-facing wrapper around the recommender.
wrap_model = MLPWrapper(hidden_dim, cluster_to_items, **kw_dict)
# The wrapper's constructor builds brand-new, randomly initialised layers, so
# the trained weights restored into `model` have to be copied over; otherwise
# SHAP would explain an untrained network.
wrap_model.load_state_dict(model.state_dict())
wrap_model.eval();

### SHAP

In [None]:
# Size of the background sample drawn for the SHAP explainer.
K = 50

In [None]:
# Draw K rows of the training input to serve as SHAP background data.
sampled_subset = shap.sample(input_train_array,K)

In [None]:
# Kernel SHAP explainer that calls wrap_model on 2-D arrays, with
# sampled_subset as the background dataset.
explainer = shap.KernelExplainer(wrap_model,sampled_subset)

In [None]:
# SHAP values for every row of the test input. Column 0 belongs to the
# item-id feature and is dropped in the next cell.
shap_values_test = explainer.shap_values(input_test_array)

In [None]:
# Replace the item-id column of the SHAP matrix with the test users' ids, so
# each row reads [user_id, shap(cluster_0), ..., shap(cluster_{n-1})].
# NOTE(review): this rebinds input_test_array (previously the SHAP *input*)
# to the SHAP *output*; re-running the shap_values cell afterwards would feed
# it the wrong array.
col1 = row_test_indices
cluster_shap_values = shap_values_test[:, 1:]
input_test_array = np.insert(cluster_shap_values, 0, col1, axis=1)

In [None]:
# Persist the item -> cluster mapping next to the processed data.
item_to_cluster_path = Path(files_path, f'item_to_cluster_{recommender_name}_{data_name}.pkl')
with open(item_to_cluster_path, 'wb') as item_to_cluster_file:
    pickle.dump(item_to_cluster, item_to_cluster_file)

In [None]:
# Persist the per-user SHAP matrix (user id in column 0) next to the processed data.
shap_values_path = Path(files_path, f'shap_values_{recommender_name}_{data_name}.pkl')
with open(shap_values_path, 'wb') as shap_values_file:
    pickle.dump(input_test_array, shap_values_file)