In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import shap
import os
export_dir = os.getcwd()
from pathlib import Path

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import SGD
from torch.nn import BCELoss
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F

In [None]:
from torch.nn import Softmax
# Module-level softmax layer; it is not referenced again in the visible part of the notebook.
# NOTE(review): no `dim` is passed, so PyTorch chooses the dimension implicitly when the
# layer is called (deprecated behaviour that emits a warning) — confirm the intended dim.
softmax = nn.Softmax()

In [None]:
from sklearn.decomposition import NMF
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
    
from scipy import sparse
from os import path

### NCF recommender

In [None]:
class NCF(nn.Module):
    """Neural Collaborative Filtering recommender (GMF, MLP, or the fused NeuMF).

    The "embedding" layers are bias-free linear layers, so users and items are
    fed in as dense vectors rather than integer indices.

    Args:
        factor_num: number of predictive factors (width of the GMF embedding
            and of the last MLP layer).
        num_layers: number of fully connected layers in the MLP tower.
        dropout: dropout probability applied before every MLP layer.
        model: one of 'MLP', 'GMF', 'NeuMF-end' or 'NeuMF-pre'.
        GMF_model: pre-trained GMF network; its weights are copied only when
            ``model == 'NeuMF-pre'``.
        MLP_model: pre-trained MLP network; its weights are copied only when
            ``model == 'NeuMF-pre'``.
        **kw: must contain 'device', 'num_features' (length of a user vector)
            and 'num_items' (length of an item vector); other keys are ignored.
    """

    def __init__(self, factor_num, num_layers,
                    dropout, model, GMF_model=None, MLP_model=None, **kw):
        super(NCF, self).__init__()
        self.dropout = dropout
        self.model = model
        # Assigning nn.Module instances as attributes registers them as
        # sub-modules, so their parameters are part of this model's state_dict.
        self.GMF_model = GMF_model
        self.MLP_model = MLP_model
        self.device = kw['device']
        user_size = kw['num_features']
        item_size = kw['num_items']
        self.embed_user_GMF = nn.Linear(user_size, factor_num, bias = False).to(self.device)
        self.embed_item_GMF = nn.Linear(item_size, factor_num, bias = False).to(self.device)
        self.embed_user_MLP = nn.Linear(
                user_size, factor_num * (2 ** (num_layers - 1)), bias = False).to(self.device)
        self.embed_item_MLP = nn.Linear(
                item_size, factor_num * (2 ** (num_layers - 1)), bias = False).to(self.device)

        # MLP tower: each layer halves the width, ending at `factor_num` units.
        MLP_modules = []
        for i in range(num_layers):
            input_size = factor_num * (2 ** (num_layers - i))
            MLP_modules.append(nn.Dropout(p=self.dropout))
            MLP_modules.append(nn.Linear(input_size, input_size//2).to(self.device))
            MLP_modules.append(nn.ReLU())
        self.MLP_layers = nn.Sequential(*MLP_modules)

        # A single branch feeds `factor_num` features into the prediction
        # layer; the fused NeuMF variants concatenate both branches.
        if self.model in ['MLP', 'GMF']:
            predict_size = factor_num
        else:
            predict_size = factor_num * 2
        self.predict_layer = nn.Linear(predict_size, 1).to(self.device)
        self.sigmoid = nn.Sigmoid()
        self._init_weight_()

    def _init_weight_(self):
        """Initialise weights randomly, or copy them from the pre-trained parts.

        For 'NeuMF-pre' the embeddings and MLP layers are copied from
        ``GMF_model`` / ``MLP_model`` and the prediction layer is the
        concatenation of both pre-trained prediction layers scaled by 0.5.
        """
        if not self.model == 'NeuMF-pre':
            nn.init.normal_(self.embed_user_GMF.weight, std=0.01)
            nn.init.normal_(self.embed_user_MLP.weight, std=0.01)
            nn.init.normal_(self.embed_item_GMF.weight, std=0.01)
            nn.init.normal_(self.embed_item_MLP.weight, std=0.01)

            for m in self.MLP_layers:
                if isinstance(m, nn.Linear):
                    nn.init.xavier_uniform_(m.weight)
            nn.init.kaiming_uniform_(self.predict_layer.weight,
                                    a=1, nonlinearity='sigmoid')

            for m in self.modules():
                if isinstance(m, nn.Linear) and m.bias is not None:
                    m.bias.data.zero_()
        else:
            # embedding layers
            self.embed_user_GMF.weight.data.copy_(
                            self.GMF_model.embed_user_GMF.weight)
            self.embed_item_GMF.weight.data.copy_(
                            self.GMF_model.embed_item_GMF.weight)
            self.embed_user_MLP.weight.data.copy_(
                            self.MLP_model.embed_user_MLP.weight)
            self.embed_item_MLP.weight.data.copy_(
                            self.MLP_model.embed_item_MLP.weight)

            # mlp layers
            for (m1, m2) in zip(
                self.MLP_layers, self.MLP_model.MLP_layers):
                if isinstance(m1, nn.Linear) and isinstance(m2, nn.Linear):
                    m1.weight.data.copy_(m2.weight)
                    m1.bias.data.copy_(m2.bias)

            # predict layers
            predict_weight = torch.cat([
                self.GMF_model.predict_layer.weight,
                self.MLP_model.predict_layer.weight], dim=1)
            predict_bias = self.GMF_model.predict_layer.bias + \
                        self.MLP_model.predict_layer.bias

            self.predict_layer.weight.data.copy_(0.5 * predict_weight)
            self.predict_layer.bias.data.copy_(0.5 * predict_bias)

    @staticmethod
    def _match_item_shape(user_embedding, item_embedding):
        """Broadcast the user embedding to the item embedding's shape if they differ."""
        if user_embedding.shape != item_embedding.shape:
            # e.g. one user row scored against many item rows
            return user_embedding.expand_as(item_embedding)
        return user_embedding

    def forward(self, user, item):
        """Return sigmoid scores for user/item vector pairs as a flat tensor.

        Args:
            user: user vectors whose last dimension is 'num_features'; must be
                broadcastable to the item batch after embedding.
            item: item vectors whose last dimension is 'num_items'.
        """
        # Same device handling as the stand-alone GMF/MLP models in this file.
        user = user.to(self.device)
        item = item.to(self.device)

        if self.model != 'MLP':
            embed_item_GMF = self.embed_item_GMF(item)
            embed_user_GMF = self._match_item_shape(
                self.embed_user_GMF(user), embed_item_GMF)
            output_GMF = embed_user_GMF * embed_item_GMF
        if self.model != 'GMF':
            embed_item_MLP = self.embed_item_MLP(item)
            embed_user_MLP = self._match_item_shape(
                self.embed_user_MLP(user), embed_item_MLP)
            interaction = torch.cat((embed_user_MLP, embed_item_MLP), -1)
            output_MLP = self.MLP_layers(interaction)

        if self.model == 'GMF':
            concat = output_GMF
        elif self.model == 'MLP':
            concat = output_MLP
        else:
            concat = torch.cat((output_GMF, output_MLP), -1)

        prediction = self.predict_layer(concat)
        prediction = self.sigmoid(prediction)
        return prediction.view(-1)

In [None]:
class MLP_model(nn.Module):
    """Stand-alone MLP branch of NCF operating on dense user and item vectors.

    Args:
        hidden_size: width of the last MLP layer (input of the prediction layer).
        num_layers: number of Dropout -> Linear -> ReLU stages in the tower.
        **kw: must provide 'device', 'num_features' (user vector length) and
            'num_items' (item vector length).
    """

    def __init__(self, hidden_size, num_layers, **kw):
        super().__init__()
        self.device = kw['device']
        user_dim, item_dim = kw['num_features'], kw['num_items']

        # Both embeddings share one width; concatenated they form the tower input.
        embed_dim = hidden_size * (2 ** (num_layers - 1))
        self.embed_user_MLP = nn.Linear(user_dim, embed_dim, bias=False).to(self.device)
        self.embed_item_MLP = nn.Linear(item_dim, embed_dim, bias=False).to(self.device)

        # Every stage halves the width, starting from hidden_size * 2**num_layers.
        tower = []
        for depth in range(num_layers):
            width = hidden_size * (2 ** (num_layers - depth))
            tower += [
                nn.Dropout(p=0.5),
                nn.Linear(width, width // 2).to(self.device),
                nn.ReLU(),
            ]
        self.MLP_layers = nn.Sequential(*tower)

        self.predict_layer = nn.Linear(hidden_size, 1, bias=True).to(self.device)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user_tensor, item_tensor):
        """Score user/item pairs; the result keeps a trailing dimension of size 1."""
        user_hidden = self.embed_user_MLP(user_tensor.to(self.device))
        item_hidden = self.embed_item_MLP(item_tensor.to(self.device))
        if user_hidden.shape != item_hidden.shape:
            # Broadcast the user embedding over the item batch.
            tiled = torch.zeros(item_hidden.shape).to(self.device)
            tiled[:] = user_hidden
            user_hidden = tiled
        pair = torch.cat((user_hidden, item_hidden), -1)
        logits = self.predict_layer(self.MLP_layers(pair))
        return self.sigmoid(logits)

In [None]:
class GMF_model(nn.Module):
    """Stand-alone GMF branch of NCF: element-wise product of linear embeddings.

    Args:
        hidden_size: width of the user and item embeddings (default 8).
        **kw: must provide 'device', 'num_features' (user vector length) and
            'num_items' (item vector length).
    """

    def __init__(self, hidden_size=8, **kw):
        super().__init__()
        self.device = kw['device']
        user_dim, item_dim = kw['num_features'], kw['num_items']
        self.embed_user_GMF = nn.Linear(user_dim, hidden_size, bias=False).to(self.device)
        self.embed_item_GMF = nn.Linear(item_dim, hidden_size, bias=False).to(self.device)
        self.predict_layer = nn.Linear(hidden_size, 1, bias=True).to(self.device)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user_tensor, item_tensor):
        """Score user/item pairs; the result keeps a trailing dimension of size 1."""
        user_latent = self.embed_user_GMF(user_tensor.to(self.device))
        item_latent = self.embed_item_GMF(item_tensor.to(self.device))
        if user_latent.shape != item_latent.shape:
            # Broadcast the user embedding over the item batch.
            tiled = torch.zeros(item_latent.shape).to(self.device)
            tiled[:] = user_latent
            user_latent = tiled

        logits = self.predict_layer(user_latent * item_latent)
        return self.sigmoid(logits)

### NCF Wrapper SHAP

In [None]:
class NCFWrapper(nn.Module):
    """NumPy-in / NumPy-out adapter that lets SHAP query a recommender on cluster features.

    Each input row has the layout ``[item_index, c_0, c_1, ...]`` where ``c_j``
    is the value of cluster ``j``. The wrapper expands the cluster values back
    into a full user vector (every item of cluster ``j`` receives ``c_j``),
    looks up the item's row in ``item_array`` and scores the pair with ``model``.

    Args:
        model: callable module taking ``(user_vectors, item_vectors)``.
        item_array: array-like whose row ``i`` is the vector of item ``i``.
        cluster_to_items: mapping from cluster id to the list of item indices
            in that cluster; cluster id ``j`` refers to input column ``j + 1``.
        num_items: length of the reconstructed user vector.
        device: device on which tensors are created.
    """

    def __init__(self, model, item_array, cluster_to_items, num_items, device):
        super(NCFWrapper, self).__init__()
        self.model = model
        self.n_items = num_items
        self.cluster_to_items = cluster_to_items
        self.item_array = torch.tensor(item_array, dtype=torch.float, device=device)
        self.device = device

    def forward(self, input):
        """Score every row of the NumPy array ``input`` and return a NumPy array."""
        input = torch.from_numpy(input).to(self.device).float()
        items = input[:, 0].long()
        clusters = input[:, 1:]

        user_vectors = torch.zeros((len(input), self.n_items), device=self.device, dtype=torch.float)
        # Apply EVERY cluster column. The previous `range(n_clusters - 1)` loop
        # skipped the last cluster, so its items always stayed at zero and that
        # feature could never influence the model output.
        for cluster, member_items in self.cluster_to_items.items():
            user_vectors[:, member_items] = clusters[:, int(cluster)].unsqueeze(1)

        item_vectors = self.item_array[items]

        # Pure inference: no autograd graph is needed for the explainer.
        with torch.no_grad():
            output = self.model(user_vectors, item_vectors)
        return output.detach().cpu().numpy()

In [None]:
# Output kind of each recommender: one score per call ("single") or a score
# for several items at once ("multiple").
output_type_dict = dict(
    VAE="multiple",
    MLP="single",
    NCF="single",
    MLP_model="single",
    GMF_model="single",
)

# Number of users per dataset.
num_users_dict = dict(ML1M=6037, ML1M_demographic=6037, Yahoo=13797, Pinterest=19155)

# Number of items per dataset.
num_items_dict = dict(ML1M=3381, ML1M_demographic=3381, Yahoo=4604, Pinterest=9362)

# Whether the dataset variant carries demographic features.
demographic_dict = dict(ML1M_demographic=True, ML1M=False, Yahoo=False, Pinterest=False)

# Length of the user feature vector for datasets that define one; None otherwise.
features_dict = dict(ML1M_demographic=3421, ML1M=None, Yahoo=None, Pinterest=None)

### Read data

In [None]:
# Training split, read from the current working directory.
# NOTE(review): the dataset name is hard-coded in the file name and is independent of the
# `data_name` setting defined further down — keep the two in sync when switching datasets.
train_data_mixed = pd.read_csv('train_data_Yahoo.csv')

In [None]:
# Test split, read from the current working directory (file name hard-codes the dataset).
test_data = pd.read_csv('test_data_Yahoo.csv')

In [None]:
# Plain NumPy views of both splits; later cells slice column 0 off from the rest.
train_array, test_array = (
    frame.to_numpy() for frame in (train_data_mixed, test_data)
)

In [None]:
import pickle
# Pre-computed mapping loaded for the test users; only its .values() are used later
# (as the item column of the explainer input).
# NOTE(review): pickle.load executes arbitrary code from the file — only load trusted files.
filename = 'top1_test_Yahoo_NCF.pkl'

with open(filename, 'rb') as f:
    top1_test = pickle.load(f)

In [None]:
import pickle
# Pre-computed mapping loaded for the training users; only its .values() are used later
# (as the item column of the SHAP background data).
# NOTE(review): pickle.load executes arbitrary code from the file — only load trusted files.
filename = 'top1_train_Yahoo_NCF.pkl'

with open(filename, 'rb') as f:
    top1_train = pickle.load(f)

In [None]:
# Experiment selection.
data_name = "Yahoo"        # one of: ML1M, ML1M_demographic, Yahoo, Pinterest
recommender_name = "NCF"   # one of: MLP, VAE, MLP_model, GMF_model, NCF

# Paths are resolved relative to the current working directory.
export_dir = Path(os.getcwd())
DP_DIR = Path("processed_data", data_name)
files_path = export_dir / DP_DIR
checkpoints_path = export_dir / "checkpoints"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset / recommender properties looked up from the configuration tables.
output_type = output_type_dict[recommender_name]   # "single" or "multiple"
num_users = num_users_dict[data_name]
num_items = num_items_dict[data_name]
demographic = demographic_dict[data_name]
# Without demographic features the user vector has one entry per item.
num_features = features_dict[data_name] if demographic else num_items_dict[data_name]

In [None]:
# Identity matrix of size num_items: row i is the one-hot vector of item i.
items_array = np.eye(num_items)
# Tensor copy of the same matrix, moved to the selected device.
all_items_tensor = torch.Tensor(items_array).to(device)

In [None]:
# Shared keyword arguments handed to every model constructor via **kw_dict.
# The model classes in this notebook read 'device', 'num_features' and 'num_items'.
kw_dict = dict(
    device=device,
    num_items=num_items,
    demographic=demographic,
    num_features=num_features,
    all_items_tensor=all_items_tensor,
    items_array=items_array,
    output_type=output_type,
    recommender_name=recommender_name,
)

In [None]:
# Hidden size per dataset and recommender (None where no single hidden size applies).
_HIDDEN_DIMS_BY_DATASET = {
    "ML1M": {"VAE": None, "MLP": 32, "MLP_model": 8, "GMF_model": 8, "NCF": 8},
    "ML1M_demographic": {"VAE": None, "MLP": 32, "MLP_model": 8, "GMF_model": 8, "NCF": 8},
    "Yahoo": {"VAE": None, "MLP": 32, "MLP_model": 8, "GMF_model": 8, "NCF": 8},
    "Pinterest": {"VAE": None, "MLP": 512, "MLP_model": 64, "GMF_model": 64, "NCF": 64},
}

# Flattened to the (dataset, recommender) -> hidden size lookup used below.
hidden_dim_dict = {
    (dataset, recommender): dim
    for dataset, dims in _HIDDEN_DIMS_BY_DATASET.items()
    for recommender, dim in dims.items()
}

In [None]:
# Hidden / factor size for the selected (dataset, recommender) pair.
hidden_dim = hidden_dim_dict[(data_name,recommender_name)]
# NOTE(review): dead code — `recommender_path_dict` is not defined in the visible notebook.
#recommender_path = recommender_path_dict[(data_name,recommender_name)]

In [None]:
# Build the NeuMF model from freshly initialised MLP and GMF parts; every weight is then
# overwritten by the checkpoint below.
MLP_temp = MLP_model(hidden_size=hidden_dim, num_layers=3, **kw_dict)
GMF_temp = GMF_model(hidden_size=hidden_dim, **kw_dict)
model = NCF(factor_num=hidden_dim, num_layers=3, dropout=0.5, model= 'NeuMF-pre', GMF_model= GMF_temp, MLP_model=MLP_temp, **kw_dict)
# NOTE(review): the optimizer is not used anywhere in the visible notebook (inference only).
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load("NCF_Yahoo_0.001_64_21_0.pt", map_location=device)
# Inference mode: without this the Dropout(p=0.5) layers stay active and every call made
# by the SHAP explainer would return a different, randomly perturbed score.
model.eval()
model.load_state_dict(checkpoint)

In [None]:
# The model is only queried from here on, so switch off gradient tracking for every weight.
for param in model.parameters():
    param.requires_grad_(False)

In [None]:
# NOTE(review): K is not read before it is reassigned (K=50) in the SHAP section.
K = 100
# Training matrix as float32: every column of train_array except column 0.
u_train = torch.tensor(train_array[:,1:]).float()
print("u_train.shape ", u_train.shape)
# Alias of the one-hot item matrix (same tensor object, not a copy).
v_train = all_items_tensor
print("v_train.shape ", v_train.shape)
# Column 0 of train_array is kept separately as the user identifiers.
user_ids = train_array[:,0]

### Clustering

In [None]:
# Seed NumPy's global RNG before clustering. KMeans is built without an explicit
# random_state, so this seed is presumably what makes the clustering repeatable — TODO confirm.
np.random.seed(3)
# Cluster items using k-means
from sklearn.cluster import KMeans
import numpy as np
k = 10  # number of item clusters

kmeans = KMeans(n_clusters=k)
# u_train is transposed so that each sample given to KMeans is one column of u_train
# (presumably one item described by its training users — verify).
clusters = kmeans.fit_predict(np.transpose(u_train))

In [None]:
# Cluster label of every item (column of u_train) under the fitted k-means model.
item_clusters = kmeans.predict(np.transpose(u_train))

# item index -> cluster label
item_to_cluster = dict(enumerate(item_clusters))

# cluster label -> item indices in ascending order
cluster_to_items = {}
for i, cluster in item_to_cluster.items():
    cluster_to_items.setdefault(cluster, []).append(i)

In [None]:
# Test matrix as float32: every column of test_array except column 0.
u_test = torch.tensor(test_array[:,1:]).float()

In [None]:
# One row per test user, one column per item cluster. Sized from `k` (the KMeans cluster
# count) instead of a literal 10 so it cannot drift from the clustering configuration.
user_to_clusters = np.zeros((u_test.shape[0], k))

In [None]:
# Per cluster, add up each test user's values over the items belonging to that cluster.
u_test_by_item = u_test.cpu().detach().numpy().T   # one row per item
for i, member_items in cluster_to_items.items():
    user_to_clusters[:, i] = u_test_by_item[member_items].sum(axis=0)

In [None]:
# 1 where the per-cluster sum is positive, 0 otherwise.
user_to_clusters_bin = (user_to_clusters > 0).astype(int)

In [None]:
# One row per training user, one column per item cluster. Sized from `k` (the KMeans
# cluster count) instead of a literal 10 so it cannot drift from the clustering configuration.
user_to_clusters_train = np.zeros((u_train.shape[0], k))

In [None]:
default_value = 0
# Values of the loaded top1 mappings as lists, in the dictionaries' iteration order.
target_items_test, target_items_train = (
    list(top1.values()) for top1 in (top1_test, top1_train)
)

In [None]:
# Per cluster, add up each training user's values over the items belonging to that cluster.
u_train_by_item = u_train.cpu().detach().numpy().T   # one row per item
for i, member_items in cluster_to_items.items():
    user_to_clusters_train[:, i] = u_train_by_item[member_items].sum(axis=0)

In [None]:
# 1 where the per-cluster sum is positive, 0 otherwise.
user_to_clusters_train_bin = (user_to_clusters_train > 0).astype(int)

In [None]:
# SHAP background rows: the top1_train value in column 0, then the binary cluster flags.
# presumably top1_train iterates in the same user order as the rows of train_array — verify.
col2 = [*top1_train.values()]
input_train_array = np.insert(
    arr=user_to_clusters_train_bin, obj=0, values=col2, axis=1
).astype(int)

In [None]:
# One row per test user, one column per item cluster. Sized from `k` (the KMeans cluster
# count) instead of a literal 10 so it cannot drift from the clustering configuration.
user_to_clusters_test = np.zeros((u_test.shape[0], k))

In [None]:
# Per cluster, add up each test user's values over the items belonging to that cluster.
u_test_by_item = u_test.cpu().detach().numpy().T   # one row per item
for i, member_items in cluster_to_items.items():
    user_to_clusters_test[:, i] = u_test_by_item[member_items].sum(axis=0)

In [None]:
# 1 where the per-cluster sum is positive, 0 otherwise.
user_to_clusters_test_bin = (user_to_clusters_test > 0).astype(int)

In [None]:
# Rows to explain: the top1_test value in column 0, then the binary cluster flags.
# presumably top1_test iterates in the same user order as the rows of test_array — verify.
col1 = [*top1_test.values()]
input_test_array = np.insert(
    arr=user_to_clusters_test_bin, obj=0, values=col1, axis=1
).astype(int)

In [None]:
# Adapter that turns rows of [item, cluster flags...] into recommender scores for SHAP.
wrap_model = NCFWrapper(model, items_array, cluster_to_items, num_items, device)

### SHAP

In [None]:
# Number of training rows sampled as the SHAP background set in the next cell.
K=50

In [None]:
# Background data for the explainer: K rows sampled from the training inputs.
sampled_subset = shap.sample(input_train_array,K)

In [None]:
# Kernel SHAP explainer around the wrapped recommender, using the sampled background rows.
explainer = shap.KernelExplainer(wrap_model,sampled_subset)

In [None]:
# SHAP values for every test row (column 0 of each row is the item, the rest are cluster flags).
shap_values_test = explainer.shap_values(input_test_array)

In [None]:
# Prepend column 0 of test_array (presumably the user id, as for train_array) to the SHAP values.
# NOTE(review): this rebinds `input_test_array`, which until now held the explainer INPUTS;
# re-running the SHAP cell after this one would feed SHAP values back in. Consider a
# distinct name (it is also the object pickled at the end of the notebook).
col1 = test_array[:,0]
input_test_array= np.insert(shap_values_test, 0, col1,axis=1)

In [None]:
import pickle

# Persist the item -> cluster mapping in the current working directory.
file_path = 'item_to_cluster_NCF_Yahoo.pkl'

with open(file_path, 'wb') as f:
    pickle.dump(item_to_cluster, f)

In [None]:
import pickle

# Persist the SHAP result: at this point `input_test_array` holds column 0 of test_array
# followed by the SHAP values, not the explainer inputs.
file_path = 'shap_values_NCF_Yahoo.pkl'

with open(file_path, 'wb') as f:
    pickle.dump(input_test_array, f)