In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import shap
import pickle

import os
export_dir = os.getcwd()
from pathlib import Path

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import SGD
from torch.nn import BCELoss
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F

  def _pt_shuffle_rec(i, indexes, index_mask, partition_tree, M, pos):
  def delta_minimization_order(all_masks, max_swap_size=100, num_passes=2):
  def _reverse_window(order, start, length):
  def _reverse_window_score_gain(masks, order, start, length):
  def _mask_delta_score(m1, m2):
  def identity(x):
  def _identity_inverse(x):
  def logit(x):
  def _logit_inverse(x):
  def _build_fixed_single_output(averaged_outs, last_outs, outputs, batch_positions, varying_rows, num_varying_rows, link, linearizing_weights):
  def _build_fixed_multi_output(averaged_outs, last_outs, outputs, batch_positions, varying_rows, num_varying_rows, link, linearizing_weights):
  def _init_masks(cluster_matrix, M, indices_row_pos, indptr):
  def _rec_fill_masks(cluster_matrix, indices_row_pos, indptr, indices, M, ind):
  def _single_delta_mask(dind, masked_inputs, last_mask, data, x, noop_code):
  def _delta_masking(masks, x, curr_delta_inds, varying_rows_out,
  def _jit_build_partition_tree(xmin, xmax, ymi

In [2]:
from torch.nn import Softmax

# Module-level softmax over the last axis. Passing `dim` explicitly avoids
# PyTorch's deprecated implicit-dimension behaviour (and its warning); for the
# 1-D and 2-D inputs the implicit rule picked this same axis.
softmax = nn.Softmax(dim=-1)

In [3]:
from sklearn.decomposition import NMF
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
    
from scipy import sparse
from os import path

In [23]:
# Experiment selection.
data_name = "ML1M"  # one of: ML1M, Yahoo, Pinterest
recommender_name = "VAE"  # one of: MLP, VAE

# Processed data and checkpoints are resolved relative to the parent of the
# current working directory.
export_dir = Path.cwd()
DP_DIR = Path("processed_data") / data_name
files_path = export_dir.parent / DP_DIR
checkpoints_path = export_dir.parent / "checkpoints"

# Prefer the GPU when one is available.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### VAE recommender

In [54]:
class VAE(nn.Module):
    """Variational auto-encoder recommender over user interaction vectors.

    The encoder maps a (batch, num_items) matrix to the mean and log-variance
    of a latent Gaussian; the decoder maps a latent sample back to one score
    per item.

    :param model_conf: dict with keys 'enc_dims' (list of hidden sizes, the
        last one being the latent size), 'dropout', 'total_anneal_steps' and
        'anneal_cap'.
    :param kw: must contain 'device' and 'num_items'. May contain
        'demographic' (default False) and 'items_only' (default num_items),
        which control the slicing applied to eval-mode outputs. Other keys
        are ignored.
    """

    def __init__(self, model_conf, **kw):
        super(VAE, self).__init__()
        self.device = kw['device']
        num_items = kw['num_items']
        self.num_items = num_items
        # forward() reads these two attributes in eval mode; they were never
        # initialised before, so every eval-mode call raised AttributeError.
        self.demographic = kw.get('demographic', False)
        self.items_only = kw.get('items_only', num_items)

        self.enc_dims = [self.num_items] + model_conf['enc_dims']
        self.dec_dims = self.enc_dims[::-1]
        self.dims = self.enc_dims + self.dec_dims[1:]
        self.dropout = model_conf['dropout']
        self.softmax = nn.Softmax(dim=1)
        self.total_anneal_steps = model_conf['total_anneal_steps']
        self.anneal_cap = model_conf['anneal_cap']

        self.eps = 1e-6
        self.anneal = 0.
        self.update_count = 0

        # Encoder: Linear+ReLU stack; the final Linear is twice as wide
        # because it emits both the mean and the log-variance.
        last_enc = len(self.enc_dims) - 2
        self.encoder = nn.ModuleList()
        for i, (d_in, d_out) in enumerate(zip(self.enc_dims[:-1], self.enc_dims[1:])):
            if i == last_enc:
                d_out *= 2
            self.encoder.append(nn.Linear(d_in, d_out))
            if i != last_enc:
                self.encoder.append(nn.ReLU())

        # Decoder: mirror of the encoder, no activation after the last layer.
        last_dec = len(self.dec_dims) - 2
        self.decoder = nn.ModuleList()
        for i, (d_in, d_out) in enumerate(zip(self.dec_dims[:-1], self.dec_dims[1:])):
            self.decoder.append(nn.Linear(d_in, d_out))
            if i != last_dec:
                self.decoder.append(nn.ReLU())

        self.to(self.device)

    def forward(self, rating_matrix):
        """
        Forward pass
        :param rating_matrix: (batch, num_items) interaction matrix, or a
            single 1-D user vector (promoted to a batch of one)
        :return: in training mode ``(raw decoder output, kl_loss)``; in eval
            mode the softmax over item scores (restricted to the first
            ``items_only`` columns when ``demographic`` is set)
        """
        if not torch.is_tensor(rating_matrix):
            rating_matrix = torch.as_tensor(rating_matrix, dtype=torch.float32, device=self.device)
        if rating_matrix.dim() == 1:
            rating_matrix = rating_matrix.unsqueeze(0)

        # encoder: L2-normalise each user row, then apply input dropout
        h = F.dropout(F.normalize(rating_matrix, dim=-1), p=self.dropout, training=self.training)
        for layer in self.encoder:
            h = layer(h)

        # split the encoder output into mean and log-variance
        latent_dim = self.enc_dims[-1]
        mu_q = h[:, :latent_dim]
        logvar_q = h[:, latent_dim:]  # log sigma^2
        std_q = torch.exp(0.5 * logvar_q)  # sigma

        # Sample only while training. At eval time use the mean directly;
        # multiplying the noise by False would turn an overflowed std into NaN.
        if self.training:
            epsilon = torch.zeros_like(std_q).normal_(mean=0, std=0.01)
            sampled_z = mu_q + epsilon * std_q
        else:
            sampled_z = mu_q

        output = sampled_z
        for layer in self.decoder:
            output = layer(output)

        if self.training:
            kl_loss = ((0.5 * (-logvar_q + torch.exp(logvar_q) + torch.pow(mu_q, 2) - 1)).sum(1)).mean()
            return output, kl_loss

        if self.demographic:
            return self.softmax(output[:, :self.items_only])
        return self.softmax(output)

    def train_one_epoch(self, dataset, optimizer, batch_size, alpha=0.5):
        """
        Train model for one epoch
        :param dataset: training matrix, one row per user
        :param optimizer: choice of optimizer
        :param batch_size: batch size
        :param alpha: weight of the cross-entropy computed on the columns
            past ``num_items``
        :return: sum of the batch losses (a detached tensor; 0.0 when the
            dataset is empty)
        """
        self.train()

        train_matrix = dataset

        num_training = train_matrix.shape[0]
        num_batches = int(np.ceil(num_training / batch_size))
        perm = np.random.permutation(num_training)

        loss = 0.0
        for b in range(num_batches):
            optimizer.zero_grad()

            # Slicing past the end is safe, so the last (short) batch needs
            # no special case.
            batch_idx = perm[b * batch_size: (b + 1) * batch_size]
            batch_matrix = torch.as_tensor(train_matrix[batch_idx], dtype=torch.float32).to(self.device)

            # KL weight is annealed linearly up to anneal_cap
            if self.total_anneal_steps > 0:
                self.anneal = min(self.anneal_cap, 1. * self.update_count / self.total_anneal_steps)
            else:
                self.anneal = self.anneal_cap

            pred_matrix, kl_loss = self.forward(batch_matrix)

            # cross_entropy
            total_ce = -(F.log_softmax(pred_matrix, 1) * batch_matrix)
            ce_hist = total_ce[:, :self.num_items].sum(1).mean()
            ce_demo = total_ce[:, self.num_items:].sum(1).mean()
            ce_loss = ce_hist + alpha * ce_demo

            batch_loss = ce_loss + kl_loss * self.anneal

            batch_loss.backward()
            optimizer.step()

            self.update_count += 1

            # Detach before accumulating so the running total does not keep
            # every batch's autograd graph alive.
            loss += batch_loss.detach()
            if b % 200 == 0:
                print('(%3d / %3d) loss = %.4f' % (b, num_batches, batch_loss))
        return loss

    def predict(self, eval_users, test_batch_size):
        """
        Predict the model on test set
        :param eval_users: interaction matrix of the evaluated (test) users
        :param test_batch_size: batch size for test set
        :return: numpy array shaped like ``eval_users`` with the predicted
            scores; entries the user already interacted with are -inf
        """
        # forward() only returns a plain score tensor in eval mode, so switch
        # temporarily and restore the caller's mode afterwards.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                input_matrix = torch.as_tensor(eval_users, dtype=torch.float32).to(self.device)
                preds = np.zeros(tuple(input_matrix.shape), dtype=np.float32)

                num_data = input_matrix.shape[0]
                num_batches = int(np.ceil(num_data / test_batch_size))
                for b in range(num_batches):
                    start, stop = b * test_batch_size, (b + 1) * test_batch_size

                    test_batch_matrix = input_matrix[start:stop]
                    batch_pred_matrix = self.forward(test_batch_matrix)
                    # masked_fill is NOT in-place: its result must be kept,
                    # otherwise already-seen items are never masked out.
                    batch_pred_matrix = batch_pred_matrix.masked_fill(test_batch_matrix.bool(), float('-inf'))
                    preds[start:stop] = batch_pred_matrix.detach().cpu().numpy()
        finally:
            self.train(was_training)
        return preds
    

In [56]:
class WrapperModel(nn.Module):
    """Cluster-level view of an item-level recommender, for use with SHAP.

    Inputs are (batch, n_clusters) vectors; column ``c`` is broadcast to every
    item of cluster ``c`` to build an item-level user vector, the wrapped
    model is run on it, and the item scores are averaged per cluster.

    :param model: wrapped recommender; called with a (batch, num_items) tensor
    :param item_array: stored as ``self.item_array`` (not used by this class)
    :param cluster_to_items: dict mapping cluster id -> list of item indices
    :param item_to_cluster: dict mapping item index -> cluster id (stored only)
    :param num_items: width of the item-level user vector
    :param device: torch device on which tensors are created
    :param num_clusters: number of input columns (cluster ids 0..num_clusters-1)
    """

    def __init__(self, model, item_array, cluster_to_items, item_to_cluster, num_items, device,num_clusters=10):
        super(WrapperModel, self).__init__()
        self.model = model
        self.n_items = num_items
        self.cluster_to_items = cluster_to_items
        self.item_to_cluster = item_to_cluster
        self.item_array = item_array
        self.device = device
        self.n_clusters = num_clusters

    def forward(self, input_array):
        """Return per-cluster mean scores as a numpy array.

        :param input_array: (batch, n_clusters) numpy array or tensor; a 1-D
            vector is treated as a batch of one
        :return: numpy array of shape (batch, len(cluster_to_items)); columns
            follow the iteration order of ``cluster_to_items``
        """
        # Accept numpy arrays (what SHAP passes) as well as tensors.
        cluster_inputs = torch.as_tensor(input_array, dtype=torch.float32, device=self.device)
        if cluster_inputs.dim() == 1:
            cluster_inputs = cluster_inputs.unsqueeze(0)
        batch_size = cluster_inputs.shape[0]

        user_vector_batch = torch.zeros(batch_size, self.n_items, device=self.device)
        # Every cluster must be expanded; the previous range(n_clusters - 1)
        # silently left the last cluster's items at zero.
        for cluster in range(self.n_clusters):
            cluster_indices = self.cluster_to_items.get(cluster)
            if cluster_indices is None or len(cluster_indices) == 0:
                continue
            user_vector_batch[:, cluster_indices] = cluster_inputs[:, cluster].unsqueeze(1)

        with torch.no_grad():
            model_output = self.model(user_vector_batch)
            if isinstance(model_output, (tuple, list)):
                # Training-mode style output: (raw scores, extra loss term).
                item_scores = torch.softmax(model_output[0], dim=1)
            else:
                # A bare tensor is used as-is. NOTE(review): this assumes the
                # model already returns normalised scores (VAE.forward does in
                # eval mode) - confirm for other recommenders.
                item_scores = model_output

            # Mean score of each cluster's items, for all users at once.
            cluster_means = [
                item_scores[:, items].mean(dim=1)
                for items in self.cluster_to_items.values()
            ]
            cluster_scores_per_user = torch.stack(cluster_means, dim=1)

        return cluster_scores_per_user.cpu().detach().numpy()

    def predict(self, x):
        """Return the cluster scores of ``forward`` as a tensor on ``self.device``.

        The previous implementation could not run: it handed a tensor to
        ``torch.from_numpy`` and then fed the (batch, n_clusters) scores back
        into the wrapped model, which is called with n_items-wide inputs.
        """
        return torch.as_tensor(self.forward(x), device=self.device)

In [57]:
# Output-type label per recommender (looked up into `output_type` later on).
output_type_dict = dict(VAE="multiple", MLP="single")

# Per-dataset user and item counts.
num_users_dict = dict(ML1M=6037, Yahoo=13797, Pinterest=19155)
num_items_dict = dict(ML1M=3381, Yahoo=4604, Pinterest=9362)

# Checkpoint file of each (dataset, recommender) pair, inside checkpoints_path.
recommender_path_dict = {
    pair: Path(checkpoints_path, file_name)
    for pair, file_name in (
        (("ML1M", "VAE"), "VAE_ML1M_0.0007_128_10.pt"),
        (("ML1M", "MLP"), "MLP1_ML1M_0.0076_256_7.pt"),
        (("Yahoo", "VAE"), "VAE_Yahoo_0.0001_128_13.pt"),
        (("Yahoo", "MLP"), "MLP2_Yahoo_0.0083_128_1.pt"),
        (("Pinterest", "VAE"), "VAE_Pinterest_0.0002_32_12.pt"),
        (("Pinterest", "MLP"), "MLP_Pinterest_0.0062_512_21_0.pt"),
    )
}

# Hidden sizes per (dataset, recommender) pair: a list for VAE, an int for MLP.
hidden_dim_dict = {
    (dataset, recommender): hidden
    for dataset, mlp_hidden in (("ML1M", 32), ("Yahoo", 32), ("Pinterest", 512))
    for recommender, hidden in (("VAE", [512, 128]), ("MLP", mlp_hidden))
}

### Read data

In [58]:
# Load the train/test tables for the selected dataset (first CSV column is
# the index) and keep plain numpy copies alongside the DataFrames.
train_data, test_data = (
    pd.read_csv(Path(files_path, csv_name), index_col=0)
    for csv_name in (f'train_data_{data_name}.csv', f'test_data_{data_name}.csv')
)
train_array, test_array = train_data.to_numpy(), test_data.to_numpy()

In [59]:
# Resolve the settings of the selected dataset / recommender.
output_type = output_type_dict[recommender_name]  # "single" or "multiple"
num_users, num_items = num_users_dict[data_name], num_items_dict[data_name]
num_features = num_items_dict[data_name]  # same table as num_items

In [60]:
# One-hot encoding of every item: row i is the indicator vector of item i.
items_array = np.identity(num_items)
all_items_tensor = torch.Tensor(items_array).to(device=device)

In [61]:
# Keyword arguments handed to the recommender constructors.
kw_dict = dict(
    device=device,
    num_items=num_items,
    num_features=num_features,
    all_items_tensor=all_items_tensor,
    items_array=items_array,
    output_type=output_type,
    recommender_name=recommender_name,
)

In [62]:
# NOTE: this rebinds the `hidden_dim_dict` defined in an earlier cell. Here the
# VAE entries are None, while the MLP entries keep their integer hidden size.
hidden_dim_dict = {
    (dataset, recommender): hidden
    for dataset, mlp_hidden in (("ML1M", 32), ("Yahoo", 32), ("Pinterest", 512))
    for recommender, hidden in (("VAE", None), ("MLP", mlp_hidden))
}

In [66]:
# Hyper-parameters passed to VAE as `model_conf`.
VAE_config = dict(
    enc_dims=[512, 128],       # hidden sizes; the last one is the latent size
    dropout=0.5,               # dropout applied to the normalised input
    anneal_cap=0.2,            # upper bound of the KL weight
    total_anneal_steps=200000, # updates over which the KL weight ramps up
)

In [67]:
# Pick the hidden size and checkpoint path of the selected (dataset, recommender) pair.
hidden_dim, recommender_path = (
    lookup[(data_name, recommender_name)]
    for lookup in (hidden_dim_dict, recommender_path_dict)
)

In [68]:
def load_recommender():
    """Build the selected recommender, load its checkpoint and freeze it.

    Reads the notebook globals ``recommender_name``, ``hidden_dim``,
    ``kw_dict``, ``VAE_config``, ``checkpoints_path``, ``recommender_path``
    and ``device``.

    :return: the recommender in eval mode with all parameters frozen
    :raises ValueError: if ``recommender_name`` is neither 'MLP' nor 'VAE'
    """
    if recommender_name == 'MLP':
        # NOTE(review): no MLP class is defined in the visible part of this
        # notebook - it must be defined or imported before using this branch.
        recommender = MLP(hidden_dim, **kw_dict)
    elif recommender_name == 'VAE':
        recommender = VAE(VAE_config, **kw_dict)
    else:
        # Previously this fell through and failed later with an unbound
        # local variable.
        raise ValueError(f"Unsupported recommender_name: {recommender_name!r}")

    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    recommender_checkpoint = torch.load(Path(checkpoints_path, recommender_path), map_location=device)
    recommender.load_state_dict(recommender_checkpoint)
    recommender.eval()
    for param in recommender.parameters():
        param.requires_grad = False
    return recommender

model = load_recommender()

In [70]:
# Re-create the VAE so it is in training mode (forward then returns a
# (scores, kl_loss) tuple). A fresh instance has random weights, so restore
# the checkpoint here; otherwise everything downstream would explain an
# untrained model.
model = VAE(VAE_config ,**kw_dict)
model.load_state_dict(torch.load(recommender_path, map_location=device))
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [71]:
# Freeze the recommender: no parameter should collect gradients from here on.
for param in model.parameters():
    param.requires_grad_(False)

In [77]:
K = 100
user_ids = np.arange(train_array.shape[0])
# User-by-item training matrix as float32, and the one-hot item matrix.
u_train = torch.tensor(train_array).to(torch.float32)
v_train = all_items_tensor

u_train.shape  torch.Size([4829, 3381])
v_train.shape  torch.Size([3381, 3381])


### Clustering

In [78]:
from sklearn.cluster import KMeans
import numpy as np

# Cluster items using k-means. Each item is described by its column of the
# user-by-item training matrix, so the matrix is transposed to put items on
# the rows.
k = 10
np.random.seed(3)

# Pin the seed on the estimator itself so the clustering does not depend on
# whatever else has consumed numpy's global random state.
kmeans = KMeans(n_clusters=k, random_state=3)
clusters = kmeans.fit_predict(u_train.cpu().detach().numpy().T)

In [79]:
item_clusters = kmeans.predict(np.transpose(u_train))

# item index -> cluster id
item_to_cluster = dict(enumerate(item_clusters))

# cluster id -> list of its item indices (in increasing item order)
cluster_to_items = {}
for item_idx, cluster_id in item_to_cluster.items():
    cluster_to_items.setdefault(cluster_id, []).append(item_idx)

In [80]:
# User-by-item test matrix as a float32 tensor.
u_test = torch.tensor(test_array).to(torch.float32)

In [81]:
# One row per test user, one column per item cluster. The width follows `k`
# (the KMeans cluster count) instead of repeating the literal 10.
user_to_clusters = np.zeros((u_test.shape[0], k))

In [82]:
# For every cluster, sum each test user's entries over that cluster's items.
for cluster_id, member_items in cluster_to_items.items():
    member_rows = u_test.cpu().detach().numpy().T[member_items]
    user_to_clusters[:, cluster_id] = member_rows.sum(axis=0)

In [83]:
# 1 where the user has at least one interaction in the cluster, else 0.
user_to_clusters_bin = (user_to_clusters > 0).astype(int)

In [85]:
# One row per training user, one column per item cluster. The width follows
# `k` (the KMeans cluster count) instead of repeating the literal 10.
user_to_clusters_train = np.zeros((u_train.shape[0], k))

In [86]:
# For every cluster, sum each training user's entries over that cluster's items.
for cluster_id, member_items in cluster_to_items.items():
    member_rows = u_train.cpu().detach().numpy().T[member_items]
    user_to_clusters_train[:, cluster_id] = member_rows.sum(axis=0)

In [87]:
# 1 where the user has at least one interaction in the cluster, else 0.
user_to_clusters_train_bin = (user_to_clusters_train > 0).astype(int)

In [88]:
# Cluster-level wrapper around the recommender; this is the model SHAP explains.
wrap_model = WrapperModel(
    model=model,
    item_array=items_array,
    cluster_to_items=cluster_to_items,
    item_to_cluster=item_to_cluster,
    num_items=num_items,
    device=device,
)

### SHAP

In [89]:
# Number of rows requested from shap.sample for the explainer's background
# set. This rebinds the K = 100 assigned earlier in the notebook.
K=50


In [90]:
# Subsample the binarised training cluster matrix with shap.sample (K is the
# requested sample count); the result is the KernelExplainer's background data.
sampled_subset = shap.sample(user_to_clusters_train_bin,K)

In [91]:
# Kernel SHAP explainer over the cluster-level wrapper. wrap_model is called
# directly, so its forward() (which returns a numpy array) is the explained
# function; sampled_subset is the background data.
explainer = shap.KernelExplainer(wrap_model,sampled_subset)

In [93]:
# Compute SHAP values for each row of the binarised test cluster matrix.
shap_values_test = explainer.shap_values(user_to_clusters_bin)

  0%|          | 0/1208 [00:00<?, ?it/s]

In [95]:
# Average the attributions over the model outputs so that each test user keeps
# one value per input cluster. The container returned by shap_values() depends
# on the SHAP release: older versions give a list with one
# (samples, features) array per output, newer ones a single
# (samples, features, outputs) array, where averaging over axis 0 would
# collapse the users instead of the outputs.
if isinstance(shap_values_test, np.ndarray) and shap_values_test.ndim == 3:
    average_shap = shap_values_test.mean(axis=-1)
else:
    average_shap = np.mean(shap_values_test, axis=0)

In [97]:
# Test users are numbered after the training users; prepend that id as the
# first column of the averaged SHAP matrix.
col1 = train_array.shape[0] + np.arange(test_array.shape[0])
input_test_array = np.insert(average_shap, obj=0, values=col1, axis=1)

In [238]:
# Persist the item -> cluster mapping next to the processed data.
with Path(files_path,f'item_to_cluster_{recommender_name}_{data_name}.pkl').open('wb') as handle:
    pickle.dump(item_to_cluster, handle)

In [239]:
# Persist the per-user SHAP matrix (user id in column 0) next to the processed data.
with Path(files_path,f'shap_values_{recommender_name}_{data_name}.pkl').open('wb') as handle:
    pickle.dump(input_test_array, handle)