In [1]:
import pandas as pd
import numpy as np
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
export_dir = os.getcwd()
from pathlib import Path
import pickle
from collections import defaultdict
import time
import torch
import torch.nn as nn
import copy
import torch.nn.functional as F
import optuna
import logging
import matplotlib.pyplot as plt
import ipynb
import importlib
import sys
from tqdm import tqdm
import multiprocessing
from functools import partial
from concurrent.futures import ProcessPoolExecutor
import torch.multiprocessing as mp
from openpyxl.cell.cell import MergedCell

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_name = "ML1M" ### Can be ML1M, Yahoo, Pinterest
recommender_name = "MLP" ### Can be MLP, VAE, NCF

DP_DIR = Path("processed_data", data_name) 
export_dir = Path(os.getcwd())
files_path = Path("/storage/mikhail/PI4Rec", DP_DIR)
checkpoints_path = Path(export_dir.parent, "checkpoints")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
output_type_dict = {
    "VAE":"multiple",
    "MLP":"single",
    "NCF": "single"}

num_users_dict = {
    "ML1M":6037,
    "Yahoo":13797, 
    "Pinterest":19155}

num_items_dict = {
    "ML1M":3381,
    "Yahoo":4604, 
    "Pinterest":9362}


recommender_path_dict = {
    ("ML1M","VAE"): Path(checkpoints_path, "VAE_ML1M_0.0007_128_10.pt"),
    ("ML1M","MLP"):Path(checkpoints_path, "MLP1_ML1M_0.0076_256_7.pt"),#MLP_ML1M_0.0026_512_14_4.pt
    ("ML1M","NCF"):Path(checkpoints_path, "NCF_ML1M_5e-05_64_16.pt"),
    
    ("Yahoo","VAE"): Path(checkpoints_path, "VAE_Yahoo_0.0001_128_13.pt"),
    ("Yahoo","MLP"):Path(checkpoints_path, "MLP2_Yahoo_0.0083_128_1.pt"),
    ("Yahoo","NCF"):Path(checkpoints_path, "NCF_Yahoo_0.001_64_21_0.pt"),
    
    ("Pinterest","VAE"): Path(checkpoints_path, "VAE_Pinterest_12_18_0.0001_256.pt"),
    ("Pinterest","MLP"):Path(checkpoints_path, "MLP_Pinterest_0.0062_512_21_0.pt"),
    ("Pinterest","NCF"):Path(checkpoints_path, "NCF2_Pinterest_9e-05_32_9_10.pt"),}


hidden_dim_dict = {
    ("ML1M","VAE"): None,
    ("ML1M","MLP"): 32,
    ("ML1M","NCF"): 8,

    ("Yahoo","VAE"): None,
    ("Yahoo","MLP"):32,
    ("Yahoo","NCF"):8,
    
    ("Pinterest","VAE"): None,
    ("Pinterest","MLP"):512,
    ("Pinterest","NCF"): 64,
}


LXR_checkpoint_dict = {
    ("ML1M","VAE"): ('LXR_ML1M_VAE_26_38_128_3.185652725834087_1.420642300151426.pt',128),
    ("ML1M","MLP"): ('LXR_ML1M_MLP_19_3_128_13.109692424872248_7.829643365925428.pt',128),
    ("Yahoo","VAE"): ('LXR_Yahoo_VAE_neg-1.5pos_combined_19_26_128_18.958765029913238_4.92235962483309.pt',128),
    ("Yahoo","MLP"):('LXR_Yahoo_MLP_neg-pos_combined_last_29_37_128_12.40692505393434_0.19367009952856118.pt',128),
    ("Pinterest","VAE"): ('LXR_Pinterest_VAE_0_18_64_3.669673618522336_1.7221734058804223.pt',64),
    ("Pinterest","MLP"):('LXR_Pinterest_MLP_0_5_16_10.059416809308486_0.705778173474644.pt',16),
}

In [4]:
output_type = output_type_dict[recommender_name] ### Can be single, multiple
num_users = num_users_dict[data_name] 
num_items = num_items_dict[data_name] 

hidden_dim = hidden_dim_dict[(data_name,recommender_name)]
recommender_path = recommender_path_dict[(data_name,recommender_name)]

In [5]:
train_data = pd.read_csv(Path(files_path,f'train_data_{data_name}.csv'), index_col=0)
test_data = pd.read_csv(Path(files_path,f'test_data_{data_name}.csv'), index_col=0)
static_test_data = pd.read_csv(Path(files_path,f'static_test_data_{data_name}.csv'), index_col=0)
with open(Path(files_path,f'pop_dict_{data_name}.pkl'), 'rb') as f:
    pop_dict = pickle.load(f)
train_array = train_data.to_numpy()
test_array = test_data.to_numpy()
items_array = np.eye(num_items)
all_items_tensor = torch.Tensor(items_array).to(device)

In [6]:
test_array = static_test_data.iloc[:,:-2].to_numpy()
with open(Path(files_path, f'jaccard_based_sim_{data_name}.pkl'), 'rb') as f:
    jaccard_dict = pickle.load(f) 
with open(Path(files_path, f'cosine_based_sim_{data_name}.pkl'), 'rb') as f:
    cosine_dict = pickle.load(f) 
with open(Path(files_path, f'pop_dict_{data_name}.pkl'), 'rb') as f:
    pop_dict = pickle.load(f) 
with open(Path(files_path, f'item_to_cluster_{recommender_name}_{data_name}.pkl'), 'rb') as f:
    item_to_cluster = pickle.load(f) 
with open(Path(files_path, f'shap_values_{recommender_name}_{data_name}.pkl'), 'rb') as f:
    shap_values= pickle.load(f) 
for i in range(num_items):
    for j in range(i, num_items):
        jaccard_dict[(j,i)]= jaccard_dict[(i,j)]
        cosine_dict[(j,i)]= cosine_dict[(i,j)]
        pop_array = np.zeros(len(pop_dict))
for key, value in pop_dict.items():
    pop_array[key] = value
kw_dict = {'device':device,
          'num_items': num_items,
           'num_features': num_items, 
            'demographic':False,
          'pop_array':pop_array,
          'all_items_tensor':all_items_tensor,
          'static_test_data':static_test_data,
          'items_array':items_array,
          'output_type':output_type,
          'recommender_name':recommender_name}

In [7]:
import os


#os.chdir('/storage/mikhail/PI4Rec/code')
print(os.getcwd())

/storage/mikhail/PI4Rec/code


In [8]:
sys.path.append('../baselines') 
from ipynb.fs.defs.help_functions import recommender_run
from ipynb.fs.defs.lime import *
from ipynb.fs.defs.lime import *
importlib.reload(ipynb.fs.defs.lime)
from ipynb.fs.defs.lime import *
lime = LimeBase(distance_to_proximity)



from ipynb.fs.defs.help_functions import *
importlib.reload(ipynb.fs.defs.help_functions)
from ipynb.fs.defs.help_functions import *

from ipynb.fs.defs.recommenders_architecture import *
importlib.reload(ipynb.fs.defs.recommenders_architecture)
from ipynb.fs.defs.recommenders_architecture import *

VAE_config= {
"enc_dims": [512,128],
"dropout": 0.5,
"anneal_cap": 0.2,
"total_anneal_steps": 200000}


Pinterest_VAE_config= {
"enc_dims": [256,64],
"dropout": 0.5,
"anneal_cap": 0.2,
"total_anneal_steps": 200000}

In [9]:
def load_recommender():
    if recommender_name == 'MLP':
        recommender = MLP(hidden_dim, **kw_dict)
    elif recommender_name == 'VAE':  # This was incorrectly checking for 'MLP' twice
        if data_name == "Pinterest":
            recommender = VAE(Pinterest_VAE_config, **kw_dict)
        else:
            recommender = VAE(VAE_config, **kw_dict)
    elif recommender_name == 'NCF':
        MLP_temp = MLP_model(hidden_size=hidden_dim, num_layers=3, **kw_dict)
        GMF_temp = GMF_model(hidden_size=hidden_dim, **kw_dict)
        recommender = NCF(factor_num=hidden_dim, num_layers=3, dropout=0.5, model='NeuMF-pre', GMF_model=GMF_temp, MLP_model=MLP_temp, **kw_dict)
    else:
        raise ValueError(f"Unknown recommender name: {recommender_name}")

    # Check if the model's state_dict matches the architecture
    recommender_checkpoint = torch.load(recommender_path, map_location=device)
    recommender.load_state_dict(recommender_checkpoint, strict=False)
    recommender.eval()
    for param in recommender.parameters():
        param.requires_grad = False
    
    return recommender

In [10]:
def find_pop_mask(x, item_id):
    user_hist = torch.Tensor(x).to(device) # remove the positive item we want to explain from the user history
    user_hist[item_id] = 0
    item_pop_dict = {}
    
    for i,j in enumerate(user_hist>0):
        if j:
            item_pop_dict[i]=pop_array[i] # add the pop of the item to the dictionary
            
    return item_pop_dict

In [11]:
#User based similarities using Jaccard
def find_jaccard_mask(x, item_id, user_based_Jaccard_sim):
    user_hist = x # remove the positive item we want to explain from the user history
    user_hist[item_id] = 0
    item_jaccard_dict = {}
    for i,j in enumerate(user_hist>0):
        if j:
            if (i,item_id) in user_based_Jaccard_sim:
                item_jaccard_dict[i]=user_based_Jaccard_sim[(i,item_id)] # add Jaccard similarity between items
            else:
                item_jaccard_dict[i] = 0            

    return item_jaccard_dict

In [12]:
#Cosine based similarities between users and items
def find_cosine_mask(x, item_id, item_cosine):
    user_hist = x # remove the positive item we want to explain from the user history
    user_hist[item_id] = 0
    item_cosine_dict = {}
    for i,j in enumerate(user_hist>0):
        if j:
            if (i,item_id) in item_cosine:
                item_cosine_dict[i]=item_cosine[(i,item_id)]
            else:
                item_cosine_dict[i]=0

    return item_cosine_dict

In [13]:
class Explainer(nn.Module):
    def __init__(self, user_size, item_size, hidden_size):
        super(Explainer, self).__init__()
        
        self.users_fc = nn.Linear(in_features = user_size, out_features=hidden_size).to(device)
        self.items_fc = nn.Linear(in_features = item_size, out_features=hidden_size).to(device)
        self.bottleneck = nn.Sequential(
            nn.Tanh(),
            nn.Linear(in_features = hidden_size*2, out_features=hidden_size).to(device),
            nn.Tanh(),
            nn.Linear(in_features = hidden_size, out_features=user_size).to(device),
            nn.Sigmoid()
        ).to(device)
        
        
    def forward(self, user_tensor, item_tensor):
        user_output = self.users_fc(user_tensor.float())
        item_output = self.items_fc(item_tensor.float())
        combined_output = torch.cat((user_output, item_output), dim=-1)
        expl_scores = self.bottleneck(combined_output).to(device)
        return expl_scores

In [14]:
def find_lxr_mask(x, item_tensor, explainer):
    user_hist = x
    expl_scores = explainer(user_hist, item_tensor)
    
    # Debug: Print the explainer scores and user history
    print(f"Explainer scores: {expl_scores[:5]}")
    print(f"User history before masking: {user_hist[:5]}")
    
    x_masked = user_hist * expl_scores
    
    # Debug: Print masked user history
    print(f"User history after masking: {x_masked[:5]}")
    
    item_sim_dict = {}
    for i, j in enumerate(x_masked != 0):
        if j:
            item_sim_dict[i] = x_masked[i]
    
    return item_sim_dict


In [15]:
def load_explainer(fine_tuning=False, lambda_pos=None, lambda_neg=None, alpha=None):
    lxr_path, lxr_dim = LXR_checkpoint_dict[(data_name, recommender_name)]
    explainer = Explainer(num_items, num_items, lxr_dim)
    lxr_checkpoint = torch.load(Path(checkpoints_path, lxr_path))
    explainer.load_state_dict(lxr_checkpoint)
    explainer.eval()
    for param in explainer.parameters():
        param.requires_grad = False
    return explainer

In [16]:
def find_lime_mask(x, item_id, min_pert, max_pert, num_of_perturbations, kernel_func, feature_selection, recommender, num_samples=10, method = 'POS', **kw_dict):
    user_hist = x # remove the positive item we want to explain from the user history
    user_hist[item_id] = 0
    lime.kernel_fn = kernel_func
    neighborhood_data, neighborhood_labels, distances, item_id = get_lime_args(user_hist, item_id, recommender, all_items_tensor, min_pert = min_pert, max_pert = max_pert, num_of_perturbations = num_of_perturbations, seed = item_id, **kw_dict)
    if method=='POS':
        most_pop_items  = lime.explain_instance_with_data(neighborhood_data, neighborhood_labels, distances, item_id, num_samples, feature_selection, pos_neg='POS')
    if method=='NEG':
        most_pop_items  = lime.explain_instance_with_data(neighborhood_data, neighborhood_labels, distances, item_id, num_samples, feature_selection ,pos_neg='NEG')
        
    return most_pop_items 

In [17]:
def find_lire_mask(x, item_id, num_of_perturbations, kernel_func, feature_selection, recommender, proba=0.1, method = 'POS', **kw_dict):
    user_hist = x # remove the positive item we want to explain from the user history
    user_hist[item_id] = 0
    lime.kernel_fn = kernel_func

    neighborhood_data, neighborhood_labels, distances, item_id = get_lire_args(user_hist, item_id, recommender, all_items_tensor, train_array, num_of_perturbations = num_of_perturbations, seed = item_id, proba=0.1, **kw_dict)
    if method=='POS':
        most_pop_items  = lime.explain_instance_with_data(neighborhood_data, neighborhood_labels, distances, item_id, num_of_perturbations, feature_selection, pos_neg='POS')
    if method=='NEG':
        most_pop_items  = lime.explain_instance_with_data(neighborhood_data, neighborhood_labels, distances, item_id, num_of_perturbations, feature_selection ,pos_neg='NEG')
        
    return most_pop_items

In [18]:
def find_fia_mask(user_tensor, item_tensor, item_id, recommender):
    y_pred = recommender_run(user_tensor, recommender, item_tensor, item_id, **kw_dict).to(device)
    items_fia = {}
    user_hist = user_tensor.cpu().detach().numpy().astype(int)
    
    for i in range(num_items):
        if(user_hist[i] == 1):
            user_hist[i] = 0
            user_tensor = torch.FloatTensor(user_hist).to(device)
            y_pred_without_item = recommender_run(user_tensor, recommender, item_tensor, item_id, 'single', **kw_dict).to(device)
            infl_score = y_pred - y_pred_without_item
            items_fia[i] = infl_score
            user_hist[i] = 1

    return items_fia

In [19]:
def find_shapley_mask(user_tensor, user_id, model, shap_values, item_to_cluster):
    item_shap = {}
    shapley_values = shap_values[shap_values[:, 0].astype(int) == user_id][:,1:]
    user_vector = user_tensor.cpu().detach().numpy().astype(int)

    for i in np.where(user_vector.astype(int) == 1)[0]:
        items_cluster = item_to_cluster[i]
        item_shap[i] = shapley_values.T[int(items_cluster)][0]

    return item_shap  

In [20]:
def find_accent_mask(user_tensor, user_id, item_tensor, item_id, recommender_model, top_k):
   
    items_accent = defaultdict(float)
    factor = top_k - 1
    user_accent_hist = user_tensor.cpu().detach().numpy().astype(int)

    #Get topk items
    sorted_indices = list(get_top_k(user_tensor, user_tensor, recommender_model, **kw_dict).keys())
    
    if top_k == 1:
        # When k=1, return the index of the first maximum value
        top_k_indices = [sorted_indices[0]]
    else:
        top_k_indices = sorted_indices[:top_k]
   

    for iteration, item_k_id in enumerate(top_k_indices):

        # Set topk items to 0 in the user's history
        user_accent_hist[item_k_id] = 0
        user_tensor = torch.FloatTensor(user_accent_hist).to(device)
       
        item_vector = items_array[item_k_id]
        item_tensor = torch.FloatTensor(item_vector).to(device)
              
        # Check influence of the items in the history on this specific item in topk
        fia_dict = find_fia_mask(user_tensor, item_tensor, item_k_id, recommender_model)
         
        # Sum up all differences between influence on top1 and other topk values
        if not iteration:
            for key in fia_dict.keys():
                items_accent[key] *= factor
        else:
            for key in fia_dict.keys():
                items_accent[key] -= fia_dict[key]
       
    for key in items_accent.keys():
        items_accent[key] *= -1    

    return items_accent

In [21]:
def single_user_expl(user_vector, user_tensor, item_id, item_tensor, num_items, recommender_model, user_id = None, mask_type = None):
    '''
    This function invokes various explanation functions
    and returns a dictionary of explanations, sorted by their scores.
    '''
    user_hist_size = np.sum(user_vector)

    if mask_type == 'lime':
        POS_sim_items = find_lime_mask(user_vector, item_id, 50, 100, 150, distance_to_proximity, 'highest_weights', recommender_model, num_samples=user_hist_size, **kw_dict)
        NEG_sim_items = find_lime_mask(user_vector, item_id, 50, 100, 150, distance_to_proximity, 'highest_weights', recommender_model, num_samples=user_hist_size, method='NEG', **kw_dict)
    elif mask_type == 'lire':
        POS_sim_items = find_lire_mask(user_vector, item_id, 200, distance_to_proximity, 'highest_weights', recommender_model, proba=0.1, **kw_dict)
        NEG_sim_items = find_lire_mask(user_vector, item_id, 200, distance_to_proximity, 'highest_weights', recommender_model, proba=0.1, method='NEG', **kw_dict)
    else:
        if mask_type == 'cosine':
            sim_items = find_cosine_mask(user_tensor, item_id, cosine_dict)
        elif mask_type == 'shap':
            sim_items = find_shapley_mask(user_tensor, user_id, recommender_model, shap_values, item_to_cluster)
        elif mask_type == 'accent':
            sim_items = find_accent_mask(user_tensor, user_id, item_tensor, item_id, recommender_model, 5)
        elif mask_type == 'lxr':
            explainer = load_explainer(True)
            sim_items = find_lxr_mask(user_tensor, item_tensor, explainer)

        POS_sim_items = list(sorted(sim_items.items(), key=lambda item: item[1], reverse=True))[0:user_hist_size]


In [22]:
def single_user_metrics(user_vector, user_tensor, item_id, item_tensor, recommender_model, expl_dict, **kw_dict):
    """
    Calculate metrics for a single user with 5 steps of item masking
    Now explicitly includes POS@20 and NEG@20 metrics
    """
    POS_masked = user_tensor.clone()
    NEG_masked = user_tensor.clone()
    POS_masked[item_id] = 0
    NEG_masked[item_id] = 0
    
    # Use 5 steps
    num_steps = 5
    bins = range(1, num_steps + 1)  # [1, 2, 3, 4, 5]
    
    # Initialize metric arrays with 5 elements each
    POS_at_5 = [0] * num_steps
    POS_at_10 = [0] * num_steps
    POS_at_20 = [0] * num_steps  # Explicitly initialize POS@20
    
    NEG_at_5 = [0] * num_steps
    NEG_at_10 = [0] * num_steps
    NEG_at_20 = [0] * num_steps  # Explicitly initialize NEG@20
    
    DEL = [0] * num_steps
    INS = [0] * num_steps
    NDCG = [0] * num_steps
    
    # Get sorted items by importance
    POS_sim_items = expl_dict
    NEG_sim_items = list(sorted(dict(POS_sim_items).items(), key=lambda item: item[1], reverse=False))
    
    # For each step (1 to 5 items)
    for i, num_items_to_mask in enumerate(bins):
        # Create masks for positive and negative cases
        POS_masked = torch.zeros_like(user_tensor, dtype=torch.float32, device=kw_dict['device'])
        NEG_masked = torch.zeros_like(user_tensor, dtype=torch.float32, device=kw_dict['device'])
        
        # Apply POS masking
        for j in POS_sim_items[:num_items_to_mask]:
            POS_masked[j[0]] = 1
        POS_masked = user_tensor * (1 - POS_masked)
        
        # Apply NEG masking
        for j in NEG_sim_items[:num_items_to_mask]:
            NEG_masked[j[0]] = 1
        NEG_masked = user_tensor * (1 - NEG_masked)
        
        # Get rankings for both POS and NEG
        POS_ranked_list = get_top_k(POS_masked, user_tensor, recommender_model, **kw_dict)
        NEG_ranked_list = get_top_k(NEG_masked, user_tensor, recommender_model, **kw_dict)
        
        # Calculate POS ranking
        if item_id in list(POS_ranked_list.keys()):
            POS_index = list(POS_ranked_list.keys()).index(item_id) + 1
        else:
            POS_index = kw_dict['num_items']
            
        # Calculate NEG ranking
        if item_id in list(NEG_ranked_list.keys()):
            NEG_index = list(NEG_ranked_list.keys()).index(item_id) + 1
        else:
            NEG_index = kw_dict['num_items']
        
        # Calculate ALL metrics including P@20
        POS_at_5[i] = 1 if POS_index <= 5 else 0
        POS_at_10[i] = 1 if POS_index <= 10 else 0
        POS_at_20[i] = 1 if POS_index <= 20 else 0  # Explicitly calculate POS@20
        
        NEG_at_5[i] = 1 if NEG_index <= 5 else 0
        NEG_at_10[i] = 1 if NEG_index <= 10 else 0
        NEG_at_20[i] = 1 if NEG_index <= 20 else 0  # Explicitly calculate NEG@20
        
        # Calculate other metrics
        DEL[i] = float(recommender_run(POS_masked, recommender_model, item_tensor, item_id, **kw_dict).detach().cpu().numpy())
        INS[i] = float(recommender_run(user_tensor - POS_masked, recommender_model, item_tensor, item_id, **kw_dict).detach().cpu().numpy())
        NDCG[i] = get_ndcg(list(POS_ranked_list.keys()), item_id, **kw_dict)
    
    # Return all metrics including P@20
    res = [DEL, INS, NDCG, 
           POS_at_5, POS_at_10, POS_at_20,  # Include POS@20
           NEG_at_5, NEG_at_10, NEG_at_20]  # Include NEG@20
    return [np.array(x) for x in res]

In [23]:
class MetricsBaselines:
    def __init__(self, data_name, recommender_name):
        self.data_name = data_name
        self.recommender_name = recommender_name
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.setup_data_and_recommender()

    def setup_data_and_recommender(self):
        # Set up all necessary data and variables
        DP_DIR = Path("processed_data", self.data_name)
        self.files_path = Path(export_dir.parent, DP_DIR)
        self.num_users = num_users_dict[self.data_name]
        self.num_items = num_items_dict[self.data_name]
        
        self.test_data = pd.read_csv(Path(self.files_path, f'test_data_{self.data_name}.csv'), index_col=0)
        self.test_array = self.test_data.to_numpy()
        self.items_array = np.eye(self.num_items)
        
        with open(Path(self.files_path, f'pop_dict_{self.data_name}.pkl'), 'rb') as f:
            self.pop_dict = pickle.load(f)
        
        # Load other necessary data (jaccard_dict, cosine_dict, item_to_cluster, shap_values)
        
        self.kw_dict = {
            'device': self.device,
            'num_items': self.num_items,
            'num_features': self.num_items,
            'demographic': False,
            'pop_array': np.array([self.pop_dict.get(i, 0) for i in range(self.num_items)]),
            'all_items_tensor': torch.eye(self.num_items).to(self.device),
            'static_test_data': self.test_data,
            'items_array': self.items_array,
            'output_type': output_type_dict[self.recommender_name],
            'recommender_name': self.recommender_name,
            'files_path': self.files_path
        }
        
        self.recommender = self.load_recommender()

In [24]:
def process_user(user_index, test_array, test_data, recommender, kw_dict):
    try:
        user_vector = test_array[user_index]
        user_tensor = torch.FloatTensor(user_vector).to(kw_dict['device'])
        user_id = int(test_data.index[user_index])

        item_id = int(get_user_recommended_item(user_tensor, recommender, **kw_dict).detach().cpu().numpy())
        item_vector = kw_dict['items_array'][item_id]
        item_tensor = torch.FloatTensor(item_vector).to(kw_dict['device'])

        user_vector[item_id] = 0
        user_tensor[item_id] = 0

        results = {}
        for method in ['pop', 'jaccard', 'cosine', 'lime', 'lxr', 'accent', 'shap']:
            results[method] = single_user_expl(user_vector, user_tensor, item_id, item_tensor, kw_dict['num_items'], recommender, mask_type=method, user_id=user_id if method == 'shap' else None)

        return user_id, results
    except Exception as e:
        print(f"Error processing user {user_id}: {str(e)}")
        return None

In [25]:
def eval_one_expl_type(expl_name):
    print(f' ============ Start explaining {data_name} {recommender_name} by {expl_name} ============')
    
    # Load the appropriate explanation dictionary
    if expl_name == 'PI_base':
        with open(Path(files_path, f'{recommender_name}_PI_base_expl_dict.pkl'), 'rb') as handle:
            expl_dict = pickle.load(handle)
    else:
        with open(Path(files_path,f'{recommender_name}_{expl_name}_expl_dict.pkl'), 'rb') as handle:
            expl_dict = pickle.load(handle)
    
    recommender.eval()
    
    # Initialize arrays for metrics with 5 steps
    num_steps = 5
    users_DEL = np.zeros(num_steps)
    users_INS = np.zeros(num_steps)
    NDCG = np.zeros(num_steps)
    
    POS_at_5 = np.zeros(num_steps)
    POS_at_10 = np.zeros(num_steps)
    POS_at_20 = np.zeros(num_steps)
    
    NEG_at_5 = np.zeros(num_steps)
    NEG_at_10 = np.zeros(num_steps)
    NEG_at_20 = np.zeros(num_steps)

    with torch.no_grad():
        for i in tqdm(range(test_array.shape[0])):
            user_vector = test_array[i]
            user_tensor = torch.FloatTensor(user_vector).to(device)
            user_id = int(test_data.index[i])

            item_id = int(get_user_recommended_item(user_tensor, recommender, **kw_dict).detach().cpu().numpy())
            item_vector = items_array[item_id]
            item_tensor = torch.FloatTensor(item_vector).to(device)

            user_vector[item_id] = 0
            user_tensor[item_id] = 0

            user_expl = expl_dict[user_id]

            res = single_user_metrics(user_vector, user_tensor, item_id, item_tensor, recommender, user_expl, **kw_dict)
            users_DEL += res[0]
            users_INS += res[1]
            NDCG += res[2]
            POS_at_5 += res[3]
            POS_at_10 += res[4]
            POS_at_20 += res[5]
            NEG_at_5 += res[6]
            NEG_at_10 += res[7]
            NEG_at_20 += res[8]

    a = test_array.shape[0]

    print(f'users_DEL_{expl_name}: ', np.mean(users_DEL)/a)
    print(f'users_INS_{expl_name}: ', np.mean(users_INS)/a)
    print(f'NDCG_{expl_name}: ', np.mean(NDCG)/a)
    print(f'POS_at_5_{expl_name}: ', np.mean(POS_at_5)/a)
    print(f'POS_at_10_{expl_name}: ', np.mean(POS_at_10)/a)
    print(f'POS_at_20_{expl_name}: ', np.mean(POS_at_20)/a)
    print(f'NEG_at_5_{expl_name}: ', np.mean(NEG_at_5)/a)
    print(f'NEG_at_10_{expl_name}: ', np.mean(NEG_at_10)/a)
    print(f'NEG_at_20_{expl_name}: ', np.mean(NEG_at_20)/a)

    return {
        'DEL': users_DEL/a,
        'INS': users_INS/a,
        'NDCG': NDCG/a,
        'POS_at_5': POS_at_5/a,
        'POS_at_10': POS_at_10/a,
        'POS_at_20': POS_at_20/a,
        'NEG_at_5': NEG_at_5/a,
        'NEG_at_10': NEG_at_10/a,
        'NEG_at_20': NEG_at_20/a
    }

In [26]:
def run_all_baselines(data_name, recommender_name):
    global num_users, num_items, device, kw_dict, recommender, test_array, test_data, items_array, jaccard_dict, cosine_dict, pop_dict, item_to_cluster, shap_values

    # Update global variables for the current dataset and recommender
    num_users = num_users_dict[data_name]
    num_items = num_items_dict[data_name]
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load dataset-specific files
    DP_DIR = Path("processed_data", data_name)
    files_path = Path(export_dir.parent, DP_DIR)
    test_data = pd.read_csv(Path(files_path, f'test_data_{data_name}.csv'), index_col=0)
    test_array = test_data.to_numpy()
    items_array = np.eye(num_items)

    with open(Path(files_path, f'jaccard_based_sim_{data_name}.pkl'), 'rb') as f:
        jaccard_dict = pickle.load(f)
    with open(Path(files_path, f'cosine_based_sim_{data_name}.pkl'), 'rb') as f:
        cosine_dict = pickle.load(f)
    with open(Path(files_path, f'pop_dict_{data_name}.pkl'), 'rb') as f:
        pop_dict = pickle.load(f)
    with open(Path(files_path, f'item_to_cluster_{recommender_name}_{data_name}.pkl'), 'rb') as f:
        item_to_cluster = pickle.load(f)
    with open(Path(files_path, f'shap_values_{recommender_name}_{data_name}.pkl'), 'rb') as f:
        shap_values = pickle.load(f)

    # Update kw_dict
    kw_dict = {
        'device': device,
        'num_items': num_items,
        'num_features': num_items,
        'demographic': False,
        'pop_array': np.array([pop_dict.get(i, 0) for i in range(num_items)]),
        'all_items_tensor': torch.eye(num_items).to(device),
        'static_test_data': test_data,
        'items_array': items_array,
        'output_type': output_type_dict[recommender_name],
        'recommender_name': recommender_name,
        'files_path': files_path
    }

    # Load recommender
    recommender = load_recommender()

    # Run all baselines
    baselines = [ 'jaccard', 'cosine', 'lime', 'lxr', 'accent', 'shap']
    results = {}

    for baseline in baselines:
        print(f"Running {baseline} baseline for {data_name} {recommender_name}")
        results[baseline] = eval_one_expl_type(baseline)

    return results

In [27]:
def plot_all_metrics(results, data_name, recommender_name):
    # Mapping between metric keys and their display names (title, y-label, indicator)
    metrics_mapping = {
        'DEL':      ('AUC DEL-P@K', 'DEL@Kₑ', 'Lower is better'),
        'INS':      ('AUC INS-P@K', 'INS@Kₑ', 'Higher is better'),
        'NDCG':     ('AUC NDCG-P',  'CNDCG@Kₑ', 'Lower is better'),
        'POS_at_5': ('AUC POS-P@5', 'POS@20Kₑ', 'Lower is better'),
        'POS_at_10':('AUC POS-P@10','POS@20Kₑ', 'Lower is better'),
        'POS_at_20':('AUC POS-P@20','POS@20Kₑ', 'Lower is better'),
        'NEG_at_5': ('AUC NEG-P@5', 'NEG@20Kₑ', 'Higher is better'),
        'NEG_at_10':('AUC NEG-P@10','NEG@20Kₑ', 'Higher is better'),
        'NEG_at_20':('AUC NEG-P@20','NEG@20Kₑ', 'Higher is better')
    }
    
    # Style lists
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
    markers = ['o', 's', '^', 'D', 'v', 'x']
    linestyles = ['-', '--', '-.', ':', (0, (3, 1, 1, 1)), (0, (5, 2))]
    
    # Create plots directory
    os.makedirs('plots_discrete', exist_ok=True)
    
    # Plot each metric
    for metric, (title_name, y_label, indicator) in metrics_mapping.items():
        plt.figure(figsize=(12, 8))
        
        for i, (baseline, baseline_metrics) in enumerate(results.items()):
            if metric not in baseline_metrics:
                print(f"Warning: {metric} not found in {baseline} metrics")
                continue
            
            values = baseline_metrics[metric][:5]  # Take only first 5 values
            x = range(1, len(values) + 1)  # Numbers of masked items (1, 2, 3, 4, 5)
            
            plt.plot(
                x, values,
                label=baseline.upper(),
                color=colors[i % len(colors)],
                linestyle=linestyles[i % len(linestyles)],
                marker=markers[i % len(markers)],
                markersize=8,
                linewidth=2,
                markevery=1  # Markers on each value
            )
        
        plt.xlabel("Number of Masked Items", fontsize=30)
        plt.ylabel(y_label, fontsize=30)
        plt.grid(True, linestyle='--', alpha=0.7, linewidth=0.5)
        plt.xticks(range(1, 6), fontsize=18)
        plt.yticks(fontsize=18)
        plt.legend(
            loc='best', 
            fontsize=20,
            frameon=True,
            edgecolor='black'
        )

        plt.tight_layout()
        
        # Save the plot
        safe_display_name = title_name.replace(" ", "_").replace("@", "at")
        plt.savefig(f'plots_discrete/{safe_display_name}_{data_name}_{recommender_name}d.pdf',
                    format='pdf', bbox_inches='tight')
        print(f"Saved plot: {safe_display_name}_{data_name}_{recommender_name}d.pdf")
        plt.close()

In [28]:
def process_recommender(data_name, recommender_name):
    DP_DIR = Path("processed_data", data_name)
    files_path = Path("/storage/mikhail/PI4Rec", DP_DIR)
    
    num_users = num_users_dict[data_name]
    num_items = num_items_dict[data_name]
    num_features = num_items_dict[data_name]
    
    with open(Path(files_path, f'pop_dict_{data_name}.pkl'), 'rb') as f:
        pop_dict = pickle.load(f)
    pop_array = np.zeros(len(pop_dict))
    for key, value in pop_dict.items():
        pop_array[key] = value

    test_data = pd.read_csv(Path(files_path,f'test_data_{data_name}.csv'), index_col=0)
    static_test_data = pd.read_csv(Path(files_path,f'static_test_data_{data_name}.csv'), index_col=0)
    
    test_array = static_test_data.iloc[:,:-2].to_numpy()
    items_array = np.eye(num_items)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    all_items_tensor = torch.Tensor(items_array).to(device)

    output_type = output_type_dict[recommender_name]
    hidden_dim = hidden_dim_dict[(data_name,recommender_name)]
    recommender_path = recommender_path_dict[(data_name,recommender_name)]

    kw_dict = {
        'device': device,
        'num_items': num_items,
        'demographic': False,
        'num_features': num_features,
        'pop_array': pop_array,
        'all_items_tensor': all_items_tensor,
        'static_test_data': static_test_data,
        'items_array': items_array,
        'output_type': output_type,
        'recommender_name': recommender_name,
        'files_path': files_path
    }

    recommender = load_recommender()

    print(f"Processing {data_name} dataset with {recommender_name} recommender")
    
    results = {}
    for expl_name in [ 'jaccard', 'cosine', 'lime', 'lxr', 'accent', 'shap']:
        results[expl_name] = eval_one_expl_type(expl_name, data_name, recommender_name, test_array, test_data, items_array, recommender, kw_dict)
    
   # plot_all_metrics(results, data_name, recommender_name)

In [29]:
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, Border, Side

def save_results_to_excel(results, filename):
    wb = Workbook()
    
    # Create MF recommender sheet
    ws_mf = wb.active
    ws_mf.title = "MF Recommender"
    
    # Create VAE recommender sheet
    ws_vae = wb.create_sheet(title="VAE Recommender")
    
    for ws, title in [(ws_mf, "AUC values for explaining an MF recommender."), 
                      (ws_vae, "AUC values for explaining a VAE recommender.")]:
        
        # Add title
        ws['A1'] = f"Table: {title}"
        ws['A1'].font = Font(bold=True)
        ws.merge_cells('A1:G1')
        
        # Add headers
        headers = ['Method', 'k=5', 'k=10', 'k=20', 'DEL', 'INS', 'NDCG']
        for col, header in enumerate(headers, start=1):
            ws.cell(row=3, column=col, value=header).font = Font(bold=True)
        
        # Add data
        for row, (method, values) in enumerate(results.items(), start=4):
            ws.cell(row=row, column=1, value=method)
            for col, value in enumerate(values, start=2):
                ws.cell(row=row, column=col, value=value)
    
    # Apply some styling
    for ws in [ws_mf, ws_vae]:
        for row in ws[f'A3:G{ws.max_row}']:
            for cell in row:
                cell.border = Border(left=Side(style='thin'), 
                                     right=Side(style='thin'), 
                                     top=Side(style='thin'), 
                                     bottom=Side(style='thin'))
    
    wb.save(filename)

def run_and_format_results(data_name, recommender_name):
    results = {}
    for expl_name in ['jaccard', 'cosine', 'lime', 'shap', 'accent', 'lxr']:
        raw_results = eval_one_expl_type(expl_name)
        
        # Extract POS values
        pos_at_5 = raw_results['POS_at_5'][-1]  # Last value represents 100% of items
        pos_at_10 = raw_results['POS_at_10'][-1]
        pos_at_20 = raw_results['POS_at_20'][-1]
        
        # Format results as per the desired output
        results[expl_name.upper()] = [
            pos_at_5,
            pos_at_10,
            pos_at_20,
            raw_results['DEL'][-1],
            raw_results['INS'][-1],
            raw_results['NDCG'][-1]
        ]
    
    return results

In [30]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

def create_comparison_visualizations(all_results, save_dir='./'): 
    """
    Creates comprehensive visualizations comparing methods across datasets and recommenders
    
    Parameters:
    all_results: dict
        Format: {(dataset_name, recommender_name): results_dict}
        where results_dict contains metrics for each explanation method
    save_dir: str
        Directory to save the visualization files
    """
    # Prepare data for plotting
    plot_data = []
    for (dataset, recommender), results in all_results.items():
        for method, metrics in results.items():
            for metric_name, values in metrics.items():
                if isinstance(values, np.ndarray):
                    for step, value in enumerate(values, 1):
                        plot_data.append({
                            'Dataset': dataset,
                            'Recommender': recommender,
                            'Method': method.upper(),
                            'Metric': metric_name,
                            'Step': step,
                            'Value': value
                        })
    
    df = pd.DataFrame(plot_data)

    # 1. Heatmap of method performance across datasets and recommenders
    plt.figure(figsize=(15, 10))
    metrics_to_plot = ['DEL', 'INS', 'NDCG']
    
    for idx, metric in enumerate(metrics_to_plot):
        plt.subplot(1, 3, idx+1)
        pivot_data = df[df['Metric'] == metric].groupby(
            ['Dataset', 'Recommender', 'Method'])['Value'].mean().unstack()
        sns.heatmap(pivot_data, annot=True, fmt='.3f', cmap='YlOrRd')
        plt.title(f'{metric} Performance Comparison')
    
    plt.tight_layout()
    plt.savefig(f'{save_dir}/performance_heatmap.png')
    plt.close()

    # 2. Method stability analysis (standard deviation across steps)
    plt.figure(figsize=(12, 6))
    stability_data = df.groupby(['Method', 'Metric'])['Value'].std().unstack()
    stability_data.plot(kind='bar', width=0.8)
    plt.title('Method Stability Analysis (Standard Deviation Across Steps)')
    plt.xlabel('Explanation Method')
    plt.ylabel('Standard Deviation')
    plt.xticks(rotation=45)
    plt.legend(title='Metrics', bbox_to_anchor=(1.05, 1))
    plt.tight_layout()
    plt.savefig(f'{save_dir}/method_stability.png')
    plt.close()

    # 3. Radar chart for method comparison
    def create_radar_chart(data, methods, metrics):
        angles = np.linspace(0, 2*np.pi, len(metrics), endpoint=False)
        fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))
        
        for method in methods:
            values = [data[(data['Method'] == method) & 
                          (data['Metric'] == metric)]['Value'].mean() 
                     for metric in metrics]
            values += values[:1]
            angles_plot = np.concatenate([angles, [angles[0]]])
            ax.plot(angles_plot, values, 'o-', label=method)
            ax.fill(angles_plot, values, alpha=0.25)
        
        ax.set_xticks(angles)
        ax.set_xticklabels(metrics)
        ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
        
        return fig

    metrics_for_radar = ['DEL', 'INS', 'NDCG', 'POS_at_5', 'POS_at_10']
    for dataset in df['Dataset'].unique():
        for recommender in df['Recommender'].unique():
            data_subset = df[(df['Dataset'] == dataset) & 
                           (df['Recommender'] == recommender)]
            fig = create_radar_chart(data_subset, 
                                   df['Method'].unique(), 
                                   metrics_for_radar)
            plt.title(f'{dataset} - {recommender}\nMethod Comparison')
            plt.savefig(f'{save_dir}/radar_{dataset}_{recommender}.png')
            plt.close()

    # 4. Box plots showing distribution of metrics across steps
    plt.figure(figsize=(15, 10))
    for idx, metric in enumerate(['DEL', 'INS', 'NDCG'], 1):
        plt.subplot(1, 3, idx)
        sns.boxplot(data=df[df['Metric'] == metric], 
                   x='Method', y='Value', 
                   hue='Dataset')
        plt.title(f'{metric} Distribution')
        plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'{save_dir}/metric_distributions.png')
    plt.close()

    # 5. Performance improvement over steps
    plt.figure(figsize=(15, 8))
    for metric in ['DEL', 'INS', 'NDCG']:
        plt.subplot(1, 3, metrics_to_plot.index(metric) + 1)
        for method in df['Method'].unique():
            data = df[(df['Metric'] == metric) & (df['Method'] == method)]
            plt.plot(data.groupby('Step')['Value'].mean(), 
                    marker='o', 
                    label=method)
        plt.title(f'{metric} Progress Over Steps')
        plt.xlabel('Step')
        plt.ylabel('Value')
        if metric == 'NDCG':
            plt.legend(bbox_to_anchor=(1.05, 1))
    plt.tight_layout()
    plt.savefig(f'{save_dir}/progress_over_steps.png')
    plt.close()

    return df

def generate_summary_report(df, save_dir='./'):
    """
    Generates a statistical summary report of the results
    """
    # Calculate aggregate statistics
    summary = df.groupby(['Dataset', 'Recommender', 'Method', 'Metric'])['Value'].agg([
        'mean', 'std', 'min', 'max'
    ]).round(3)
    
    # Save summary to CSV
    summary.to_csv(f'{save_dir}/summary_statistics.csv')
    
    # Calculate method rankings
    rankings = df.groupby(['Dataset', 'Recommender', 'Method', 'Metric'])['Value'].mean().unstack()
    method_ranks = rankings.rank(ascending=False, method='min')
    method_ranks.to_csv(f'{save_dir}/method_rankings.csv')
    
    return summary, method_ranks

In [31]:
def create_results_table(results, data_name, recommender_name):
    """
    Create a comprehensive table of all metrics for each method and masking step
    """
    # Initialize the table structure
    table_data = []
    metrics = ['DEL', 'INS', 'NDCG', 'POS_at_5', 'POS_at_10', 'POS_at_20']
    
    for method in results.keys():
        for step in range(5):  # 5 steps
            row = {
                'Method': method.upper(),
                'Step': step + 1,  # 1-based indexing
                'Dataset': data_name,
                'Recommender': recommender_name
            }
            
            # Add all metrics for this method and step
            for metric in metrics:
                row[metric] = results[method][metric][step]
            
            table_data.append(row)
    
    # Create DataFrame
    df = pd.DataFrame(table_data)
    
    # Save to CSV
    csv_filename = f'results_{data_name}_{recommender_name}.csv'
    df.to_csv(csv_filename, index=False)
    
    # Create and save a formatted Excel file
    wb = Workbook()
    ws = wb.active
    ws.title = f"{data_name}_{recommender_name}_Results"
    
    # Add title (in row 1)
    ws['A1'] = f"Results for {data_name} dataset with {recommender_name} recommender"
    ws.merge_cells('A1:H1')
    ws['A1'].font = Font(bold=True)
    
    # Add headers (in row 3)
    headers = ['Method', 'Step', 'DEL', 'INS', 'NDCG', 'POS@5', 'POS@10', 'POS@20']
    for col, header in enumerate(headers, 1):
        cell = ws.cell(row=3, column=col, value=header)
        cell.font = Font(bold=True)
    
    # Add data with formatting
    current_method = None
    row_num = 4
    for _, row in df.iterrows():
        if current_method != row['Method']:
            current_method = row['Method']
            row_num += 1  # Add space between methods
        
        ws.cell(row=row_num, column=1, value=row['Method'])
        ws.cell(row=row_num, column=2, value=row['Step'])
        ws.cell(row=row_num, column=3, value=float(row['DEL']))
        ws.cell(row=row_num, column=4, value=float(row['INS']))
        ws.cell(row=row_num, column=5, value=float(row['NDCG']))
        ws.cell(row=row_num, column=6, value=float(row['POS_at_5']))
        ws.cell(row=row_num, column=7, value=float(row['POS_at_10']))
        ws.cell(row=row_num, column=8, value=float(row['POS_at_20']))
        
        row_num += 1
    
    # Apply formatting to all cells
    for row in ws.iter_rows(min_row=3, max_row=row_num-1):
        for cell in row:
            cell.border = Border(
                left=Side(style='thin'),
                right=Side(style='thin'),
                top=Side(style='thin'),
                bottom=Side(style='thin')
            )
            if isinstance(cell.value, float):
                cell.number_format = '0.000'
    
    # Adjust column widths (skip merged cells)
    column_widths = {}
    for row in ws.iter_rows(min_row=3):  # Start from row 3 to skip merged cells
        for cell in row:
            if isinstance(cell, MergedCell):
                continue
            col = cell.column_letter
            width = len(str(cell.value)) + 2
            current_width = column_widths.get(col, 0)
            column_widths[col] = max(current_width, width)
    
    # Apply the calculated widths
    for col, width in column_widths.items():
        ws.column_dimensions[col].width = width
    
    # Save Excel file
    excel_filename = f'results_{data_name}_{recommender_name}.xlsx'
    wb.save(excel_filename)
    
    # Return DataFrame for further analysis if needed
    return df

def print_summary_statistics(df):
    """
    Print summary statistics for each method
    """
    print("\nSummary Statistics:")
    print("=" * 80)
    
    methods = df['Method'].unique()
    metrics = ['DEL', 'INS', 'NDCG', 'POS_at_5', 'POS_at_10', 'POS_at_20']
    
    summary_rows = []
    for method in methods:
        method_data = df[df['Method'] == method]
        row = {'Method': method}
        
        for metric in metrics:
            row[f'{metric}_Mean'] = method_data[metric].mean()
            row[f'{metric}_Std'] = method_data[metric].std()
        
        summary_rows.append(row)
    
    summary_df = pd.DataFrame(summary_rows)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    print(summary_df.round(3))
    return summary_df



# Use the functions
def generate_tables(results, data_name, recommender_name):
    df = create_results_table(results, data_name, recommender_name)
    summary_df = print_summary_statistics(df)
    return df, summary_df

In [32]:
data_names = ["ML1M"]#, ,"Yahoo","Pinterest"
recommender_names = [ "MLP"]#"MLP""VAE", "NCF"

# Create a mapping between explainer names and actual explainer functions
explainer_mapping = {
   # 'pop': find_pop_mask,
    'jaccard': find_jaccard_mask,
    'cosine': find_cosine_mask,
    'lime': find_lime_mask,
    'lire': find_lire_mask,
    'lxr': find_lxr_mask,
    'accent': find_accent_mask,
    'shap': find_shapley_mask
}

# Initialize storage for all results
# Initialize storage for all results
all_results = {}

# Create plots directory
plots_dir = Path('plots_discrete')
plots_dir.mkdir(exist_ok=True)

for data_name in data_names:
    DP_DIR = Path("processed_data", data_name)
    files_path = Path("/storage/mikhail/PI4Rec", DP_DIR)

    # Load dataset-specific parameters and data
    num_users = num_users_dict[data_name] 
    num_items = num_items_dict[data_name] 
    num_features = num_items_dict[data_name]
        
    with open(Path(files_path, f'pop_dict_{data_name}.pkl'), 'rb') as f:
        pop_dict = pickle.load(f)
    pop_array = np.zeros(len(pop_dict))
    for key, value in pop_dict.items():
        pop_array[key] = value

    # Load data files
    train_data = pd.read_csv(Path(files_path,f'train_data_{data_name}.csv'), index_col=0)
    test_data = pd.read_csv(Path(files_path,f'test_data_{data_name}.csv'), index_col=0)
    static_test_data = pd.read_csv(Path(files_path,f'static_test_data_{data_name}.csv'), index_col=0)
    
    train_array = train_data.to_numpy()
    test_array = test_data.to_numpy()
    items_array = np.eye(num_items)
    all_items_tensor = torch.Tensor(items_array).to(device)
    test_array = static_test_data.iloc[:,:-2].to_numpy()

    for recommender_name in recommender_names:
        print(f"\nProcessing {data_name} dataset with {recommender_name} recommender")
        
        # Set up recommender-specific parameters
        output_type = output_type_dict[recommender_name]
        hidden_dim = hidden_dim_dict[(data_name,recommender_name)]
        recommender_path = recommender_path_dict[(data_name,recommender_name)]

        kw_dict = {
            'device': device,
            'num_items': num_items,
            'demographic': False,
            'num_features': num_features,
            'pop_array': pop_array,
            'all_items_tensor': all_items_tensor,
            'static_test_data': static_test_data,
            'items_array': items_array,
            'output_type': output_type,
            'recommender_name': recommender_name
        }

        recommender = load_recommender()

        # Process each explanation method
        results = {}
        for expl_name in ['jaccard', 'cosine', 'lime', 'lxr', 'accent', 'shap']:
            try:
                results[expl_name] = eval_one_expl_type(expl_name)
                # Take only first 5 values from each metric array
                results[expl_name] = {
                    metric: values[:5] if len(values) >= 5 else values 
                    for metric, values in results[expl_name].items()
                }
            except Exception as e:
                print(f"Error processing {expl_name} for {data_name} {recommender_name}: {str(e)}")
                continue

        # Store results in the overall dictionary
        all_results[(data_name, recommender_name)] = results
        
        # Generate and save plots
        plot_all_metrics(results, data_name, recommender_name)

print("\nProcessing complete. Results and visualizations have been saved to plots_discrete directory.")


Processing ML1M dataset with MLP recommender


  "import logging"
100%|██████████| 1208/1208 [01:02<00:00, 19.48it/s]


users_DEL_jaccard:  0.956402471365518
users_INS_jaccard:  0.7284667452809629
NDCG_jaccard:  0.9365028494228885
POS_at_5_jaccard:  0.9839403973509933
POS_at_10_jaccard:  0.9943708609271523
POS_at_20_jaccard:  0.9973509933774835
NEG_at_5_jaccard:  0.9996688741721853
NEG_at_10_jaccard:  0.9996688741721853
NEG_at_20_jaccard:  0.9998344370860927


100%|██████████| 1208/1208 [01:01<00:00, 19.52it/s]


users_DEL_cosine:  0.9519159356341851
users_INS_cosine:  0.7568457030327312
NDCG_cosine:  0.8894647104850668
POS_at_5_cosine:  0.9655629139072849
POS_at_10_cosine:  0.9829470198675497
POS_at_20_cosine:  0.9915562913907284
NEG_at_5_cosine:  0.9996688741721853
NEG_at_10_cosine:  1.0
NEG_at_20_cosine:  1.0


100%|██████████| 1208/1208 [01:01<00:00, 19.50it/s]


users_DEL_lime:  0.9469477439311561
users_INS_lime:  0.7595790430261994
NDCG_lime:  0.8849987563263897
POS_at_5_lime:  0.9675496688741722
POS_at_10_lime:  0.9832781456953642
POS_at_20_lime:  0.9917218543046358
NEG_at_5_lime:  0.9991721854304636
NEG_at_10_lime:  1.0
NEG_at_20_lime:  1.0


100%|██████████| 1208/1208 [01:17<00:00, 15.55it/s]


users_DEL_lxr:  0.9434245973874795
users_INS_lxr:  0.784139019255804
NDCG_lxr:  0.8742300574982305
POS_at_5_lxr:  0.9561258278145696
POS_at_10_lxr:  0.9763245033112583
POS_at_20_lxr:  0.9864238410596026
NEG_at_5_lxr:  0.9990066225165563
NEG_at_10_lxr:  0.9998344370860927
NEG_at_20_lxr:  1.0


100%|██████████| 1208/1208 [01:18<00:00, 15.37it/s]


users_DEL_accent:  0.9502151558533409
users_INS_accent:  0.7583083641203429
NDCG_accent:  0.932857665158068
POS_at_5_accent:  0.9831125827814569
POS_at_10_accent:  0.991225165562914
POS_at_20_accent:  0.9942052980132451
NEG_at_5_accent:  0.984933774834437
NEG_at_10_accent:  0.9948675496688741
NEG_at_20_accent:  0.9975165562913907


100%|██████████| 1208/1208 [01:04<00:00, 18.75it/s]


users_DEL_shap:  0.9709385250873912
users_INS_shap:  0.6460185138348318
NDCG_shap:  0.9594799328472047
POS_at_5_shap:  0.997682119205298
POS_at_10_shap:  0.9996688741721853
POS_at_20_shap:  0.9998344370860927
NEG_at_5_shap:  0.9963576158940397
NEG_at_10_shap:  0.9980132450331125
NEG_at_20_shap:  0.9986754966887418
Saved plot: AUC_DEL-PatK_ML1M_MLPd.pdf
Saved plot: AUC_INS-PatK_ML1M_MLPd.pdf
Saved plot: AUC_NDCG-P_ML1M_MLPd.pdf
Saved plot: AUC_POS-Pat5_ML1M_MLPd.pdf
Saved plot: AUC_POS-Pat10_ML1M_MLPd.pdf
Saved plot: AUC_POS-Pat20_ML1M_MLPd.pdf
Saved plot: AUC_NEG-Pat5_ML1M_MLPd.pdf
Saved plot: AUC_NEG-Pat10_ML1M_MLPd.pdf
Saved plot: AUC_NEG-Pat20_ML1M_MLPd.pdf

Processing complete. Results and visualizations have been saved to plots_discrete directory.


In [33]:
plot_all_metrics(results, data_name, recommender_name)

Saved plot: AUC_DEL-PatK_ML1M_MLPd.pdf
Saved plot: AUC_INS-PatK_ML1M_MLPd.pdf
Saved plot: AUC_NDCG-P_ML1M_MLPd.pdf
Saved plot: AUC_POS-Pat5_ML1M_MLPd.pdf
Saved plot: AUC_POS-Pat10_ML1M_MLPd.pdf
Saved plot: AUC_POS-Pat20_ML1M_MLPd.pdf
Saved plot: AUC_NEG-Pat5_ML1M_MLPd.pdf
Saved plot: AUC_NEG-Pat10_ML1M_MLPd.pdf
Saved plot: AUC_NEG-Pat20_ML1M_MLPd.pdf


In [34]:
create_results_table(results, data_name, recommender_name)

Unnamed: 0,Method,Step,Dataset,Recommender,DEL,INS,NDCG,POS_at_5,POS_at_10,POS_at_20
0,JACCARD,1,ML1M,MLP,0.970001,0.666879,0.973947,0.998344,0.999172,0.999172
1,JACCARD,2,ML1M,MLP,0.963509,0.703173,0.954638,0.995033,0.997517,0.998344
2,JACCARD,3,ML1M,MLP,0.956697,0.73351,0.937319,0.989238,0.995033,0.997517
3,JACCARD,4,ML1M,MLP,0.949667,0.758765,0.916401,0.97351,0.99255,0.995861
4,JACCARD,5,ML1M,MLP,0.942137,0.780008,0.900209,0.963576,0.987583,0.995861
5,COSINE,1,ML1M,MLP,0.968171,0.685167,0.948745,0.997517,1.0,1.0
6,COSINE,2,ML1M,MLP,0.960639,0.73085,0.914627,0.986755,0.995861,0.998344
7,COSINE,3,ML1M,MLP,0.952149,0.765268,0.882621,0.964404,0.987583,0.993377
8,COSINE,4,ML1M,MLP,0.943702,0.791354,0.860932,0.944536,0.97351,0.987583
9,COSINE,5,ML1M,MLP,0.934919,0.811589,0.840398,0.934603,0.957781,0.978477
