# This notebook computes the evaluation metrics for the PI_base explainer (in which the baseline is the null vector)

# Imports and initial settings

In [11]:
import pandas as pd
import numpy as np
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
export_dir = os.getcwd()
from pathlib import Path
import pickle
from collections import defaultdict
import time
import torch
import torch.nn as nn
import copy
import torch.nn.functional as F
import optuna
import logging
import matplotlib.pyplot as plt
import ipynb
import importlib
import random
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [12]:
# The project root is the parent of the notebook's working directory; the
# trained recommender checkpoints are looked up in its "checkpoints" folder.
export_dir = Path.cwd().parent
checkpoints_path = export_dir / "checkpoints"
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [13]:
# Output-type tag of each recommender architecture (forwarded to the helper
# functions through kw_dict['output_type']).
output_type_dict = dict(VAE="multiple", MLP="single", NCF="single")

# Number of users in each dataset.
num_users_dict = dict(ML1M=6037, Yahoo=13797, Pinterest=19155)

# Number of items in each dataset (also used as the number of features).
num_items_dict = dict(ML1M=3381, Yahoo=4604, Pinterest=9362)


# Checkpoint file of the trained recommender for every (dataset, recommender)
# pair; each file name is resolved relative to checkpoints_path.
recommender_path_dict = {
    pair: Path(checkpoints_path, file_name)
    for pair, file_name in (
        (("ML1M", "VAE"), "VAE_ML1M_0.0007_128_10.pt"),
        (("ML1M", "MLP"), "MLP1_ML1M_0.0076_256_7.pt"),
        (("ML1M", "NCF"), "NCF_ML1M_5e-05_64_16.pt"),

        (("Yahoo", "VAE"), "VAE_Yahoo_0.0001_128_13.pt"),
        (("Yahoo", "MLP"), "MLP2_Yahoo_0.0083_128_1.pt"),
        (("Yahoo", "NCF"), "NCF_Yahoo_0.001_64_21_0.pt"),

        (("Pinterest", "VAE"), "VAE_Pinterest_12_18_0.0001_256.pt"),
        (("Pinterest", "MLP"), "MLP_Pinterest_0.0062_512_21_0.pt"),
        (("Pinterest", "NCF"), "NCF2_Pinterest_9e-05_32_9_10.pt"),
    )
}


# Hidden dimension handed to load_recommender for every (dataset, recommender)
# pair; the VAE entries are None.
hidden_dim_dict = {
    (dataset, model): hidden_size
    for dataset, per_model in (
        ("ML1M", {"VAE": None, "MLP": 32, "NCF": 8}),
        ("Yahoo", {"VAE": None, "MLP": 32, "NCF": 8}),
        ("Pinterest", {"VAE": None, "MLP": 512, "NCF": 64}),
    )
    for model, hidden_size in per_model.items()
}

# Important to edit:

In [14]:
# Datasets to process. Full set: ["ML1M", "Yahoo", "Pinterest"].
data_names = ["ML1M"]

# Recommenders to explain. Full set: ["MLP", "VAE", "NCF"].
recommender_names = ["MLP"]

# Explanation methods to evaluate; this notebook only handles 'PI_base'.
expl_names_list = ["PI_base"]

In [15]:
# Number of interpolation steps in the PI process (kept constant at 10).
num_steps = 10
# Baseline variant handed to find_ip_mask.
method = "base"
# Text file to which the evaluation statistics are appended.
new_file_name = "PI_check"

# Import functions from other notebooks

In [16]:
# Pull in every definition from the sibling notebooks through ipynb.fs.defs.
# Each module is star-imported, reloaded with importlib.reload, and then
# star-imported again -- presumably so that edits made to those notebooks are
# picked up without restarting the kernel (TODO confirm).
# NOTE(review): the helpers called further down (load_recommender,
# get_user_recommended_item, single_user_metrics, find_ip_mask) are not defined
# in this notebook, so they presumably come from these star imports.
from ipynb.fs.defs.recommenders_architecture import *
importlib.reload(ipynb.fs.defs.recommenders_architecture)
from ipynb.fs.defs.recommenders_architecture import *

from ipynb.fs.defs.help_functions import *
importlib.reload(ipynb.fs.defs.help_functions)
from ipynb.fs.defs.help_functions import *

from ipynb.fs.defs.SPINRec_functions import *
importlib.reload(ipynb.fs.defs.SPINRec_functions)
from ipynb.fs.defs.SPINRec_functions import *

# Evaluation help functions

In [17]:
def single_user_expl(user_vector, user_tensor, item_id, item_tensor, num_items, recommender_model, all_items_tensor, user_id = None, mask_type = None):
    """Return the top-scored explanation entries for one user and one item.

    The scores come from ``find_ip_mask``; its result is sorted by score in
    descending order and truncated to the size of the user's history.

    Parameters
    ----------
    user_vector : array-like
        The user's interaction vector; its sum is used as the history size.
    user_tensor : torch.Tensor
        Tensor version of ``user_vector``, forwarded to ``find_ip_mask``.
    item_id : int
        The item being explained, forwarded to ``find_ip_mask``.
    item_tensor, num_items, user_id
        Accepted for interface compatibility; not used by this function.
    recommender_model
        The recommender, forwarded to ``find_ip_mask`` as ``model``.
    all_items_tensor
        Forwarded to ``find_ip_mask``.
    mask_type : str
        Must be ``'PI_base'``; this notebook supports no other explainer.

    Returns
    -------
    list of tuple
        ``(key, score)`` pairs from the mapping returned by ``find_ip_mask``,
        highest score first, at most ``sum(user_vector)`` entries.

    Raises
    ------
    ValueError
        If ``mask_type`` is not ``'PI_base'``.

    Notes
    -----
    Reads the notebook-level globals ``num_steps``, ``method``, ``device``,
    ``recommender_name``, ``train_array`` and ``pop_array``.
    """
    # Fail loudly on an unsupported explainer instead of printing a message and
    # then crashing on an undefined variable.
    if mask_type != 'PI_base':
        raise ValueError(f"Wrong notebook!! Unsupported mask_type {mask_type!r}; expected 'PI_base'.")

    # Cast to a plain int so the value is a valid slice bound even when the
    # interaction vector is stored as floats.
    user_hist_size = int(np.sum(user_vector))

    sim_items = find_ip_mask(model=recommender_model, user_tensor=user_tensor, item_id=item_id, all_items_tensor=all_items_tensor, num_steps=num_steps, method=method, device=device, recommender_name=recommender_name, train_array=train_array, pop_array=pop_array)

    # Highest score first, keeping as many entries as the user has interactions.
    ranked_items = sorted(sim_items.items(), key=lambda item: item[1], reverse=True)
    return ranked_items[:user_hist_size]

In [18]:
def eval_one_expl_type(expl_name):
    """Evaluate the stored explanations of one explainer and append the averages to the stats file.

    Loads ``{recommender_name}_{expl_name}_expl_dict.pkl`` from ``files_path``,
    runs ``single_user_metrics`` for every row of ``test_array`` and appends
    one comma-separated line of per-metric averages to ``new_file_name``.

    Only six metrics are collected from ``single_user_metrics`` (result
    positions 0-5 are stored as DEL, INS, NDCG, POS@5, POS@10, POS@20). The
    remaining columns of the summary line are kept for layout compatibility
    and are written as ``nan``.

    Parameters
    ----------
    expl_name : str
        Name of the explainer; used in the pickle file name and the log header.

    Notes
    -----
    Reads the notebook-level globals ``new_file_name``, ``files_path``,
    ``recommender_name``, ``recommender``, ``test_array``, ``test_data``,
    ``items_array``, ``device`` and ``kw_dict``.
    """
    # Column order of the summary line written at the end of the evaluation.
    metric_names = [
        "users_DEL", "users_INS", "reciprocal", "NDCG",
        "POS_at_1", "NEG_at_1", "rank_at_1",
        "POS_at_5", "NEG_at_5", "rank_at_5",
        "POS_at_10", "NEG_at_10", "rank_at_10",
        "POS_at_20", "NEG_at_20", "rank_at_20",
        "POS_at_50", "NEG_at_50", "rank_at_50",
        "POS_at_100", "NEG_at_100", "rank_at_100",
    ]
    metrics = {name: [] for name in metric_names}
    # Position k of single_user_metrics' result is stored under collected[k].
    collected = ["users_DEL", "users_INS", "NDCG", "POS_at_5", "POS_at_10", "POS_at_20"]

    def _mean_or_nan(values):
        # np.mean([]) is nan as well, but emits a RuntimeWarning for every
        # metric that is never collected.
        return np.mean(values) if len(values) else np.nan

    with open(new_file_name, "a") as file:
        file.write(f' ============ Start explaining by {expl_name} ============\n')
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # explanation files produced by this project.
        with open(Path(files_path,f'{recommender_name}_{expl_name}_expl_dict.pkl'), 'rb') as handle:
            expl_dict = pickle.load(handle)
        recommender.eval()

        num_of_bins = 10

        with torch.no_grad():
            for i in range(test_array.shape[0]):
                # Work on a copy: test_array[i] is a view, and zeroing the
                # recommended item below would otherwise permanently alter the
                # shared test data used by later explainers and recommenders.
                user_vector = test_array[i].copy()
                user_tensor = torch.FloatTensor(user_vector).to(device)
                user_id = int(test_data.index[i])

                item_id = int(get_user_recommended_item(user_tensor, recommender, **kw_dict).detach().cpu().numpy())
                item_vector = items_array[item_id]
                item_tensor = torch.FloatTensor(item_vector).to(device)
                # Remove the recommended item from the user's own copy of the data.
                user_vector[item_id] = 0
                user_tensor[item_id] = 0

                user_expl = expl_dict[user_id]
                res = single_user_metrics(user_vector, user_tensor, item_id, item_tensor, num_of_bins, recommender, user_expl, **kw_dict)

                for position, name in enumerate(collected):
                    metrics[name].append(np.mean(res[position]))

        summary = ", ".join(f"{_mean_or_nan(metrics[name])}" for name in metric_names)
        file.write(f"{summary}\n")
        file.write(f"\n")

# START HERE

In [19]:
# Driver: for every dataset and recommender, build the PI_base explanation of
# each test user's top recommendation, store the explanations as a pickle next
# to the processed data, and then evaluate them with eval_one_expl_type.
for data_name in data_names:

    DP_DIR = Path("processed_data", data_name)
    files_path = Path(export_dir, DP_DIR)

    num_users = num_users_dict[data_name]
    num_items = num_items_dict[data_name]
    num_features = num_items_dict[data_name]

    # NOTE(security): pickle.load can execute arbitrary code; only load files
    # produced by this project's preprocessing.
    with open(Path(files_path, f'pop_dict_{data_name}.pkl'), 'rb') as f:
        pop_dict = pickle.load(f)
    # Dense popularity array indexed by the keys of pop_dict.
    pop_array = np.zeros(len(pop_dict))
    for key, value in pop_dict.items():
        pop_array[key] = value

    # Data
    train_data = pd.read_csv(Path(files_path,f'train_data_{data_name}.csv'), index_col=0)
    test_data = pd.read_csv(Path(files_path,f'test_data_{data_name}.csv'), index_col=0)

    train_array = train_data.to_numpy()
    test_array = test_data.to_numpy()
    # One-hot representation of every item.
    items_array = np.eye(num_items)
    all_items_tensor = torch.Tensor(items_array).to(device)


    for recommender_name in recommender_names:
        output_type = output_type_dict[recommender_name]
        hidden_dim = hidden_dim_dict[(data_name,recommender_name)]

        recommender_path = recommender_path_dict[(data_name,recommender_name)]

        # Shared keyword arguments expected by the imported helper functions.
        kw_dict = {'device':device,
                  'num_items': num_items,
                  'demographic':False,
                  'num_features':num_features,
                  'pop_array':pop_array,
                  'all_items_tensor':all_items_tensor,
                  'items_array':items_array,
                  'output_type':output_type,
                  'recommender_name':recommender_name}


        recommender = load_recommender(data_name, hidden_dim, checkpoints_path, recommender_path, **kw_dict)

        # Append mode creates the stats file when it does not exist yet, so no
        # separate existence check is needed.
        with open(new_file_name, 'a') as file:
            file.write(f' ============ This stats are for {data_name} dataset ============\n')
            file.write(f' ============ & for the recommender {recommender_name} ============\n')

        for expl_name in expl_names_list:
            if expl_name == "PI_base":
                ip_expl_dict = {}

                for i in range(test_array.shape[0]):
                    # Progress indicator.
                    if i%500 == 0:
                        print(i)
                    # Work on a copy: test_array[i] is a view, and zeroing the
                    # recommended item below would otherwise permanently alter
                    # the test data reused by the evaluation step and by the
                    # next recommender.
                    user_vector = test_array[i].copy()
                    user_tensor = torch.FloatTensor(user_vector).to(device)
                    user_id = int(test_data.index[i])

                    item_id = int(get_user_recommended_item(user_tensor, recommender, **kw_dict).detach().cpu().numpy())
                    item_vector = items_array[item_id]
                    item_tensor = torch.FloatTensor(item_vector).to(device)

                    # Remove the recommended item from the user's own copy of the data.
                    user_vector[item_id] = 0
                    user_tensor[item_id] = 0

                    # single_user_expl calls find_ip_mask, where the explainer is configured.
                    ip_expl_dict[user_id] = single_user_expl(user_vector, user_tensor, item_id, item_tensor, num_items, recommender, all_items_tensor=kw_dict['all_items_tensor'], mask_type= 'PI_base')

                with open(Path(files_path,f'{recommender_name}_PI_base_expl_dict.pkl'), 'wb') as handle:
                    pickle.dump(ip_expl_dict, handle)

            eval_one_expl_type(expl_name)

print("Done")

0
500
1000
Done
