# This notebook contains functions and classes that will be used for the baselines implementation (LIME, LIRE, Deep Shap)

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import Linear
from torch.nn import functional as F
import scipy as sp
from sklearn.linear_model import Ridge, lars_path
from sklearn.utils import check_random_state
import ipynb
import importlib

In [None]:
from ipynb.fs.defs.help_functions import *
importlib.reload(ipynb.fs.defs.help_functions)
from ipynb.fs.defs.help_functions import *

# LIME & LIRE

In [1]:
class LimeBase(object):
    """Class for learning a locally linear sparse model from perturbed data.

    The instance stores a kernel function (distances -> sample weights), a
    verbosity flag, a validated random state and the torch device detected at
    construction time. The helper methods fit scikit-learn ``Ridge`` /
    ``lars_path`` models on a weighted neighborhood of one data point.
    """
    
    def __init__(self,
                 kernel_fn,
                 verbose=False,
                 random_state=None):
        """Init function

        Args:
            kernel_fn: function that transforms an array of distances into an
                        array of proximity values (floats).
            verbose: if true, print local prediction values from linear model.
            random_state: an integer or numpy.RandomState that will be used to
                generate random numbers. If None, the random state will be
                initialized using the internal numpy seed.
        """
        self.kernel_fn = kernel_fn
        self.verbose = verbose
        self.random_state = check_random_state(random_state)
        # Stored for callers; none of the methods visible in this class read it.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    @staticmethod
    def generate_lars_path(weighted_data, weighted_labels):
        """Generates the lars path for weighted data.

        Args:
            weighted_data: data that has been weighted by kernel
            weighted_labels: labels, weighted by kernel

        Returns:
            (alphas, coefs), both are arrays corresponding to the
            regularization parameter and coefficients, respectively
        """
        x_vector = weighted_data
        # The path is computed in 'lasso' mode and capped at 15 iterations;
        # eps is a hard-coded value handed straight to lars_path.
        alphas, _, coefs = lars_path(x_vector,
                                     weighted_labels,
                                     max_iter=15,
                                     eps = 2.220446049250313e-7,
                                     method='lasso',
                                     verbose=False)
        return alphas, coefs

    def forward_selection(self, data, labels, weights, num_features):
        """Iteratively adds features to the model.

        Greedy search: in each round every not-yet-selected column is tried
        together with the already selected ones, an unregularised Ridge
        (alpha=0) is fitted with ``weights`` as sample weights, and the column
        giving the highest ``clf.score`` is kept.

        Args:
            data: 2d array, one column per candidate feature.
            labels: target values for the rows of ``data``.
            weights: per-row sample weights.
            num_features: number of features to select; capped at the number
                of columns of ``data``.

        Returns:
            numpy array with the selected column indices, in selection order.
        """
        clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state)
        used_features = []
        for _ in range(min(num_features, data.shape[1])):
            # Sentinel lower bound for the best score seen in this round.
            max_ = -100000000
            best = 0
            for feature in range(data.shape[1]):
                if feature in used_features:
                    continue
                clf.fit(data[:, used_features + [feature]], labels,
                        sample_weight=weights)
                score = clf.score(data[:, used_features + [feature]],
                                  labels,
                                  sample_weight=weights)
                if score > max_:
                    best = feature
                    max_ = score
            used_features.append(best)
        return np.array(used_features)

    def feature_selection(self, data, labels, weights, num_features, method):
        """Selects features for the model.

        Args:
            data: 2d array (dense or scipy sparse) of perturbed samples; row 0
                is used as the reference row by 'highest_weights'.
            labels: target values for the rows of ``data``.
            weights: per-row sample weights.
            num_features: maximum number of features to return.
            method: one of
                'none': every column index is returned;
                'forward_selection': see ``forward_selection``;
                'highest_weights': fit Ridge(alpha=0.01) on all columns and
                    keep the columns with the largest |coef * data[0]|;
                'lasso_path': walk the lasso regularisation path backwards
                    until a step has at most ``num_features`` nonzero coefs;
                'auto': 'forward_selection' when num_features <= 6, otherwise
                    'highest_weights'.

        Returns:
            array of selected column indices. For a ``method`` that matches
            none of the options no branch is taken and None is returned.
        """
        if method == 'none':
            return np.array(range(data.shape[1]))
        elif method == 'forward_selection':
            return self.forward_selection(data, labels, weights, num_features)
        elif method == 'highest_weights':
            clf = Ridge(alpha=0.01, fit_intercept=True,
                        random_state=self.random_state)
            clf.fit(data, labels, sample_weight=weights)

            coef = clf.coef_
            if sp.sparse.issparse(data):
                coef = sp.sparse.csr_matrix(clf.coef_)
                weighted_data = coef.multiply(data[0])
                # Note: most efficient to slice the data before reversing
                sdata = len(weighted_data.data)                
                argsort_data = np.abs(weighted_data.data).argsort()
                # Edge case where data is more sparse than requested number of feature importances
                # In that case, we just pad with zero-valued features
                if sdata < num_features:
                    # All stored entries, largest absolute value first.
                    nnz_indexes = argsort_data[::-1]
                    indices = weighted_data.indices[nnz_indexes]
                    num_to_pad = num_features - sdata
                    indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype)))
                    indices_set = set(indices)
                    pad_counter = 0
                    # Overwrite the zero placeholders with column indices that
                    # are not in the set built above.
                    for i in range(data.shape[1]):
                        if i not in indices_set:
                            indices[pad_counter + sdata] = i
                            pad_counter += 1
                            if pad_counter >= num_to_pad:
                                break
                else:
                    # Top num_features stored entries, largest absolute value first.
                    nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
                    indices = weighted_data.indices[nnz_indexes]
                return indices
            else:
                weighted_data = coef * data[0]
                feature_weights = sorted(
                    zip(range(data.shape[1]), weighted_data),
                    key=lambda x: np.abs(x[1]),
                    reverse=True)
                return np.array([x[0] for x in feature_weights[:num_features]])
        elif method == 'lasso_path':
            weights = np.asarray(weights)
            # Centre by the weighted mean and scale rows by sqrt(weight) before
            # computing the lars path.
            weighted_data = ((data - np.average(data, axis=0, weights=weights))
                             * np.sqrt(weights[:, np.newaxis]))
            weighted_labels = ((labels - np.average(labels, weights=weights))
                               * np.sqrt(weights))
            # Fallback (all columns) used when the loop below does not run.
            nonzero = range(weighted_data.shape[1])
            _, coefs = self.generate_lars_path(weighted_data,
                                               weighted_labels)
            # Walk from the last path step down to step 1 (step 0 is not
            # inspected) and stop at the first step that is sparse enough.
            for i in range(len(coefs.T) - 1, 0, -1):
                nonzero = coefs.T[i].nonzero()[0]
                if len(nonzero) <= num_features:
                    break
            used_features = nonzero
            return used_features
        elif method == 'auto':
            if num_features <= 6:
                n_method = 'forward_selection'
            else:
                n_method = 'highest_weights'
            return self.feature_selection(data, labels, weights,
                                          num_features, n_method)

    def explain_instance_with_data(self,
                                   neighborhood_data,
                                   neighborhood_labels,
                                   distances_list,
                                   label,
                                   num_features,
                                   feature_selection='auto',
                                   model_regressor=None,
                                   pos_neg = 'POS'):
        """Takes perturbed data, labels and distances, returns explanation.

        The linear model is fitted on the columns that are nonzero in the
        first row of ``neighborhood_data``; ``self.feature_selection`` is not
        called from this method.

        Args:
            neighborhood_data: perturbed data, 2d array. first element is
                               assumed to be the original data point.
            neighborhood_labels: corresponding perturbed labels. should have as
                                 many columns as the number of possible labels.
            distances_list: distances to original data point; passed to
                ``self.kernel_fn`` to obtain the sample weights.
            label: label (column of ``neighborhood_labels``) for which we want
                an explanation
            num_features: accepted for interface compatibility; not read by
                this method.
            feature_selection: accepted for interface compatibility; not read
                by this method.
            model_regressor: sklearn regressor to use in explanation.
                Defaults to Ridge regression (alpha=1) if None. Must have
                model_regressor.coef_ and 'sample_weight' as a parameter
                to model_regressor.fit()
            pos_neg: ordering of the returned list:
                'POS': by coefficient, largest first;
                'NEG': by coefficient, smallest first;
                'ABS': by absolute coefficient, smallest first.

        Returns:
            A list of tuples (x, y) where x is a feature index and y is the
            coefficient of the local linear model for that feature, ordered as
            selected by ``pos_neg``. For any other ``pos_neg`` value the string
            'Unfamiliar method' is returned instead of a list.
        """
        weights = self.kernel_fn(distances_list)
        labels_column = neighborhood_labels[:,label]  
        # Only the columns that are nonzero in the original data point (row 0).
        used_features = np.nonzero(neighborhood_data[0])[0] #.tolist()
    
        if model_regressor is None:
            model_regressor = Ridge(alpha=1, fit_intercept=True,
                                    random_state=self.random_state)
        easy_model = model_regressor
        easy_model.fit(neighborhood_data[:, used_features],
                       labels_column, sample_weight=weights)
        # The score is computed but neither printed nor returned.
        prediction_score = easy_model.score(
            neighborhood_data[:, used_features],
            labels_column, sample_weight=weights)

        # Prediction of the local model on the original data point; only
        # printed when verbose is set.
        local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1))

        if self.verbose:
            print('Intercept', easy_model.intercept_)
            print('Prediction_local', local_pred,)
            print('Right:', neighborhood_labels[0, label])
        if pos_neg =='POS':
            return sorted(zip(used_features, easy_model.coef_),
                       key=lambda x: x[1], reverse=True)
        elif pos_neg =='NEG':
            return sorted(zip(used_features, easy_model.coef_),
                       key=lambda x: x[1], reverse=False)
        elif pos_neg == 'ABS':
            # NOTE(review): ascending by |coef| (reverse=False) - confirm
            # whether descending order was intended here.
            return sorted(zip(used_features, easy_model.coef_),
                       key=lambda x: np.abs(x[1]), reverse=False)
        else: 
            return('Unfamiliar method')

In [2]:
def distance_to_proximity(distances_list):
    """Convert distances into proximity weights (smaller distance, larger weight).

    Each distance ``d`` is mapped to ``1 - d / sum(distances_list)``.

    Args:
        distances_list: sized sequence of numeric distances.

    Returns:
        A list with one proximity value per input distance. When the
        distances sum to zero the ratio is undefined, so every proximity is
        set to the neutral value 0.5. An empty input yields an empty list.
    """
    total_distance = sum(distances_list)
    # Check the degenerate case explicitly instead of relying on
    # ZeroDivisionError: numpy scalars produce nan/inf rather than raising,
    # which would silently bypass an exception-based fallback.
    if total_distance == 0:
        return [0.5] * len(distances_list)
    return [1 - distance / total_distance for distance in distances_list]

In [4]:
def gaussian_kernel(distances, sigma=1):
    """Map each distance ``d`` to the Gaussian weight ``exp(-d**2 / (2 * sigma**2))``.

    Args:
        distances: sized, index-addressable collection of distances.
        sigma: kernel width.

    Returns:
        A list holding one weight per distance, in input order.
    """
    denominator = 2 * sigma**2
    kernel = []
    for position in range(len(distances)):
        distance = distances[position]
        kernel.append(np.exp(-distance**2 / denominator))
    return kernel

In [6]:
def get_lime_args(user_vec, item_id, model, item_tensor, min_pert = 10, max_pert = 20, num_of_perturbations = 5, seed = 0, **kw):
    """Build a randomly perturbed neighborhood of a user vector for LIME.

    Entry ``item_id`` of ``user_vec`` is set to 0 first. Each perturbation
    then draws a budget ``dist`` in ``[min_pert, max_pert)``, splits it into
    ``pos`` entries equal to 1 that are switched to 0 and ``dist - pos``
    entries equal to 0 that are switched to 1, and scores the resulting
    vector. Positions are sampled with replacement, so the number of entries
    that actually change can be smaller than ``dist``.

    Args:
        user_vec: 1-D numpy array with 0/1 entries. NOTE: it is modified in
            place (``user_vec[item_id] = 0``) and is also the first row of
            the returned neighborhood.
        item_id: index of the item being explained.
        model: recommender; called directly as ``model(tensor)[0]`` unless
            ``output_type`` is 'single'.
        item_tensor: forwarded to ``recommender_run`` when ``output_type`` is
            'single'.
        min_pert: inclusive lower bound of the perturbation budget.
        max_pert: exclusive upper bound of the perturbation budget.
        num_of_perturbations: number of perturbed neighbors to generate.
        seed: value passed to ``np.random.seed`` (global numpy RNG) before
            sampling.
        **kw: must contain 'output_type'; the whole dict is forwarded to
            ``recommender_run``.

    Returns:
        (neighborhood_data, neighborhood_labels, distances, item_id) where
        the first two are numpy arrays with ``num_of_perturbations + 1`` rows
        (row 0 is the unperturbed vector) and ``distances`` is a list holding
        0 followed by the budget drawn for each neighbor.

    Raises:
        KeyError: if 'output_type' is missing from ``kw``.
    """
    output_type = kw['output_type']
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def _score(vector):
        # Score one interaction vector with the recommender.
        tensor = torch.Tensor(vector).to(device)
        if output_type == 'single':
            scores = recommender_run(tensor, model, item_tensor, None, 'vector', **kw)
            return [float(value) for value in scores.cpu().detach().numpy()]
        return model(tensor)[0].tolist()

    user_vec[item_id] = 0
    neighborhood_data = [user_vec]
    neighborhood_labels = [_score(user_vec)]
    distances = [0]
    np.random.seed(seed)

    for _ in range(num_of_perturbations):
        neighbor = user_vec.copy()
        dist = np.random.randint(min_pert, high=max_pert)
        # Cast to int: for a float-typed user_vec np.sum() is a numpy float,
        # and np.random.choice rejects a non-integer sample size.
        pos = int(min(np.random.randint(0, high=dist), np.sum(user_vec)))
        neg = dist - pos
        neg_locations = np.random.choice(np.where(neighbor == 0)[0], neg)
        pos_locations = np.random.choice(np.where(neighbor == 1)[0], pos)
        # The two index sets are disjoint (zeros vs. ones of the same vector).
        neighbor[neg_locations] = 1
        neighbor[pos_locations] = 0
        neighborhood_data.append(neighbor)
        distances.append(dist)
        neighborhood_labels.append(_score(neighbor))

    neighborhood_data = np.array(neighborhood_data)
    neighborhood_labels = np.array(neighborhood_labels)
    return neighborhood_data, neighborhood_labels, distances, item_id
    

In [9]:
def get_lire_args(user_vec, item_id, model, item_tensor, train_array, num_of_perturbations, proba = 0.1, seed = 0, **kw):
    """Build a Gaussian-perturbed neighborhood of a user vector for LIRE.

    Entry ``item_id`` of ``user_vec`` is set to 0 first. Every neighbor adds
    zero-mean Gaussian noise, with a per-column standard deviation taken from
    ``train_array``, to a random subset (probability ``proba`` per entry) of
    the nonzero entries of the user vector; the result is clamped to [0, 1].
    Each row is then scored with ``recommender_run``.

    Args:
        user_vec: 1-D array-like accepted by ``torch.Tensor``. NOTE: it is
            modified in place (``user_vec[item_id] = 0``).
        item_id: index of the item being explained.
        model: recommender forwarded to ``recommender_run``.
        item_tensor: forwarded to ``recommender_run``.
        train_array: 2-D data whose column-wise standard deviation scales the
            noise (assumed to have at least ``num_features`` columns).
        num_of_perturbations: number of perturbed neighbors to generate.
        proba: probability that an individual entry receives noise.
        seed: seed for the sampling. The noise and the mask are drawn from a
            dedicated ``torch.Generator`` seeded with this value, so equal
            seeds give equal neighborhoods without touching torch's global
            RNG state.
        **kw: must contain 'num_features'; the whole dict is forwarded to
            ``recommender_run``.

    Returns:
        (neighborhood_data, neighborhood_labels, distances, item_id) where
        the first two are numpy arrays with ``num_of_perturbations + 1`` rows
        (row 0 is the unperturbed vector) and ``distances`` is a list with
        the L1 distance of every row to the unperturbed vector.

    Raises:
        KeyError: if 'num_features' is missing from ``kw``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    user_vec[item_id] = 0
    user_tensor = torch.Tensor(user_vec).to(device)
    num_features = kw['num_features']

    # Kept for backward compatibility: earlier versions seeded numpy's global
    # RNG here, although none of the sampling below uses numpy.
    np.random.seed(seed)
    # All random draws are made with torch, so the seed has to drive a torch
    # generator for the result to be reproducible.
    generator = torch.Generator(device=device)
    generator.manual_seed(seed)

    column_stds = np.asarray(np.std(train_array, axis=0))[:num_features]
    stds = torch.as_tensor(column_stds, dtype=torch.float32, device=device)

    users = user_tensor.expand(num_of_perturbations, num_features).detach().clone()

    # One vectorised draw: column j is N(0, stds[j]).
    noise = torch.randn(num_of_perturbations, num_features,
                        generator=generator, device=device) * stds
    # Each entry is selected for perturbation with probability `proba`.
    rd_mask = torch.rand(num_of_perturbations, num_features,
                         generator=generator, device=device) > (1. - proba)
    # Only entries that are nonzero in the user vector are ever perturbed.
    perturbed = torch.clamp(users + noise * rd_mask * (users != 0.), 0, 1)

    neighborhood_data = torch.vstack((user_tensor, perturbed))
    neighborhood_labels = []
    distances = []
    for neighbor in neighborhood_data:
        distances.append(torch.sum(torch.abs(torch.sub(user_tensor, neighbor))).item())
        scores = recommender_run(neighbor, model, item_tensor, None, 'vector', **kw)
        neighborhood_labels.append([float(value) for value in scores.cpu().detach().numpy()])

    neighborhood_data = np.array(torch.abs(neighborhood_data).cpu().detach().numpy())
    neighborhood_labels = np.array(neighborhood_labels)
    return neighborhood_data, neighborhood_labels, distances, item_id

# Wrappers for Deep SHAP

In [None]:
class MLPWrapper(nn.Module):
    """Single-input adapter around a recommender called as ``model(user, items)``.

    The item tensor is fixed at construction time so that ``forward`` only
    needs the user tensor.
    """

    def __init__(self, model, all_items_tensor):
        super().__init__()
        self.model = model
        self.items = all_items_tensor

    def forward(self, user_tensor):
        """Score all stored items for ``user_tensor`` and mask seen entries.

        The model output is multiplied element-wise by ``1 - user_tensor``,
        so positions where ``user_tensor`` equals 1 come out as 0.
        """
        scores = self.model(user_tensor, self.items)
        unseen_mask = torch.ones_like(user_tensor).sub(user_tensor)
        return scores * unseen_mask

In [None]:
class VAEWrapper(nn.Module):
    """Adapter that runs a wrapped model in eval mode and masks seen items."""

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, rating_matrix):
        """Return the model output with already-interacted entries zeroed.

        Args:
            rating_matrix: tensor of user interactions; it is moved to the
                model's device, cast to float and flagged as requiring
                gradients before being fed to the model.

        Returns:
            ``model(rating_matrix)`` multiplied element-wise by a mask that
            is 1 where ``rating_matrix`` equals 0 and 0 elsewhere.
        """
        # Ensure rating_matrix is on the correct device and type
        device = next(self.model.parameters()).device
        rating_matrix = rating_matrix.to(device).float()
        rating_matrix.requires_grad_(True)

        # Run the forward pass in eval mode and restore the previous mode
        # afterwards, also when the model raises.
        was_training = self.model.training
        self.model.eval()
        try:
            output = self.model(rating_matrix)
        finally:
            if was_training:
                self.model.train()

        # Mask the items the user has already interacted with:
        # 1 marks items without interaction, 0 marks everything else.
        user_catalog = (rating_matrix == 0).float()
        return output * user_catalog

In [None]:
class NCFWrapper(nn.Module):
    """Single-input adapter that scores every stored item for a batch of users.

    ``model.model`` selects the active branches: the GMF branch is used
    unless it equals 'MLP', and the MLP branch is used unless it equals
    'GMF'. Item embeddings of the active branches are computed once in the
    constructor; the embedding of an inactive branch is stored as None.
    """

    def __init__(self, model, items_tensor):
        super().__init__()
        self.model = model

        device = next(self.model.parameters()).device
        self.items = items_tensor.to(device).float()
        # The wrapped model is switched to eval mode and left that way.
        self.model.eval()

        gmf_active = self.model.model != 'MLP'
        mlp_active = self.model.model != 'GMF'
        # Precompute item embeddings without tracking gradients.
        with torch.no_grad():
            self.embed_item_GMF = (
                self.model.embed_item_GMF(self.items) if gmf_active else None)
            self.embed_item_MLP = (
                self.model.embed_item_MLP(self.items) if mlp_active else None)

    def forward(self, user_tensor):
        """Return a (batch, num_items) score matrix with seen entries masked.

        Scores are ``sigmoid(predict_layer(features))`` for every user/item
        pair, multiplied element-wise by ``1 - user_tensor``.
        """
        device = next(self.model.parameters()).device
        user_tensor = user_tensor.to(device).float()
        batch_size, num_items = user_tensor.size(0), self.items.size(0)
        variant = self.model.model

        def pair_up(user_embedding, item_embedding):
            # Broadcast both embeddings to (batch, num_items, dim).
            users = user_embedding.unsqueeze(1).expand(-1, num_items, -1)
            items = item_embedding.unsqueeze(0).expand(batch_size, -1, -1)
            return users, items

        branches = []
        if variant != 'MLP':
            # GMF branch: element-wise product of user and item embeddings.
            users_gmf, items_gmf = pair_up(
                self.model.embed_user_GMF(user_tensor), self.embed_item_GMF)
            product = users_gmf * items_gmf
            branches.append(product.view(-1, product.size(-1)))
        if variant != 'GMF':
            # MLP branch: concatenated embeddings fed through the MLP layers.
            users_mlp, items_mlp = pair_up(
                self.model.embed_user_MLP(user_tensor), self.embed_item_MLP)
            joined = torch.cat((users_mlp, items_mlp), -1)
            branches.append(
                self.model.MLP_layers(joined.view(-1, joined.size(-1))))

        # One active branch is used as is; two are concatenated (GMF first).
        concat = branches[0] if len(branches) == 1 else torch.cat(branches, -1)

        scores = self.model.sigmoid(self.model.predict_layer(concat))
        predictions = scores.view(batch_size, num_items)

        # Mask out items already interacted with
        return predictions * (1 - user_tensor)