In [1]:
import os
import numpy as np
import scipy.stats as st
from tqdm.notebook import trange
import pandas as pd
from tqdm import tqdm

from itertools import combinations
import torch.nn.functional as F
import torch

In [2]:
# Current working directory; load_model/save_model resolve the models folder relative to it.
base_dir = os.getcwd()

In [3]:
def load_model(model_type='pooling', models_folder='../models'):
    """
    Load a previously saved model from disk.

    The file is read from
    `<base_dir>/<models_folder>/<model_type>_model_1m_20interactions.pt`.

    Parameters:
    - model_type (str): Prefix of the model file name, default is 'pooling'.
    - models_folder (str): Folder containing the model files, joined onto `base_dir`.

    Returns:
    - The object deserialized by `torch.load`.
    """
    file_name = f'{model_type}_model_1m_20interactions.pt'
    model_path = os.path.join(base_dir, models_folder, file_name)
    # NOTE: torch.load unpickles the file content, so only load files you trust.
    return torch.load(model_path)

In [4]:
def save_model(model, model_type='pooling', models_folder='../models'):
    """
    Serialize a model to disk.

    The file is written to
    `<base_dir>/<models_folder>/<model_type>_model_1m_20interactions.pt`.

    Parameters:
    - model: Object to serialize with `torch.save`.
    - model_type (str): Prefix of the model file name, default is 'pooling'.
    - models_folder (str): Folder receiving the model file, joined onto `base_dir`.

    Returns:
    - The return value of `torch.save`.
    """
    file_name = f'{model_type}_model_1m_20interactions.pt'
    model_path = os.path.join(base_dir, models_folder, file_name)
    return torch.save(model, model_path)

In [5]:
class StaticVars:
    """Numeric limit constants used as sentinel values elsewhere in this file."""
    # Largest finite value representable as a float32.
    FLOAT_MAX = np.finfo(np.float32).max
    # Largest value representable as an int32.
    INT_MAX = np.iinfo(np.int32).max

In [4]:
class InteractionsInfo:
    """
    Represents information about the interactions and counterfactual search for a user-item pair.

    Attributes:
    - user_id (int): User ID.
    - item_id (int): Item ID.
    - available_budget (int): Available budget for the search.
    - satisfy_objective (bool): Flag indicating whether the objective is satisfied.
    - satisfy_contraints (bool): Flag indicating whether constraints are satisfied
      (the attribute keeps its historical spelling).
    - recommendation (list | None): (score, item index) pairs of the top-k items for the best
      solution, sorted by decreasing score; None until `update_values` records a solution.
    - interactions (dict): Dictionary containing interaction information (original, initial, best).
    - loss (dict): Dictionary containing loss information (initial, best).
    - iter_no (dict): Dictionary containing iteration information (initial, best, total).
    - budget_spent (dict): Dictionary containing budget spent information (initial, best, total).
    - solution_found (bool): Flag indicating whether a solution is found.
    - pos (int): Position of the item.
    - cfs_dist (int): Counterfactual distance.
    - stats_per_cardinality (list): List containing statistics per cardinality.
    - max_updated_card (int): Maximum updated cardinality.
    - len_interactions (int): Length of original interactions.
    - missing_target_in_topk (bool): Search mode (see `__init__`).

    Methods:
    - __init__: Initializes the InteractionsInfo object.
    - __str__: Generates a string representation of the object.
    - set_flags: Sets the flags for satisfying objective and constraints.
    - needs_update: Checks if an update is needed based on the loss.
    - set_values: Sets values for the object based on predictions and losses.
    - update_values: Updates values based on predictions, ranking, and losses.
    """
    score = 0
    y_loss = 1.0
    proximity_loss = StaticVars.FLOAT_MAX

    def __init__(self, uid, iid, interactions, budget=1000, missing_target_in_topk=False, fobj=True, fconstraint=True):
        """
        Initializes an InteractionsInfo object.

        Parameters:
        - uid (int): User ID.
        - iid (int): Item ID.
        - interactions (list): List of user-item interactions.
        - budget (int): Available budget for the search.
        - missing_target_in_topk: Boolean indicating whether to find CFs for missing (True) or present (False) target items.
        - fobj (bool): Flag indicating whether the objective is satisfied.
        - fconstraint (bool): Flag indicating whether constraints are satisfied.
        """
        self.user_id = uid
        self.item_id = iid
        self.available_budget = budget

        self.satisfy_objective = fobj
        self.satisfy_contraints = fconstraint

        self.recommendation = None
        if missing_target_in_topk:
            # Items are added in this mode, so every stage starts from the full interaction list.
            self.interactions = dict(original=interactions, initial=interactions, best=interactions)
        else:
            self.interactions = dict(original=interactions, initial=[], best=[])
        self.loss = dict(initial=StaticVars.FLOAT_MAX, best=StaticVars.FLOAT_MAX)
        # Until a solution is recorded, iteration and budget counters hold the full budget.
        self.iter_no = dict(initial=budget, best=budget, total=budget)
        self.budget_spent = dict(initial=budget, best=budget, total=budget)

        self.solution_found = False
        self.pos = StaticVars.INT_MAX
        self.cfs_dist = len(interactions)
        self.stats_per_cardinality = [0] * len(interactions)
        self.max_updated_card = -1

        self.len_interactions = len(self.interactions['original'])
        self.missing_target_in_topk = missing_target_in_topk

    def __str__(self):
        """
        Generates a string representation of the InteractionsInfo object.

        Returns:
        - str: String representation of the object.
        """
        # `recommendation` stays None until update_values() records a solution.
        recommended = self.recommendation if self.recommendation is not None else []
        sorted_recommended_items = [
            (n[0], n[1].detach().numpy().flatten()[0]) if isinstance(n[1], torch.Tensor)
            else (n[0], n[1]) for n in recommended
        ]

        # The iteration number and the budget dictionary are separate replacement fields:
        # nesting them in one field built a set containing a dict and raised TypeError.
        return (f'\n'
                f'user_id: {self.user_id}, item_id: {self.item_id}\n'
                f'yloss: {round(self.y_loss, 4)}, proximity_loss: {int(self.proximity_loss)}\n'
                f'Item {self.item_id} is in position {self.pos} now!!!\n'
                f'Found in iteration {self.iter_no["best"]}, {self.budget_spent} and the interacted items are {self.interactions["best"]}\n'
                f'10-best recommended items {sorted_recommended_items}\n')

    def set_flags(self, do_objective, do_contraints):
        """
        Sets the flags for satisfying objective and constraints.

        Parameters:
        - do_objective (bool): Flag indicating whether the objective is satisfied.
        - do_contraints (bool): Flag indicating whether constraints are satisfied.
        """
        self.satisfy_objective = do_objective
        self.satisfy_contraints = do_contraints

    def needs_update(self, loss):
        """
        Checks if an update is needed based on the loss.

        Parameters:
        - loss (dict): Dictionary containing loss information; the keys 'yloss' and
          'proximity' are read when the dictionary is not empty.

        Returns:
        - bool: True if an update is needed, False otherwise.
        """
        if len(loss):
            # Each check passes automatically when its flag is disabled.
            does_contraints = (not self.satisfy_contraints or self.y_loss > loss['yloss'])
            does_objective = (not self.satisfy_objective or self.proximity_loss >= loss['proximity'])

            if does_contraints and does_objective:
                return True

        return False

    def set_values(self, predictions, interacted_items, tot_interacted_items, loss, iter_no, k=10):
        """
        Set values for recommendation results and evaluation metrics.

        Parameters:
        - predictions: Array of prediction scores indexed by item id.
        - interacted_items: Items stored as `self.interactions`.
        - tot_interacted_items: Value stored as `self.complete_interactions`.
        - loss: Indexable whose element 0 is the y-loss and element 1 the proximity loss.
        - iter_no (int): Iteration number where the solution was found.
        - k (int): Number of top-ranked items to keep, default is 10.

        Returns:
            None

        Sets the following attributes:
            - self.pos (int): Ranking position of the selected item in the list of predictions.
            - self.recommends (list): (score, item index) pairs of the top-k items sorted by decreasing score.
            - self.iter_found (int): Iteration number where solution was found.
            - self.y_loss (float): Loss value related to the predicted outcome.
            - self.proximity_loss (float): Proximity loss value.
            - self.interactions: Replaced by `interacted_items`.
            - self.complete_interactions: Set to `tot_interacted_items`.
            - self.solution_found (bool): Always set to True.

        NOTE(review): this method stores the top-k list in `recommends` (not `recommendation`)
        and replaces the `interactions` dictionary, whereas `__str__` and `update_values` read
        `recommendation` and `interactions['best']` — confirm the two are not mixed on one object.
        """
        # rankdata gives rank 1 to the highest prediction because the scores are negated.
        rk_data = st.rankdata(-predictions, method='ordinal')
        self.pos = rk_data[self.item_id]
        accepted_preds = (rk_data <= k).nonzero()
        self.recommends = sorted(
            zip(predictions[accepted_preds], *accepted_preds),
            key=lambda x: x[0], reverse=True)
        self.iter_found = iter_no
        self.y_loss = loss[0]
        self.proximity_loss = loss[1]
        self.interactions = interacted_items
        self.complete_interactions = tot_interacted_items

        self.solution_found = True

    def update_values(self, predictions, ranking, interacted_items, loss, iter_no, residual_budget, k):
        """
        Record a candidate as the new best solution when it reaches the objective with a lower loss.

        The objective is reached when the target item's rank is <= k (missing_target_in_topk mode)
        or > k (otherwise). The 'total' iteration and budget counters are refreshed on every call.

        Parameters:
        - predictions: Array of prediction scores indexed by item id.
        - ranking: Array of ranks indexed by item id; `ranking[item_id]` is compared against k.
        - interacted_items: Interaction list evaluated for this candidate.
        - loss: Loss of this candidate, compared against `self.loss['best']`.
        - iter_no (int): Current iteration number.
        - residual_budget (int): Budget left after this candidate.
        - k (int): Size of the top-k list.
        """
        spent = self.available_budget - residual_budget
        target_rank = ranking[self.item_id]
        if self.missing_target_in_topk:
            objective_reached = target_rank <= k
        else:
            objective_reached = target_rank > k

        if objective_reached and loss < self.loss['best']:
            self.pos = target_rank
            accepted_preds = (ranking <= k).nonzero()
            self.recommendation = sorted(
                zip(predictions[accepted_preds], *accepted_preds),
                key=lambda x: x[0], reverse=True)

            self.iter_no['best'] = iter_no
            self.budget_spent['best'] = spent
            self.loss['best'] = loss
            self.interactions['best'] = interacted_items

            if not self.solution_found:
                # The first accepted candidate is also kept as the initial solution.
                self.iter_no['initial'] = iter_no
                self.budget_spent['initial'] = spent
                self.loss['initial'] = loss
                self.interactions['initial'] = interacted_items

            if self.missing_target_in_topk:
                # Distance is the number of items added to the original interactions.
                self.cfs_dist = len(self.interactions['best']) - self.len_interactions
                index = len(interacted_items) - self.len_interactions - 1

                if index < 0 or index >= len(self.stats_per_cardinality):
                    print("Bug !!!!!")
                    print("index : ", index)
                    print(interacted_items)
                    print(self.interactions)
                    print(self.len_interactions)
            else:
                # Distance is the number of items removed from the original interactions.
                self.cfs_dist = self.len_interactions - len(self.interactions['best'])
                index = self.len_interactions - len(interacted_items) - 1

            self.stats_per_cardinality[index] = max(spent, self.stats_per_cardinality[index])

            self.solution_found = True

        # Both totals track the search progress regardless of whether this candidate was accepted.
        self.iter_no['total'] = iter_no
        self.budget_spent['total'] = spent


NameError: name 'StaticVars' is not defined

In [7]:
class ComputeLoss:
    """
    Loss used to score a candidate counterfactual interaction list.

    The loss combines a ranking term (y-loss) with optional proximity, diversity and
    regularization terms, weighted by `weights`.
    """

    def __init__(self, target, original_input, top_k=10, weights=(1, 0, 0), total_CFs=1, missing_target_in_topk=False):
        """
        Parameters:
        - target: Index of the target item in the prediction array.
        - original_input: Original interacted items of the user.
        - top_k (int): Rank whose score the target is compared against, default is 10.
        - weights: (proximity, diversity, regularization) weights, default is (1, 0, 0).
        - total_CFs (int): Number of counterfactuals, default is 1.
        - missing_target_in_topk (bool): True when explaining why the target is missing
          from the top-k, False when explaining why it is present.
        """
        self.target_item = target
        self.top_k = top_k
        self.original_items = original_input
        self.total_CFs = total_CFs
        self.missing_target_in_topk = missing_target_in_topk
        (self.proximity_weight, self.diversity_weight, self.regularization_weight) = weights

    def _compute_yloss(self, target_score, kth_score):
        """Return the relative gap between the target score and the k-th score, floored at 0."""
        if self.missing_target_in_topk:
            # Zero once the target scores at least as high as the k-th item.
            ratio = kth_score / target_score
        else:
            # Zero once the target scores no higher than the k-th item.
            ratio = target_score / kth_score
        return max(0, ratio - 1.0)

    def _compute_dist(self, x_hat, x1):
        """Return the number of distinct values of x1 that do not appear in x_hat."""
        return len(np.setdiff1d(x1, x_hat))

    def _compute_proximity_loss(self, cfs):
        """Return the number of changed items, normalized by the original list length times total_CFs."""
        proximity_loss = 0.0
        for _ in range(self.total_CFs):
            if self.missing_target_in_topk:
                # Items present in the counterfactual but not in the original (additions).
                proximity_loss += self._compute_dist(self.original_items, cfs)
            else:
                # Items present in the original but not in the counterfactual (removals).
                proximity_loss += self._compute_dist(cfs, self.original_items)

        return proximity_loss / np.multiply(len(self.original_items), self.total_CFs)

    def _compute_diversity_loss(self):
        """Placeholder diversity term: always 0.0 divided by total_CFs."""
        diversity_loss = 0.0
        return diversity_loss / self.total_CFs

    def _compute_regularization_loss(self, x):
        """Placeholder regularization term: no constraint is implemented, so it is always 0.0."""
        return 0.0

    def compute_loss(self, cfs, preds, ranking, total_CFs=1):
        """
        Computes the overall loss.

        Parameters:
        - cfs: Candidate interaction list.
        - preds: Array of prediction scores indexed by item.
        - ranking: Array of ranks aligned with `preds`; the score of the entry whose rank
          equals `top_k` is used as the k-th score.
        - total_CFs: Unused; kept for interface compatibility.

        Returns:
        - yloss + proximity_weight * proximity - diversity_weight * diversity
          + regularization_weight * regularization.
        """
        kth_score = preds[(ranking == self.top_k).nonzero()][0]
        yloss = self._compute_yloss(preds[self.target_item], kth_score)
        # Terms with a non-positive weight are skipped entirely.
        proximity_loss = self._compute_proximity_loss(cfs) if self.proximity_weight > 0 else 0.0
        diversity_loss = self._compute_diversity_loss() if self.diversity_weight > 0 else 0.0
        regularization_loss = self._compute_regularization_loss(yloss) if self.regularization_weight > 0 else 0.0

        loss = yloss + (self.proximity_weight * proximity_loss) \
            - (self.diversity_weight * diversity_loss) \
            + (self.regularization_weight * regularization_loss)
        return loss

In [None]:
import random 


def find_sample_with_jaccard(target_item, user_interactions, jaccard_sims_matrix, k=20, worst_items = False):
    """
    Find the k items the user has not interacted with that have the highest Jaccard
    similarity to the target item (or the lowest when `worst_items` is True),
    based on the given similarity matrix.

    Parameters:
    - target_item: 1-based id of the target item.
    - user_interactions: List of 1-based item ids interacted with by the user.
    - jaccard_sims_matrix: Item-item similarity matrix with 0-based rows/columns.
    - k: The number of items to select.
    - worst_items: When True, select the k least similar items instead of the most similar.

    Returns:
    - A list of at most k 1-based item ids, ordered from most to least similar
      (least to most similar when `worst_items` is True). The target item and the
      interacted items are never returned.
    """
    # Item ids start at 1 (movielens) whereas the matrix is indexed from 0.
    interacted_idx = np.asarray(user_interactions, dtype=np.int64).ravel() - 1
    target_idx = target_item - 1

    # The target is excluded too: its self-similarity is maximal, so it would otherwise
    # always be selected as its own best candidate.
    excluded = np.append(interacted_idx, target_idx)
    candidates = np.setdiff1d(np.arange(jaccard_sims_matrix.shape[1]), excluded)

    sims = np.asarray(jaccard_sims_matrix[target_idx, candidates])
    # Stable sort keeps candidates with equal similarity in increasing id order.
    if worst_items:
        order = np.argsort(sims, kind='stable')
    else:
        order = np.argsort(-sims, kind='stable')

    selected = candidates[order[:max(k, 0)]]
    return (selected + 1).tolist()

def find_sample_with_recommender(target_item, user_interactions, model, k=20):
    """
    Build a shuffled sample of item indices from the model's predictions.

    Half of the sample (k // 2) is made of the items with the highest predictions returned by
    `model.predict(target_item)`; the other half is drawn at random, without replacement,
    from the remaining items. Interacted items are pushed to the end of the ordering and the
    last `len(user_interactions)` positions are left out of the random draw.

    Parameters:
    - target_item: Value passed to `model.predict`.
    - user_interactions: Indices of the items interacted with by the user.
    - model: Object exposing `predict`.
    - k: Target sample size, default is 20.

    Returns:
    - A list of 2 * (k // 2) item indices in random order.
    """
    half = k // 2

    # Negated so that an ascending argsort puts the highest predictions first.
    scores = -model.predict(target_item)
    scores[user_interactions] = StaticVars.FLOAT_MAX
    ranked = scores.argsort()

    top_half = ranked[:half]
    pool = ranked[half:-len(user_interactions)]
    random_half = np.random.choice(pool, half, replace=False)

    sample = np.concatenate((top_half, random_half))
    np.random.shuffle(sample)
    return sample.tolist()

In [1]:
def _retrieve_solutions(params):
    """
    Retrieve counterfactual solutions for a specific target position for a specific user. Using a given
    recommendation model and a search strategy, it searches for CFs under a budget constraint to explain:
     - if missing_target_in_topk is True, why the item at the target position is not in the top_k recommendations of the user (the position used is then greater than top_k);
     - if missing_target_in_topk is False, why the item at the target position is in the top_k recommendations of the user (the position used is then at most top_k).

    Parameters:
    - params (tuple): Tuple containing the following elements:
        - user_id: User ID for whom counterfactual solutions are sought (lowest id starts at 1).
        - d: Dataset object containing user-item interactions.
        - m: Recommender system model.
        - sf: Counterfactual search strategy function.
        - pos: Target position for which CFs are generated.
        - init_budget: Initial budget for CF generation.
        - top_k: Number of top-ranked items to consider.
        - missing_target_in_topk: Boolean indicating whether to find CFs for missing (True) or present (False) target items.
        - sim_matrix: Item-item similarity matrix used to sample candidate items in the missing_target_in_topk mode.
        - kwargs: Additional keyword arguments passed to the strategy constructor.

    Returns:
    - list: List of InteractionsInfo objects containing information about the search process and counterfactual solutions for the specified user.
    """
    user_id, d, m, sf, pos, init_budget, top_k, missing_target_in_topk, sim_matrix, kwargs = params

    _total_loss = []
    seq = d.sequences[d.user_ids == user_id]
    # Only the first sequence of the user (if any) is explained.
    for j in range(min(1, len(seq))):
        # Skip subsequences shorter than "max_sequence_length": they are left-padded with zeros (see dataset documentation).
        if not all(v > 0 for v in seq[j]):
            continue

        items_interacted = seq[j].copy()

        # Negated so that an ascending argsort lists the best recommendations first.
        predictions = -m.predict(items_interacted)
        # Interacted items are pushed to the end of the recommendation order.
        predictions[items_interacted] = StaticVars.FLOAT_MAX
        # There is no item at index 0 (item ids start at 1).
        predictions[0] = StaticVars.FLOAT_MAX
        recommendation_order = predictions.argsort()

        # Used in the missing_target_in_topk mode only: candidate items that may be added
        # to the interactions to bring the target item into the top k.
        negative_sample = []

        if missing_target_in_topk:
            # Index in the predictions array of the item at the target position.
            target_item = recommendation_order[max(top_k + 1, int(pos))]

            # Same size as the search space of the other mode (missing_target_in_topk False).
            size_sample = len(items_interacted)
            negative_sample = find_sample_with_jaccard(target_item, items_interacted, sim_matrix, size_sample)
        else:
            # Index in the predictions array of the item at the target position.
            target_item = recommendation_order[min(top_k, int(pos))]

        search_info = InteractionsInfo(user_id, target_item, items_interacted, init_budget, missing_target_in_topk=missing_target_in_topk)
        loss = ComputeLoss(target_item, items_interacted, top_k, missing_target_in_topk=missing_target_in_topk)

        if missing_target_in_topk:
            # Items are added in this mode, so the strategy chooses among the sampled
            # items that are not in the interacted items.
            strategy = sf(target_item, negative_sample, len(negative_sample), init_budget, m, **kwargs)
        else:
            strategy = sf(target_item, items_interacted, d.max_sequence_length, init_budget, m, **kwargs)

        # Start of the search strategy.
        counter = 1
        budget = strategy.get_init_budget()
        while budget > 0:
            perm, curr_budget = strategy.next_comb(reverse=search_info.solution_found)
            if perm is None:
                break

            if missing_target_in_topk:
                # Strategies were designed to select items to keep after removal. In this mode
                # the sampled items the strategy dropped are the ones added to the interactions.
                items_to_add = list(set(negative_sample) - set(perm))
                perm = items_interacted.tolist() + items_to_add  # maybe a bug TODO test it

            # Update the predictions (and then the ranks) for the "new" interactions of the user.
            preds = m.predict(perm)
            preds[perm] = -StaticVars.FLOAT_MAX
            preds[0] = -StaticVars.FLOAT_MAX
            # rankdata gives 1 (not 0) as the lowest rank.
            rk_data = st.rankdata(-preds, method='ordinal')

            computed_loss = loss.compute_loss(perm, preds, rk_data)

            # Keep info about the best solution found so far.
            search_info.update_values(
                preds, rk_data, perm, computed_loss, counter, curr_budget, top_k)

            if hasattr(strategy, 'set_score'):
                reverse_search = strategy.set_score(
                    len(items_interacted) - len(perm) - 1,
                    preds[target_item],
                    preds[(rk_data == top_k).nonzero()][0]
                )

                if reverse_search:
                    # search_info is only appended to _total_loss after the loop, so the
                    # flag has to be reset on search_info itself.
                    search_info.solution_found = False

            strategy.reset_costs()
            counter += 1

            budget = curr_budget

        _total_loss.append(search_info)

    return _total_loss
import heapq
def _find_cfs(dataset, model, strategy_func, target_item_pos, missing_target_in_topk, sim_matrix, no_users=None, init_budget=1000,
              max_allowed_permutations=None, top_k=10, total_CFs=1, num_processes=10, **kwargs):
    """
    Find counterfactual explanations (CFs) for a recommender system.

    Parameters:
    - dataset: Dataset object containing user-item interactions.
    - model: Recommender system model.
    - strategy_func: Counterfactual explanation strategy function.
    - target_item_pos (list): List of target positions for which CFs are to be generated.
    - missing_target_in_topk (bool): Indicates whether we search CFs for "why" the target item is missing from the top-k recommendations (True) or present in the top-k recommendations (False).
    - sim_matrix: Item-item similarity matrix used in the CF generation process.
    - no_users (int, optional): Number of users to consider, default is None (use maximum user ID in the dataset). Note: user ids in the dataset start at 1, they are not 0-based.
    - init_budget (int): Initial budget for CF generation, default is 1000.
    - max_allowed_permutations (int, optional): Not used by this function.
    - top_k (int): Number of top-ranked items to consider during CF generation, default is 10.
    - total_CFs (int): Not used by this function.
    - num_processes (int): Not used by this function; users are processed sequentially.
    - **kwargs: Additional keyword arguments to be passed to the CF generation process.

    Returns:
    - dict: Dictionary containing CFs for each target position. Each key is a target position and the value is a list extended by the result of the "_retrieve_solutions" function for each user ID.
    """

    print(f'The backend used is: {strategy_func.class_name}')

    # When no_users is None the maximum user id is used as-is (no "+ 1") because ids start at 1.
    num_users = no_users or max(dataset.user_ids)
    best_tot_loss_data = dict.fromkeys(target_item_pos)

    with tqdm(total=len(target_item_pos), desc='target position loop') as pbar:
        for pos in target_item_pos:
            solutions = []
            for user_id in trange(1, num_users + 1, desc='users loop', leave=False):
                solutions.extend(_retrieve_solutions((
                    user_id, dataset, model, strategy_func, pos, init_budget, top_k, missing_target_in_topk, sim_matrix, kwargs)))
            best_tot_loss_data[pos] = solutions
            # One step per completed position, matching total=len(target_item_pos).
            pbar.update(1)

    return best_tot_loss_data

def _find_specific_cfs_(dataset, model, strategy_func, specific_pos, missing_target_in_topk, sim_matrix, specific_uid, init_budget=1000, top_k=10, **kwargs):
    """
    Find counterfactual explanations for a single user and a single target position.

    Packs the arguments into the tuple expected by `_retrieve_solutions` and returns its result.
    """
    params = (
        specific_uid,
        dataset,
        model,
        strategy_func,
        specific_pos,
        init_budget,
        top_k,
        missing_target_in_topk,
        sim_matrix,
        kwargs,
    )
    return _retrieve_solutions(params)

def retrieve_solutions_specific_sequence(user_id, d, m, sf, init_budget, top_k, missing_target_in_topk, sim_matrix, items_interacted, target_item, negative_sample, **kwargs):
    """
    Run the counterfactual search for an explicitly given interaction sequence and target item.

    Parameters:
    - user_id: User ID stored in the returned InteractionsInfo.
    - d: Dataset object; only `max_sequence_length` is read (when missing_target_in_topk is False).
    - m: Recommender system model exposing `predict`.
    - sf: Counterfactual search strategy function.
    - init_budget: Initial budget for CF generation.
    - top_k: Number of top-ranked items to consider.
    - missing_target_in_topk: Boolean indicating whether to find CFs for missing (True) or present (False) target items.
    - sim_matrix: Not used by this function.
    - items_interacted: Interacted items of the user (list or array).
    - target_item: Index of the target item in the prediction array.
    - negative_sample: Candidate items that may be added (missing_target_in_topk mode).
    - **kwargs: Additional keyword arguments passed to the strategy constructor.

    Returns:
    - list: Single-element list holding the InteractionsInfo of the search.
    """
    _total_loss = []

    # Diagnostic output only: these predictions are not used by the search below.
    # Negated so that the best recommendations have the smallest values.
    predictions = -m.predict(items_interacted)
    predictions[items_interacted] = StaticVars.FLOAT_MAX
    print("check first pred = 0", predictions[0])
    top_21 = heapq.nlargest(21, predictions)
    print("biggest value : ", top_21)
    top_21 = heapq.nsmallest(21, predictions)
    print("smallest value : ", top_21)

    search_info = InteractionsInfo(user_id, target_item, items_interacted, init_budget, missing_target_in_topk=missing_target_in_topk)
    loss = ComputeLoss(target_item, items_interacted, top_k, missing_target_in_topk = missing_target_in_topk)

    if missing_target_in_topk:
        # Items are added in this mode, so the strategy chooses among the sampled
        # items that are not in the interacted items.
        strategy = sf(target_item, negative_sample, len(negative_sample), init_budget, m, **kwargs)
    else:
        strategy = sf(target_item, items_interacted, d.max_sequence_length, init_budget, m, **kwargs)

    # Start of the search strategy.
    counter = 1
    budget = strategy.get_init_budget()

    while budget > 0:
        perm, curr_budget = strategy.next_comb(reverse=search_info.solution_found)
        if perm is None:
            break

        if missing_target_in_topk:
            # Strategies were designed to select items to keep after removal. In this mode
            # the sampled items the strategy dropped are the ones added to the interactions.
            items_to_add = list(set(negative_sample) - set(perm))
            if isinstance(items_interacted, list):
                perm = items_interacted + items_to_add
            else:
                perm = items_interacted.tolist() + items_to_add  # maybe a bug TODO test it

        # Update the predictions (and then the ranks) for the "new" interactions of the user.
        # NOTE(review): unlike _retrieve_solutions, index 0 is not masked here — confirm intended.
        preds = m.predict(perm)
        preds[perm] = -StaticVars.FLOAT_MAX
        # rankdata gives 1 (not 0) as the lowest rank.
        rk_data = st.rankdata(-preds, method='ordinal')

        computed_loss = loss.compute_loss(perm, preds, rk_data)

        # Keep info about the best solution found so far.
        search_info.update_values(
            preds, rk_data, perm, computed_loss, counter, curr_budget, top_k)

        if hasattr(strategy, 'set_score'):
            reverse_search = strategy.set_score(
                len(items_interacted) - len(perm) - 1,
                preds[target_item],
                preds[(rk_data == top_k).nonzero()][0]
            )
            if reverse_search:
                # search_info is only appended to _total_loss after the loop, so the
                # flag has to be reset on search_info itself.
                search_info.solution_found = False

        strategy.reset_costs()
        counter += 1
        budget = curr_budget

    _total_loss.append(search_info)

    return _total_loss

In [3]:
def convert_res_to_lists(cfs, cnt, non_achieved_target, technique, missing_target_in_topk):
    """
    Convert counterfactual results to column lists for analysis.

    For every target position, one list per column is appended to `cnt[position][technique]`,
    in this order: cardinality of the initial solution, cfs_dist, initial budget spent, best
    budget spent, initial iteration, best iteration, user id, number of original interactions,
    cfs_dist again, then one column per entry of `stats_per_cardinality`.

    Parameters:
    - cfs (dict): Counterfactual results for a specific strategy (position -> list of records).
    - cnt (dict): Dictionary for counting statistics; `cnt[position]` must already exist.
    - non_achieved_target (list): Returned unchanged.
    - technique (str): Name of the counterfactual strategy.
    - missing_target_in_topk: Boolean indicating whether to find CFs for missing (True) or present (False) target items in top k.

    Returns:
    - tuple: Updated count dictionary and the `non_achieved_target` list.
    """
    # Indices of the fixed columns, in output order; cfs_dist (index 1) is emitted twice.
    fixed_columns = (0, 1, 2, 3, 4, 5, 6, 7, 1)

    for position, records in cfs.items():
        # Create the list for this technique if it is not there yet.
        cnt[position].setdefault(technique, [])
        rows = []
        n_cardinalities = 0

        for rec in records:
            if rec is None:
                continue

            n_original = len(rec.interactions['original'])
            n_initial = len(rec.interactions['initial'])
            # Items are added in the missing mode and removed otherwise.
            if missing_target_in_topk:
                initial_cardinality = n_initial - n_original
            else:
                initial_cardinality = n_original - n_initial

            rows.append([
                initial_cardinality,
                rec.cfs_dist,
                # for boxplot
                rec.budget_spent['initial'], rec.budget_spent['best'],
                rec.iter_no['initial'], rec.iter_no['best'],
                rec.user_id, n_original
            ] + rec.stats_per_cardinality)

            # The last record decides how many per-cardinality columns are emitted.
            n_cardinalities = n_original

        columns = cnt[position][technique]
        for col in fixed_columns:
            columns.append([row[col] for row in rows])
        for offset in range(n_cardinalities):
            columns.append([row[8 + offset] for row in rows])

    return cnt, non_achieved_target

In [11]:
def gpu_embeddings_to_cosine_similarity_matrix(E):
    """
    Converts a tensor of n embeddings to an (n, n) tensor of cosine similarities.

    Parameters:
    - E (torch.Tensor): Tensor of n embeddings, one embedding per row.

    Returns:
    - torch.Tensor: Cosine similarity matrix.
    """
    row_norms = torch.norm(E, p=2, dim=1)
    pairwise_dots = E @ E.t()
    # Divide column j by the norm of embedding j, then row i by the norm of embedding i.
    scaled_by_columns = pairwise_dots / row_norms
    return scaled_by_columns / row_norms.unsqueeze(-1)

In [12]:
from torch.nn.functional import cosine_similarity


def embeddings_to_cosine_similarity_matrix(E):
    """
    Converts a tensor of n embeddings to an (n, n) tensor of cosine similarities.

    Each pair of embeddings is compared individually with `cosine_similarity`.
    """
    rows = []
    for b in E:
        # Similarities of every embedding against b, stacked into one row.
        row = torch.stack([cosine_similarity(a, b, dim=0) for a in E], dim=-1)
        rows.append(row)
    return torch.stack(rows)

In [1]:
from scipy.spatial.distance import pdist, squareform


def compute_sim_matrix(dataset, metric='jaccard', adjusted=False):
    """
    Compute an item-item similarity matrix from user-item interactions.

    Parameters:
    - dataset: Object exposing `num_users`, `num_items`, `user_ids`, `item_ids`
      and `ratings`. User and item ids are shifted by one before being used as
      indices, so id 1 maps to row/column 0.
    - metric (str): Distance metric name forwarded to `scipy.spatial.distance.pdist`,
      default is 'jaccard'.
    - adjusted (bool): When True, each user's row mean is subtracted from that
      row before distances are computed, default is False.

    Returns:
    - np.ndarray: Square matrix holding `1 - distance` for every pair of items.
    """
    user_count = dataset.num_users - 1
    item_count = dataset.num_items - 1

    # One row per user, one column per item.
    interactions = np.zeros((user_count, item_count), dtype=bool)
    for user in trange(1, dataset.num_users):
        belongs_to_user = dataset.user_ids == user
        np.add.at(
            interactions[user - 1],
            dataset.item_ids[belongs_to_user] - 1,
            dataset.ratings[belongs_to_user],
        )

    # Optional centering of every user row around its own mean.
    if adjusted:
        interactions = interactions - interactions.mean(axis=1)[:, np.newaxis]

    # pdist measures distances between the rows of its input; the transpose
    # puts items on the rows so the result is item-to-item. Subtracting the
    # distance from 1 turns it into a similarity (higher means more similar).
    item_distances = squareform(pdist(interactions.T, metric))
    return 1 - item_distances

In [14]:
from collections import Counter


def rank_interactions_to_excluded_item_per_user(cfs, sims_matrix):
    """
    Tally, over all solved cases, the similarity rank of a removed interaction.

    For every record with a solution, the items in `complete_interactions` are
    ranked by their similarity to the record's `item_id` (rank 1 = most
    similar). The items present in `complete_interactions` but absent from
    `interactions` are looked up, and the rank of the last such item (in
    `complete_interactions` order) is recorded.

    Parameters:
    - cfs: Iterable of iterables of records; `None` entries are ignored.
    - sims_matrix: 2-D array indexed as `[item_id, interacted_items]`.

    Returns:
    - tuple: (Counter mapping rank -> number of occurrences,
              list of user ids whose record has no solution).
    """
    unsolved_users = []
    collected_ranks = []

    for group in cfs:
        for rec in group:
            if rec is None:
                continue

            if not rec.solution_found:
                unsolved_users.append(rec.user_id)
                continue

            history = rec.complete_interactions

            # rankdata ranks ascending; flip it so the most similar item gets rank 1.
            ascending_rank = st.rankdata(sims_matrix[rec.item_id, history])
            descending_rank = len(history) - ascending_rank + 1

            # Positions, inside the full history, of the items that were dropped.
            dropped_items = list(set(history).difference(set(rec.interactions)))
            dropped_positions = np.where(np.isin(history, dropped_items))

            dropped_ranks = descending_rank[dropped_positions].astype(int)
            # Only the last dropped position contributes to the tally.
            collected_ranks.extend(sorted(dropped_ranks[-1:]))

    return (Counter(collected_ranks), unsolved_users)

In [None]:
def create_evaluation_dataframe(dataset, model, strategy, budget, target_items_pos, top_k, sim_matrix, csv_file_name = None, alpha = None, normalization = None):
    """
    Create a DataFrame with, for every processed user and target position, the
    original interactions and the counterfactual explanation found by `strategy`.

    Args:
        dataset : The dataset containing users' interaction sequences.
        model : The recommender system.
        strategy : The strategy used to find the cfs.
        budget (int) : The budget allocated for the strategy.
        target_items_pos (list) : Positions, in the recommendation list, of the
            items for which a cfs is searched.
        top_k : Forwarded unchanged to `_find_specific_cfs_`.
        sim_matrix : Forwarded unchanged to `_find_specific_cfs_`.
        csv_file_name (string) : If given, the DataFrame is also written to
            `<cwd>/csv/<csv_file_name>`.
        alpha, normalization : Forwarded to `_find_specific_cfs_` only when
            BOTH are provided; otherwise the callee's defaults apply.

    Returns:
        DataFrame : One row per (user, target position) with the columns
        user_id, target_pos, target_item, original_interactions,
        best_interactions, cfs and len_cfs. The three sequence columns hold
        plain Python lists so that the CSV can be read back with
        `ast.literal_eval`.
    """
    columns = ['user_id', 'target_pos', 'target_item', 'original_interactions', 'best_interactions', 'cfs', 'len_cfs']

    # Optional tuning arguments are only passed on when both are set.
    extra_kwargs = {}
    if alpha is not None and normalization is not None:
        extra_kwargs = {'alpha': alpha, 'normalization': normalization}

    # Rows are gathered in a list and turned into a DataFrame once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and copied the whole
    # frame on every call.
    rows = []

    # NOTE(review): the upper bound is exclusive, so the user with the largest
    # id is never processed -- confirm this is intended.
    for user_id in tqdm(range(1, max(dataset.user_ids))):
        for target_pos in target_items_pos:
            specific_cfs = _find_specific_cfs_(dataset, model, strategy, target_pos, False, sim_matrix, user_id, budget, top_k, **extra_kwargs)

            user_sequences = dataset.sequences[dataset.user_ids == user_id]

            # Only the first sequence of the user is considered (at most one pass).
            for j in range(min(1, len(user_sequences))):
                # Sequences containing non-positive ids are skipped.
                if not all(v > 0 for v in user_sequences[j]):
                    continue

                original_interactions = user_sequences[j].copy()
                best_interactions = specific_cfs[j].interactions['best']
                item_id = specific_cfs[j].item_id
                items_removed = np.setdiff1d(original_interactions, best_interactions)

                rows.append({
                    'user_id': user_id,
                    'target_pos': target_pos,
                    'target_item': item_id,
                    # np.asarray(...).tolist() accepts both arrays and lists and
                    # yields a comma-separated repr once written to CSV.
                    'original_interactions': np.asarray(original_interactions).tolist(),
                    'best_interactions': np.asarray(best_interactions).tolist(),
                    'cfs': items_removed.tolist(),
                    'len_cfs': len(items_removed),
                })

    evaluation_df = pd.DataFrame(rows, columns=columns)

    if csv_file_name is not None :
        csv_path = os.path.join(os.getcwd(), "csv")
        os.makedirs(csv_path, exist_ok=True)
        evaluation_df.to_csv(os.path.join(csv_path, csv_file_name), index=False)

    return evaluation_df

In [3]:
import ast

def replace_items_if_missing(items_removed, target_list):
    """
    Ensure every removed item is present in `target_list`, editing it in place.

    Each removed item that is absent from `target_list` overwrites one entry
    of the list. Entries that already hold a removed item are never
    overwritten. Nothing is returned.

    Raises IndexError when the list has no overwritable entry left.
    """
    absent = set(items_removed) - set(target_list)
    for start, absent_item in enumerate(absent):
        slot = start
        # Walk forward past entries that must be kept.
        while target_list[slot] in items_removed:
            slot += 1
        target_list[slot] = absent_item
    # should maybe shuffle?

def are_the_same(list1, list2):
    """Return True when both sequences hold the same elements, ignoring order."""
    return sorted(list1) == sorted(list2)
    
def evaluate_reverse_mode(csv_file_name, output_name, model, dataset, strategy, budget, len_sample, sim_matrix, top_k, alpha = None, normalization = None):
    """
    Replay the cases stored in `<cwd>/csv/<csv_file_name>` in reverse
    (inclusion) mode and sort the outcomes into three CSV files.

    For each CSV row the function:
      1. ranks the target item given the exclusion interactions;
      2. asks `find_sample_with_jaccard(..., worst_items=True)` for candidate items
         and makes sure the removed items are part of that sample;
      3. runs `retrieve_solutions_specific_sequence` from the exclusion
         interactions and ranks the target item for the resulting sequence;
      4. classifies the row as
         - good: final position <= original target position and the
           resulting sequence equals the original interactions;
         - good but not same: final position <= original target position and
           the result is not longer than the original interactions;
         - wrong: everything else.

    Rows with empty exclusion interactions, or with at least `len_sample`
    removed items, are skipped with a message.

    Args:
        csv_file_name: CSV produced by `create_evaluation_dataframe`.
        output_name: Prefix of the three files written to
            `./csv/evaluation_reverse_mode/`.
        model, dataset, strategy, budget, top_k, sim_matrix: Forwarded to the
            search helpers.
        len_sample: Sample size requested from `find_sample_with_jaccard`.
        alpha, normalization: Forwarded to
            `retrieve_solutions_specific_sequence` only when BOTH are given.

    Returns:
        tuple: (number of good + good-but-not-same cases, number of CSV rows).
        `(0, 0)` when the input CSV does not exist.
    """
    columns = ['user_id', 'target_pos', 'mid_pos', 'last_pos', 'original_interactions', 'items_removed', 'exclusion_interactions', 'inclusion_interactions', 'worst_jac', 'same_interactions']

    csv_path = os.path.join(os.getcwd(), "csv")
    file = os.path.join(csv_path, csv_file_name)

    if not os.path.exists(file) :
        print("Need the csv file : " + csv_file_name + ". Please run the create_evaluation_dataframe function with this specific strategy to generate the csv file.")
        # Nothing was read, so there is nothing to count.
        return 0, 0

    df = pd.read_csv(file)

    # Forward the caller's tuning values instead of hard-coded ones.
    extra_kwargs = {}
    if alpha is not None and normalization is not None:
        extra_kwargs = {'alpha': alpha, 'normalization': normalization}

    good_rows = []
    good_but_not_same_rows = []
    wrong_rows = []
    counter = 0

    # Wrapping the iterator keeps the progress bar in sync even for skipped rows.
    for _, row in tqdm(df.iterrows(), total=df.shape[0], desc='csv reading...'):
        user_id = row["user_id"]
        target_pos = row["target_pos"]
        target_item = row["target_item"]
        original_interactions = ast.literal_eval(row["original_interactions"])
        exclusion_interactions = ast.literal_eval(row["best_interactions"])
        items_removed = ast.literal_eval(row["cfs"])

        if len(exclusion_interactions) == 0 :
            print("empty", user_id)
            continue

        # Position of the target item once the cfs items have been excluded.
        # Already-interacted items are pushed to the end of the ranking.
        predictions_reverse = -model.predict(exclusion_interactions)
        predictions_reverse[exclusion_interactions] = StaticVars.FLOAT_MAX
        target_pos_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]

        worst_jaccard_sample = find_sample_with_jaccard(target_item, exclusion_interactions, sim_matrix, len_sample, worst_items = True)

        if len(items_removed) >= len_sample:
            print(f"Sequence skipped, too much items removed for user_id {user_id}.")
            continue

        # The sample must contain the removed items for the search to be able
        # to re-add them.
        if not set(items_removed) <= set(worst_jaccard_sample):
            replace_items_if_missing(items_removed, worst_jaccard_sample)

        worst_jacc_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, budget, top_k, True, sim_matrix, exclusion_interactions, target_item, worst_jaccard_sample, **extra_kwargs)

        inclusion_interactions = worst_jacc_search_info[0].interactions['best']
        inclusion_predictions = -model.predict(inclusion_interactions)
        inclusion_predictions[inclusion_interactions] = StaticVars.FLOAT_MAX
        last_target_pos = np.where(inclusion_predictions.argsort() == target_item)[0][0]

        recovered = last_target_pos <= target_pos
        # Compare as lists so that an ndarray result does not make the
        # comparison element-wise (and its truth value ambiguous).
        is_same = bool(recovered and list(inclusion_interactions) == list(original_interactions))

        new_lign = {
            'user_id': user_id,
            'target_pos': target_pos,
            'mid_pos': target_pos_reverse,
            'last_pos': last_target_pos,
            'original_interactions': original_interactions,
            'items_removed': items_removed,
            'exclusion_interactions': exclusion_interactions,
            'inclusion_interactions': inclusion_interactions,
            'worst_jac': worst_jaccard_sample,
            'same_interactions': is_same,
        }

        if is_same:
            counter += 1
            good_rows.append(new_lign)
        elif recovered and len(inclusion_interactions) <= len(original_interactions):
            counter += 1
            good_but_not_same_rows.append(new_lign)
        else:
            wrong_rows.append(new_lign)

    # Build each frame once; DataFrame.append no longer exists in pandas >= 2.0.
    output_dir = "./csv/evaluation_reverse_mode/"
    os.makedirs(output_dir, exist_ok=True)
    pd.DataFrame(good_rows, columns=columns).to_csv(output_dir + output_name + "_good_cases.csv", index=False)
    pd.DataFrame(good_but_not_same_rows, columns=columns).to_csv(output_dir + output_name + "_good_but_not_same_cases.csv", index=False)
    pd.DataFrame(wrong_rows, columns=columns).to_csv(output_dir + output_name + "_wrong_cases.csv", index=False)

    return counter, df.shape[0]

In [None]:
import os
def evaluate_sampling_method(csv_file_name, model, dataset, strategy, len_sample, sim_matrix, top_k, sampling_method = "jaccard", alpha = None, normalization = None):
    """
    Run the inclusion search for every case of `<cwd>/csv/<csv_file_name>`
    using a given sampling method, and store the outcome as a CSV file.

    For each CSV row a candidate sample is built (`find_sample_with_jaccard`
    for "jaccard", `find_sample_with_recommender` for "rs"),
    `retrieve_solutions_specific_sequence` is run from the exclusion
    interactions, and the resulting position of the target item is recorded.
    The result is written to `<cwd>/csv/evaluate_sampling_<sampling_method>.csv`.

    Args:
        csv_file_name: CSV produced by `create_evaluation_dataframe`.
        model, dataset, strategy, top_k, sim_matrix: Forwarded to the helpers.
        len_sample: Sample size for the "jaccard" sampling method.
        sampling_method: Either "jaccard" or "rs".
        alpha, normalization: Forwarded to
            `retrieve_solutions_specific_sequence` only when BOTH are given.

    Returns:
        None. If the input CSV is missing a message is printed and nothing
        is written.

    Raises:
        ValueError: If `sampling_method` is not a supported value and there is
            at least one row to process.
    """
    columns = ['user_id', 'target_pos','inclusion_pos', 'original_interactions', 'exclusion_interactions', 'inclusion_interactions', 'length_exclusion_cfs', 'length_inclusion_cfs', 'inclusion_removed_items']

    csv_path = os.path.join(os.getcwd(), "csv")
    file = os.path.join(csv_path, csv_file_name)

    if not os.path.exists(file) :
        print("Need the csv file : " + csv_file_name + ". Please run the create_evaluation_dataframe function with this specific strategy to generate the csv file.")
        return

    df = pd.read_csv(file)

    # Forward the caller's tuning values instead of hard-coded ones.
    extra_kwargs = {}
    if alpha is not None and normalization is not None:
        extra_kwargs = {'alpha': alpha, 'normalization': normalization}

    rows = []
    # Wrapping the iterator makes the progress bar advance on every row.
    for _, row in tqdm(df.iterrows(), total=df.shape[0], desc='csv reading...'):
        user_id = row["user_id"]
        target_pos = row["target_pos"]
        target_item = row["target_item"]
        original_interactions = ast.literal_eval(row["original_interactions"])
        exclusion_interactions = ast.literal_eval(row["best_interactions"])
        length_cfs = row["len_cfs"]

        if sampling_method == "jaccard":
            inclusion_sample = find_sample_with_jaccard(target_item, exclusion_interactions, sim_matrix, len_sample)
        elif sampling_method == "rs":
            # NOTE(review): k is fixed to 20 here while the jaccard branch
            # honours len_sample -- confirm whether k should follow len_sample.
            inclusion_sample = find_sample_with_recommender(target_item, exclusion_interactions, model, k=20)
        else:
            raise ValueError(f"Unknown sampling_method: {sampling_method!r} (expected 'jaccard' or 'rs')")

        inclusion_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, 1048576, top_k, True, sim_matrix, exclusion_interactions, target_item, inclusion_sample, **extra_kwargs)

        inclusion_interactions = inclusion_search_info[0].interactions['best']
        # Already-interacted items are pushed to the end of the ranking.
        inclusion_predictions = -model.predict(inclusion_interactions)
        inclusion_predictions[inclusion_interactions] = StaticVars.FLOAT_MAX
        last_target_pos = np.where(inclusion_predictions.argsort() == target_item)[0][0]

        # Items present after the inclusion search but not before it.
        inclusion_removed_items = np.setdiff1d(inclusion_interactions, exclusion_interactions)

        rows.append({
            'user_id': user_id,
            'target_pos': target_pos,
            'inclusion_pos': last_target_pos,
            'original_interactions': original_interactions,
            'exclusion_interactions': exclusion_interactions,
            'inclusion_interactions': inclusion_interactions,
            'length_exclusion_cfs': length_cfs,
            'length_inclusion_cfs': len(inclusion_removed_items),
            'inclusion_removed_items': inclusion_removed_items,
        })

    # Build the frame once; DataFrame.append no longer exists in pandas >= 2.0.
    result_df = pd.DataFrame(rows, columns=columns)
    # Written next to the input CSV (the former "./csv./" prefix pointed to a
    # differently named directory).
    result_df.to_csv(os.path.join(csv_path, "evaluate_sampling_" + sampling_method + ".csv"), index=False)

In [1]:
# import ast
# def test_evaluate_reverse_mode(csv_file_name, model, dataset, strategy, len_sample, sim_matrix, top_k, alpha = None, normalization = None):
#     path = os.getcwd()
#     csv_path = os.path.join(path, "csv")
#     file = os.path.join(csv_path, csv_file_name)
    
#     if not os.path.exists(file) :
#         print("Need the csv file : " + csv_file_name + ". Please run the create_evaluation_dataframe function with this specific strategy to generate the csv file.")
    
#     else :
#         df = pd.read_csv(file)
#         row = df.iloc[0]
#         user_id = row["user_id"]
#         target_pos = row["target_pos"]
    
#         user_sequences = dataset.sequences[dataset.user_ids == user_id]
#         user_sequences = [sequence for sequence in user_sequences if all(value > 0 for value in sequence)]
#         original_interactions = user_sequences[0]
#         original_interactions = ast.literal_eval(row["original_interactions"])
#         print("original :", original_interactions)
    
#         brute_force_specific_cfs = _find_specific_cfs_(dataset, model, strategy, target_pos, False, sim_matrix, user_id, 1048576, top_k)
#         best_interactions = brute_force_specific_cfs[0].interactions['best']
#         best_interactions = ast.literal_eval(row["best_interactions"])
#         print("best:", best_interactions)
    
#         items_removed = np.setdiff1d(original_interactions, best_interactions)
#         items_removed = ast.literal_eval(row["cfs"])
#         print("items_removed", items_removed)
        
#         predictions = -model.predict(original_interactions)
#         predictions[original_interactions] = StaticVars.FLOAT_MAX
#         target_item = predictions.argsort()[min(top_k, target_pos)] # TODO retrieve it from the csv
#         print("target", target_item)
    
#         if len(best_interactions) == 0 : 
#             print("empty", user_id)
            
#         predictions_reverse = -model.predict(best_interactions)
#         predictions_reverse[best_interactions] = StaticVars.FLOAT_MAX
#         pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0] #bug maybe?
#         print("new pos",  pos_target_item_reverse)
    
#         worst_jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, sim_matrix, len_sample, worst_items = True)
#         print("worst_jac", worst_jaccard_sample )
        
#         if len(items_removed) >= len_sample:
#             print(f"Sequence skipped, too much items removed for user_id {user_id}.")
        
#         if not set(items_removed) <= set(worst_jaccard_sample):
#             replace_items_if_missing(items_removed, worst_jaccard_sample)
#             print("new worst", worst_jaccard_sample)
    
#             if alpha is None or normalization is None : 
#                 worst_jacc_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, 1048576, top_k, True, sim_matrix, best_interactions, target_item, worst_jaccard_sample)
#             else:
#                 worst_jacc_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, 1048576, top_k, True, sim_matrix, best_interactions, target_item, worst_jaccard_sample, alpha=0.5, normalization='default')
        
#         print("neg_original_interactions", worst_jacc_search_info[0].interactions['best'])
    
#         last_predictions_reverse = -model.predict(worst_jacc_search_info[0].interactions['best'])
#         last_predictions_reverse[worst_jacc_search_info[0].interactions['best']] = StaticVars.FLOAT_MAX
#         last_pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0] #bug maybe?
#         print("last pos",  last_pos_target_item_reverse)
    
#         # worst_jacc_cfs = np.setdiff1d( worst_jacc_search_info[0].interactions['best'], best_interactions)
        
#         if are_the_same(worst_jacc_search_info[0].interactions['best'], original_interactions):
#             # counter = counter + 1
#             print("Hopefully they are the same!")

In [None]:
def test_evaluate_reverse_mode(user_id, csv_file_name, model, dataset, strategy, len_sample, sim_matrix, top_k, alpha = None, normalization = None):
    """
    Debug helper: run the exclusion search followed by the reverse (inclusion)
    search for a single user and print every intermediate result.

    The target position is fixed to 9. The function prints its findings and
    returns None; it stops early (after printing the reason) when the
    exclusion search leaves no interactions or when at least `len_sample`
    items were removed.

    Args:
        user_id: User whose first all-positive sequence is analysed.
        csv_file_name: Unused; kept for signature compatibility.
        model, dataset, strategy, top_k, sim_matrix: Forwarded to the helpers.
        len_sample: Sample size requested from `find_sample_with_jaccard`.
        alpha, normalization: Forwarded to
            `retrieve_solutions_specific_sequence` only when BOTH are given.
    """
    target_pos = 9

    user_sequences = dataset.sequences[dataset.user_ids == user_id]
    user_sequences = [sequence for sequence in user_sequences if all(value > 0 for value in sequence)]
    original_interactions = user_sequences[0]
    print("original :", original_interactions)

    # NOTE(review): alpha / normalization are not forwarded to this first
    # search -- confirm whether they should be.
    brute_force_specific_cfs = _find_specific_cfs_(dataset, model, strategy, target_pos, False, sim_matrix, user_id, 1048576, top_k)
    best_interactions = brute_force_specific_cfs[0].interactions['best']
    print("best:", best_interactions)

    items_removed = np.setdiff1d(original_interactions, best_interactions)
    print("items_removed", items_removed)

    # Already-interacted items are pushed to the end of the ranking.
    predictions = -model.predict(original_interactions)
    predictions[original_interactions] = StaticVars.FLOAT_MAX
    target_item = predictions.argsort()[min(top_k, target_pos)] # TODO retrieve it from the csv
    print("target", target_item)

    if len(best_interactions) == 0 :
        print("empty", user_id)
        return

    predictions_reverse = -model.predict(best_interactions)
    predictions_reverse[best_interactions] = StaticVars.FLOAT_MAX
    pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]
    print("new pos",  pos_target_item_reverse)

    worst_jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, sim_matrix, len_sample, worst_items = True)
    print("worst_jac", worst_jaccard_sample )

    if len(items_removed) >= len_sample:
        print(f"Sequence skipped, too much items removed for user_id {user_id}.")
        return

    # The sample must contain the removed items for the search to be able to
    # re-add them.
    if not set(items_removed) <= set(worst_jaccard_sample):
        replace_items_if_missing(items_removed, worst_jaccard_sample)
        print("new worst", worst_jaccard_sample)

    extra_kwargs = {}
    if alpha is not None and normalization is not None:
        extra_kwargs = {'alpha': alpha, 'normalization': normalization}

    # The search runs whether or not the sample had to be patched; otherwise
    # its result would be undefined for samples that already contain every
    # removed item.
    worst_jacc_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, 1048576, top_k, True, sim_matrix, best_interactions, target_item, worst_jaccard_sample, **extra_kwargs)

    neg_original_interactions = worst_jacc_search_info[0].interactions['best']
    print("neg_original_interactions", neg_original_interactions)

    last_predictions_reverse = -model.predict(neg_original_interactions)
    last_predictions_reverse[neg_original_interactions] = StaticVars.FLOAT_MAX

    last_pos_target_item_reverse = np.where(last_predictions_reverse.argsort() == target_item)[0][0]
    print("last pos",  last_pos_target_item_reverse)
    print("item : ", last_predictions_reverse.argsort()[last_pos_target_item_reverse])

    if are_the_same(neg_original_interactions, original_interactions):
        print("Hopefully they are the same!")

In [None]:
def replace_items_if_missing(items_removed, target_list):
    """
    Inject into `target_list` (in place) every removed item it does not yet
    contain.

    One list entry is overwritten per missing item; entries already holding a
    removed item are left alone. Returns None. An IndexError is raised if the
    list runs out of entries that may be overwritten.
    """
    to_inject = set(items_removed) - set(target_list)
    for first_candidate, new_item in enumerate(to_inject):
        position = first_candidate
        while True:
            # Stop at the first entry that is not itself a removed item.
            if target_list[position] not in items_removed:
                break
            position += 1
        target_list[position] = new_item
    # should maybe shuffle?

def create_reverse_mode_evaluation_dataframe(dataset, model, strategy, target_item_pos, top_k, sim_matrix):
    """
    Build a DataFrame comparing, per user and target position, the exclusion
    counterfactual with the result of a reverse-mode search on a "worst
    jaccard" sample.

    Two rows are produced for every processed case: a first one holding only
    the exclusion result (user_id, target_pos, cfs, len_cfs) and a second one
    that additionally holds the sample and the reverse-mode result. Cases
    where the exclusion search leaves no interactions get `None` in the
    reverse-mode columns; cases with at least 20 removed items are skipped
    after the first row.

    The function works on its own arguments (`dataset`, `model`, `strategy`,
    `sim_matrix`) rather than on notebook-level globals. The search budget
    (1000), the sample length (20) and alpha / normalization (0.5 / 'default')
    are fixed.

    Args:
        dataset: Dataset exposing `user_ids` and `sequences`.
        model: Recommender exposing `predict`.
        strategy: Search strategy forwarded to the search helpers.
        target_item_pos (list): Target positions to evaluate.
        top_k: Forwarded to the search helpers.
        sim_matrix: Similarity matrix forwarded to the helpers.

    Returns:
        DataFrame with the columns user_id, target_pos, original_interactions,
        cfs, len_cfs, worst_jacc_sample, worst_jacc_cfs, len_worst_jacc_cfs.
    """
    columns = ['user_id', 'target_pos', 'original_interactions', 'cfs', 'len_cfs', 'worst_jacc_sample', 'worst_jacc_cfs', 'len_worst_jacc_cfs']
    len_sample = 20

    # Rows are gathered in a list; DataFrame.append no longer exists in pandas >= 2.0.
    rows = []

    # `tqdm` is the progress-bar class itself in this file, so it is called directly.
    # NOTE(review): the upper bound is exclusive, so the user with the largest
    # id is never processed -- confirm this is intended.
    for user_id in tqdm(range(1, max(dataset.user_ids))):
        for target_pos in target_item_pos:
            specific_cfs = _find_specific_cfs_(dataset, model, strategy, target_pos, False, sim_matrix, user_id, 1000, top_k, alpha=0.5, normalization='default')

            user_sequences = dataset.sequences[dataset.user_ids == user_id]

            # Only the first sequence of the user is considered (at most one pass).
            for j in range(min(1, len(user_sequences))):
                if not all(v > 0 for v in user_sequences[j]):
                    continue

                original_interactions = user_sequences[j].copy()
                best_interactions = specific_cfs[j].interactions['best']
                items_removed = np.setdiff1d(original_interactions, best_interactions)
                rows.append({'user_id': user_id, 'target_pos': target_pos, 'cfs': items_removed, 'len_cfs': len(items_removed)})

                # Already-interacted items are pushed to the end of the ranking.
                predictions = -model.predict(original_interactions)
                predictions[original_interactions] = StaticVars.FLOAT_MAX
                target_item = predictions.argsort()[min(top_k, target_pos)]

                if len(best_interactions) == 0 :
                    print("empty", user_id)
                    rows.append({'user_id': user_id, 'target_pos': target_pos, 'cfs': items_removed, 'len_cfs': len(items_removed), 'worst_jacc_cfs': None, 'len_worst_jacc_cfs': None})
                    break

                predictions_reverse = -model.predict(best_interactions)
                predictions_reverse[best_interactions] = StaticVars.FLOAT_MAX
                pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]

                worst_jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, sim_matrix, len_sample, worst_items = True)
                # NOTE(review): built with worst_items=True, i.e. exactly like
                # worst_jaccard_sample -- confirm whether the "best" sample was meant.
                jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, sim_matrix, len_sample, worst_items = True)
                rs_sample = find_best_items_using_recommender(target_item, best_interactions, model, len_sample)

                if len(items_removed) >= len_sample:
                    print(f"Sequence skipped, too much items removed for user_id {user_id}.")
                    continue

                # NOTE(review): the results of the two searches below are never
                # read afterwards; they are kept only to preserve existing behaviour.
                if not set(items_removed) <= set(jaccard_sample):
                    replace_items_if_missing(items_removed, jaccard_sample)
                    jacc_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, 1000, top_k, True, sim_matrix, best_interactions, target_item, jaccard_sample, alpha=0.5)

                if not set(items_removed) <= set(rs_sample):
                    replace_items_if_missing(items_removed, rs_sample)
                    rs_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, 1000, top_k, True, sim_matrix, best_interactions, target_item, rs_sample, alpha=0.5)

                if not set(items_removed) <= set(worst_jaccard_sample):
                    replace_items_if_missing(items_removed, worst_jaccard_sample)

                # The search runs whether or not the sample had to be patched, so
                # its result is always defined and never left over from an
                # earlier iteration.
                # NOTE(review): a position (pos_target_item_reverse) is passed
                # where the two calls above pass target_item -- confirm which
                # one is intended.
                worst_jacc_search_info = retrieve_solutions_specific_sequence(user_id, dataset, model, strategy, 1000, top_k, True, sim_matrix, best_interactions, pos_target_item_reverse, worst_jaccard_sample, alpha=0.5, normalization='default')

                worst_best_interactions = worst_jacc_search_info[j].interactions['best']
                # NOTE(review): 'worst_jacc_cfs' stores the full resulting
                # sequence, not its difference with best_interactions -- confirm.
                rows.append({'user_id': user_id, 'original_interactions': original_interactions, 'target_pos': target_pos, 'cfs': items_removed, 'len_cfs': len(items_removed), 'worst_jacc_sample': worst_jaccard_sample, 'worst_jacc_cfs': worst_best_interactions, 'len_worst_jacc_cfs': len(worst_best_interactions)})

    return pd.DataFrame(rows, columns=columns)
# df3 = create_reverse_mode_evaluation_dataframe(test, pretrained_models['lstm'], get_backend_strategy('combo'), [1, 3, 5, 7], 10, jaccard_sims_matrix)
# %store df3

In [18]:
# A simple class stack that only allows pop and push operations
class Stack:
    """Minimal LIFO container backed by a list (push / pop / size)."""

    def __init__(self):
        self.stack = []

    def pop(self):
        # Popping an empty stack yields None instead of raising.
        return self.stack.pop() if self.stack else None

    def push(self, item):
        self.stack.append(item)

    def size(self):
        return len(self.stack)


# And a queue that only has enqueue and dequeue operations
class Queue:
    """Minimal FIFO container backed by a list, with indexed read/write access."""

    def __init__(self):
        self.queue = []

    def enqueue(self, item):
        self.queue.append(item)

    def dequeue(self):
        # Dequeuing from an empty queue yields None instead of raising.
        if not self.queue:
            return None
        return self.queue.pop(0)

    def size(self):
        return len(self.queue)

    def clear(self):
        # Empties the existing list object in place.
        self.queue.clear()

    def get(self, i):
        return self.queue[i]

    def setter(self, i, v):
        self.queue[i] = v