In [2]:
# Import necessary libraries
import os, sys
import numpy as np
import pandas as pd
import itertools
from collections import defaultdict
import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

# Initialization

In [3]:
# Get the current working directory
base_dir = os.getcwd()
plots_folder = 'plots'

# Load functions from other notebooks
helpers_file = os.path.join(base_dir, 'helpers.ipynb')
%run $helpers_file

current_module = sys.modules[__name__]

class Strategies:
    """
    Namespace listing the counterfactual-search strategies compared in this notebook.

    Attributes:
    - stypes (list): Strategy type names, in this order: 'random', 'brute_force',
      'bfs_yloss', 'bidirectional' and 'combo'. Other cells index into this list.
    """
    stypes = ['random', 'brute_force', 'bfs_yloss', 'bidirectional', 'combo']

# Movielens 10M

## Load data

In [4]:
## Runs the strategies notebook (clean_budget_strategies.ipynb).
## NOTE: the two lines below are active; comment them out to skip re-running the strategies.
implemented_strategies = os.path.join(base_dir, 'clean_budget_strategies.ipynb')
%run $implemented_strategies

# Restore relevant variables from other notebooks
# (%store -r reads back values previously saved with %store)

%store -r random_cfs
%store -r brute_force_cfs
%store -r bfs_yloss_cfs
# %store -r dfs_yloss_cfs
# %store -r bestfs_yloss_cfs
%store -r bidirectional_cfs
%store -r combo_cfs

  0%|          | 0/6040 [00:00<?, ?it/s]

## Save/retrieve experiment data if necessary

In [5]:
# pickle is used by the cells below to save/load the '<strategy>_cfs' result objects
import pickle

# Resolve the pickle folder against the current working directory
base_dir = os.getcwd()
# Note the trailing slash in 'pkl/'; os.path.join keeps it in the resulting path
pkl_dir = os.path.join(base_dir, 'pkl/')


In [6]:
# Uncomment these lines to save the object from the budget strategies notebook in the pkl folder
# with open(os.path.join(pkl_dir,'random_cfs.pkl'), 'wb') as f:
#     pickle.dump(random_cfs, f)
    
# with open(os.path.join(pkl_dir,'brute_force_cfs.pkl'), 'wb') as f:
#     pickle.dump(brute_force_cfs, f)

# with open(os.path.join(pkl_dir,'bfs_yloss_cfs.pkl'), 'wb') as f:
#     pickle.dump(bfs_yloss_cfs, f)

# with open(os.path.join(pkl_dir,'bidirectional_cfs.pkl'), 'wb') as f:
#     pickle.dump(bidirectional_cfs, f)

# with open(os.path.join(pkl_dir,'combo_cfs.pkl'), 'wb') as f:
#     pickle.dump(combo_cfs, f)

In [7]:
# Load the result objects stored as .pkl files in the pkl folder.
# These assignments replace whatever the same-named variables held before
# (e.g. the values restored with %store -r); comment the cell out to keep those.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.
def _load_pickled(file_name):
    """Return the object pickled in the file `file_name` inside `pkl_dir`."""
    with open(os.path.join(pkl_dir, file_name), 'rb') as pkl_file:
        return pickle.load(pkl_file)


random_cfs = _load_pickled('random_cfs.pkl')
brute_force_cfs = _load_pickled('brute_force_cfs.pkl')
bfs_yloss_cfs = _load_pickled('bfs_yloss_cfs.pkl')
bidirectional_cfs = _load_pickled('bidirectional_cfs.pkl')
combo_cfs = _load_pickled('combo_cfs.pkl')

# Evaluation of the strategies

In [15]:
# Load functions from other notebooks
helpers_file = os.path.join(base_dir, 'helpers.ipynb')
%run $helpers_file

counter, shape = evaluate_reverse_mode("new_brute_force_evaluation_cfs.csv", pretrained_models['lstm'], test,  get_backend_strategy("brute_force"), 20, jaccard_sims_matrix, 10)

print(counter)
print(shape)

user_id :  5
target_pos :  1
original : [230, 257, 359, 130, 358, 329, 372, 227, 324, 301, 253, 239, 107, 305, 266, 315, 60, 331, 123, 357]
best: [230, 359, 130, 358, 329, 372, 227, 324, 301, 253, 239, 107, 305, 60]
target 461
new pos 11
worst_jac [285, 505, 594, 19, 1578, 507, 492, 508, 480, 440, 293, 510, 485, 439, 325, 447, 266, 543, 521, 461]
new worst [257, 357, 331, 315, 123, 507, 492, 508, 480, 440, 293, 510, 485, 439, 325, 447, 266, 543, 521, 461]



[230, 359, 130, 358, 329, 372, 227, 324, 301, 253, 239, 107, 305, 60, 357]
[230, 359, 130, 358, 329, 372, 227, 324, 301, 253, 239, 107, 305, 60]
last pos 11
user_id :  5
target_pos :  3
original : [230, 257, 359, 130, 358, 329, 372, 227, 324, 301, 253, 239, 107, 305, 266, 315, 60, 331, 123, 357]
best: [230, 257, 359, 130, 358, 329, 227, 324, 253, 239, 107, 305, 60, 331, 123, 357]
target 594
new pos 10
worst_jac [165, 157, 508, 281, 466, 438, 766, 40, 53, 90, 521, 492, 511, 19, 705, 440, 313, 485, 439, 594]
new worst [266, 315, 372,

In [36]:
# Load functions from other notebooks
helpers_file = os.path.join(base_dir, 'helpers.ipynb')
%run $helpers_file

test_evaluate_reverse_mode(8, "new_brute_force_evaluation_cfs.csv", pretrained_models['lstm'], test,  get_backend_strategy("brute_force"), 20, jaccard_sims_matrix, 10)

original : [384 461 236 478  86 457 511  60 323 443 360 473 472 514 456 481 119 450
 453 444]
best: [384 461 236 478  86 457 511  60 323 443 473 472 514 456 481 119 450 453
 444]
items_removed [360]
target 704
new pos 10
19
sim_matrix 3706
sim_matrix 3706
19
test :  3687
top_items_indices :  [ 727  710  806   87  649  515  671  537  712  680 1072  619  638 1412
  881  270  774  791  547  684]
[ 746  729  825   89  668  534  690  556  731  699 1091  638  657 1431
  900  274  793  810  566  703  358]
tmp [ 2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18. 19.
 20. 21.  1.]
worst_jac [747, 730, 826, 90, 669, 535, 691, 557, 732, 700, 1092, 639, 658, 1432, 901, 275, 794, 811, 567, 704]
new worst [360, 730, 826, 90, 669, 535, 691, 557, 732, 700, 1092, 639, 658, 1432, 901, 275, 794, 811, 567, 704]
neg_original_interactions [384 461 236 478  86 457 511  60 323 443 473 472 514 456 481 119 450 453
 444]
last pos 10
item :  704


In [None]:
def test_evaluate_reverse_mode(user_id, csv_file_name, model, dataset, strategy, len_sample, sim_matrix, top_k, alpha = None, normalization = None):
    """
    Trace the reverse-mode evaluation for a single user, printing every intermediate result.

    Steps, as implemented below:
      1. take the user's first sequence that contains only positive item ids;
      2. run the forward search (`_find_specific_cfs_`) and read the 'best' interactions it found;
      3. pick the target item from the model's ranking of the original sequence;
      4. build a candidate sample with `find_worst_items_with_jaccard`, forcing the removed
         items into it when some are missing;
      5. run the reverse search (`retrieve_solutions_specific_sequence`) starting from the
         'best' interactions and report where the target item ranks afterwards.

    Parameters:
    - user_id: id used to select rows of `dataset.sequences` through `dataset.user_ids`.
    - csv_file_name: not used by this function; accepted because the callers pass it.
    - model: object exposing `predict(sequence)`. The result is indexed by item id below,
      so it presumably holds one score per item -- TODO confirm.
    - dataset: object exposing `sequences` and `user_ids`.
    - strategy: search strategy forwarded to the helper functions.
    - len_sample: size of the sample requested from `find_worst_items_with_jaccard`.
    - sim_matrix: similarity matrix forwarded to the helper functions.
    - top_k: forwarded to the helpers; also caps the rank used to pick the target item.
    - alpha, normalization: forwarded to the reverse search only when both are given.

    Returns:
    - None. Everything is printed. The function stops early (after printing the reason)
      when there is nothing to evaluate: no usable sequence, no forward result, an empty
      'best' sequence, too many removed items, or no reverse result.
    """
    target_pos = 9
    budget = 1048576

    user_sequences = dataset.sequences[dataset.user_ids == user_id]
    user_sequences = [sequence for sequence in user_sequences if all(value > 0 for value in sequence)]
    if not user_sequences:
        print("no sequence without padding for user_id", user_id)
        return
    original_interactions = user_sequences[0]
    print("original :", original_interactions)

    brute_force_specific_cfs = _find_specific_cfs_(dataset, model, strategy, target_pos, False, sim_matrix, user_id, budget, top_k)
    if not brute_force_specific_cfs:
        print("no forward result for user_id", user_id)
        return
    best_interactions = brute_force_specific_cfs[0].interactions['best']
    print("best:", best_interactions)

    items_removed = np.setdiff1d(original_interactions, best_interactions)
    print("items_removed", items_removed)

    # Negate the scores so argsort ranks the highest score first; items already in the
    # sequence get FLOAT_MAX so that they sort last.
    predictions = -model.predict(original_interactions)
    predictions[original_interactions] = StaticVars.FLOAT_MAX
    target_item = predictions.argsort()[min(top_k, target_pos)] # TODO retrieve it from the csv
    print("target", target_item)

    if len(best_interactions) == 0:
        # Nothing to predict from: model.predict on an empty sequence cannot be ranked.
        print("empty", user_id)
        return

    predictions_reverse = -model.predict(best_interactions)
    predictions_reverse[best_interactions] = StaticVars.FLOAT_MAX
    # argsort() is a permutation of all item indices, so target_item is always found.
    pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]
    print("new pos",  pos_target_item_reverse)

    worst_jaccard_sample = find_worst_items_with_jaccard(target_item, best_interactions, sim_matrix, len_sample)
    print("worst_jac", worst_jaccard_sample )

    if len(items_removed) >= len_sample:
        print(f"Sequence skipped, too much items removed for user_id {user_id}.")
        return

    if not set(items_removed) <= set(worst_jaccard_sample):
        # The return value is not used; the printed sample shows the removed items inserted,
        # so the helper is relied upon to update worst_jaccard_sample in place.
        replace_items_if_missing(items_removed, worst_jaccard_sample)
        print("new worst", worst_jaccard_sample)

    # The reverse search runs whether or not the sample had to be patched; previously it only
    # ran inside the branch above, leaving worst_jacc_search_info undefined otherwise.
    # The caller's alpha / normalization are forwarded instead of hard-coded values.
    search_kwargs = {}
    if alpha is not None and normalization is not None:
        search_kwargs = {'alpha': alpha, 'normalization': normalization}
    worst_jacc_search_info = retrieve_solutions_specific_sequence(
        user_id, dataset, model, strategy, budget, top_k, True, sim_matrix,
        best_interactions, target_item, worst_jaccard_sample, **search_kwargs)
    if not worst_jacc_search_info:
        print("no reverse result for user_id", user_id)
        return

    reverse_best = worst_jacc_search_info[0].interactions['best']
    print("neg_original_interactions", reverse_best)

    last_predictions_reverse = -model.predict(reverse_best)
    last_predictions_reverse[reverse_best] = StaticVars.FLOAT_MAX

    last_ranking = last_predictions_reverse.argsort()
    last_pos_target_item_reverse = np.where(last_ranking == target_item)[0][0]
    print("last pos",  last_pos_target_item_reverse)
    # Sanity check: the item printed here is the target item itself.
    print("item : ", last_ranking[last_pos_target_item_reverse])

    if are_the_same(reverse_best, original_interactions):
        print("Hopefully they are the same!")

In [104]:
# Manual trace for one user with the brute-force strategy: forward search, then reverse
# search from the 'best' interactions, then conversion of both results with convert_res_to_lists.
import numpy as np
from collections import defaultdict
# get currently working directory
base_dir = os.getcwd()

# load functions from other notebooks
# (backslashes are replaced so the path can be passed to %run on Windows too)
helpers_file = os.path.join(base_dir, 'helpers.ipynb').replace("\\", "/")
%run $helpers_file


# Largest float32 value; written over the scores of already-seen items so they sort last
FLOAT_MAX = np.finfo(np.float32).max


target_pos = 7
user_id = 8
top_k = 10
brute_force_specific_cfs = None
# NOTE: i is only used by the commented-out search loop below
i = 0

# while brute_force_specific_cfs == [] or brute_force_specific_cfs is None:
#     i += 1
#     brute_force_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 7, False, jaccard_sims_matrix, i, 100000, 10)

# Forward search for the fixed user_id (the False flag is True in the reverse calls below;
# 100000 is presumably the search budget -- TODO confirm against helpers.ipynb)
brute_force_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('brute_force'), target_pos, False, jaccard_sims_matrix, user_id, 100000, top_k)

# Keep only the user's sequences whose values are all positive, and use the first one
user_sequences = test.sequences[test.user_ids == user_id]
user_sequences = [sequence for sequence in user_sequences if all(value > 0 for value in sequence)]

original_interactions = user_sequences[0]
print("original_interactions", original_interactions)

# NOTE(review): indexing [0] fails if the search returned an empty list
best_interactions = brute_force_specific_cfs[0].interactions['best']
print("best_interactions", best_interactions)

# Items present in the original sequence but absent from the 'best' one (sorted, unique)
items_removed = np.setdiff1d(original_interactions, best_interactions)
print("items_removed", items_removed)

# Scores are negated so argsort puts the highest score first; seen items are pushed last
predictions = -pretrained_models['lstm'].predict(original_interactions)
predictions[original_interactions] = FLOAT_MAX
target_item = predictions.argsort()[min(top_k, target_pos)]
print("target_item", target_item)

# Rank of the same target item when the model is fed the 'best' interactions instead
predictions_reverse = -pretrained_models['lstm'].predict(best_interactions)
predictions_reverse[best_interactions] = FLOAT_MAX
pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]
print("position target item in reverse mode", pos_target_item_reverse)

# Three candidate samples of size 20 for the reverse search
worst_jaccard_sample = find_worst_items_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20)
print("worst_jacc", worst_jaccard_sample)

jaccard_sample = find_best_items_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20)
print("jacc", jaccard_sample)
rs_sample = find_best_items_using_recommender(target_item, best_interactions, pretrained_models['lstm'], 20)
# Use the first sample that already contains every removed item ...
if set(items_removed) <= set(jaccard_sample):
    print("jaccard_sample", jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(user_id, test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 100000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, jaccard_sample)

elif set(items_removed) <= set(rs_sample):
    print("rs_sample", rs_sample)
    search_info = retrieve_solutions_specific_sequence(user_id, test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 100000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, rs_sample)
else:
    # ... otherwise overwrite the first len(items_removed) entries of the worst-Jaccard sample
    worst_jaccard_sample[:len(items_removed)] = items_removed[:len(items_removed)]
    print("modified_sample", worst_jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(user_id, test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 100000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, worst_jaccard_sample)

print("search_info", search_info[0].interactions['best'])

# Reverse result, keyed by the target item's position in reverse mode
result2 = dict.fromkeys([pos_target_item_reverse])
result2[pos_target_item_reverse] = []
result2[pos_target_item_reverse].extend(search_info)

cnt2 = defaultdict(dict)
no_target_achieved_cases2 = defaultdict(list)


cnt2, no_target_achieved_cases2 = convert_res_to_lists(result2, cnt2, no_target_achieved_cases2, "random_0", True)
print(cnt2)
print(no_target_achieved_cases2)

# Forward result, keyed by the requested target position
result = dict.fromkeys([target_pos])
result[target_pos] = []
result[target_pos].extend(brute_force_specific_cfs)

cnt = defaultdict(dict)
no_target_achieved_cases = defaultdict(list)

# NOTE(review): target_item is reset right before the second conversion; presumably a helper
# reads it as a global -- confirm before moving or removing this line.
target_item = 0

cnt, no_target_achieved_cases = convert_res_to_lists(result, cnt, no_target_achieved_cases, "random_0", False)
print(cnt)
print(no_target_achieved_cases)

original_interactions [384 461 236 478  86 457 511  60 323 443 360 473 472 514 456 481 119 450
 453 444]
best_interactions [384 461 236 478  86 457 511  60 323 443 360 473 472 514 456 481 119 450
 453]
items_removed [444]
target_item 80
position target item in reverse mode 12
worst_jacc [320, 84, 376, 1203, 1059, 223, 777, 91, 138, 196, 151, 121, 127, 75, 131, 118, 126, 896, 147, 80]
jacc [2840, 2562, 3681, 3468, 1977, 3598, 3706, 139, 3471, 3603, 1688, 3595, 1988, 3602, 3600, 3601, 3597, 2044, 271, 665]
modified_sample [444, 84, 376, 1203, 1059, 223, 777, 91, 138, 196, 151, 121, 127, 75, 131, 118, 126, 896, 147, 80]
search_info [384, 461, 236, 478, 86, 457, 511, 60, 323, 443, 360, 473, 472, 514, 456, 481, 119, 450, 453, 444]
defaultdict(<class 'dict'>, {12: {'random_0': [[1], [1], [1], [1], [2], [2], [8], [19], [1], [0], [1], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]]}})
defaultdict(<class 'list'>, {})
defaultdict(<class 'dict'>, {7: {'random_

In [103]:
import numpy as np
from collections import defaultdict

# get currently working directory
base_dir = os.getcwd()

# load functions from other notebooks
helpers_file = os.path.join(base_dir, 'helpers.ipynb').replace("\\", "/")
%run $helpers_file


FLOAT_MAX = np.finfo(np.float32).max

target_pos = 1
user_id = 8
top_k = 10
combo_specific_cfs = None
i = 0

while combo_specific_cfs == [] or combo_specific_cfs is None:
    i += 1
    combo_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('combo'), target_pos, False, jaccard_sims_matrix, i, 1000, 10, alpha=0.5, normalization='default')

# combo_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('combo'), target_pos, False, jaccard_sims_matrix, user_id, 1000, top_k)
print("user_id = ", i)
user_sequences = test.sequences[test.user_ids == user_id]
user_sequences = [sequence for sequence in user_sequences if all(value > 0 for value in sequence)]

original_interactions = user_sequences[0]
print("original_interactions", original_interactions)

best_interactions = combo_specific_cfs[0].interactions['best']
print("best_interactions", best_interactions)

items_removed = np.setdiff1d(original_interactions, best_interactions)
print("items_removed", items_removed)

predictions = -pretrained_models['lstm'].predict(original_interactions)
predictions[original_interactions] = FLOAT_MAX
target_item = predictions.argsort()[min(top_k, target_pos)]
print("target_item", target_item)

predictions_reverse = -pretrained_models['lstm'].predict(best_interactions)
predictions_reverse[best_interactions] = FLOAT_MAX
pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]
print("position target item in reverse mode", pos_target_item_reverse)

worst_jaccard_sample = find_worst_items_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20)
print("worst_jacc", worst_jaccard_sample)

jaccard_sample = find_best_items_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20)
print("jacc", jaccard_sample)
rs_sample = find_best_items_using_recommender(target_item, best_interactions, pretrained_models['lstm'], 20)
if set(items_removed) <= set(jaccard_sample):
    print("jaccard_sample", jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(i, test, pretrained_models['lstm'], get_backend_strategy('combo'), 1000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, jaccard_sample)

elif set(items_removed) <= set(rs_sample):
    print("rs_sample", rs_sample)
    search_info = retrieve_solutions_specific_sequence(i, test, pretrained_models['lstm'], get_backend_strategy('combo'), 1000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, rs_sample)
else:
    worst_jaccard_sample[:len(items_removed)] = items_removed[:len(items_removed)]
    print("modified_sample", worst_jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(i, test, pretrained_models['lstm'], get_backend_strategy('combo'), 1000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, worst_jaccard_sample)

print("search_info", search_info[0].interactions['best'])

result2 = dict.fromkeys([pos_target_item_reverse])
result2[pos_target_item_reverse] = []
result2[pos_target_item_reverse].extend(search_info)

cnt2 = defaultdict(dict)
no_target_achieved_cases2 = defaultdict(list)


cnt2, no_target_achieved_cases2 = convert_res_to_lists(result2, cnt2, no_target_achieved_cases2, "random_0", True)
print(cnt2)
print(no_target_achieved_cases2)

result = dict.fromkeys([target_pos])
result[target_pos] = []
result[target_pos].extend(combo_specific_cfs)

cnt = defaultdict(dict)
no_target_achieved_cases = defaultdict(list)

target_item = 0

cnt, no_target_achieved_cases = convert_res_to_lists(result, cnt, no_target_achieved_cases, "random_0", False)
print(cnt)
print(no_target_achieved_cases)

user_id =  5
original_interactions [384 461 236 478  86 457 511  60 323 443 360 473 472 514 456 481 119 450
 453 444]
best_interactions []
items_removed [ 60  86 119 236 323 360 384 443 444 450 453 456 457 461 472 473 478 481
 511 514]
target_item 1439


ValueError: zero-size array to reduction operation maximum which has no identity

# Evaluation of the sampling strategies 

## Prepare data

In [None]:
cnt = defaultdict(dict)
no_target_achieved_cases = defaultdict(list)
strs_prefix = ''
missing_target_in_topk = True

# One entry per converted result set:
# (index into Strategies.stypes, offset added to i when indexing the stored results, label)
# NOTE(review): 'bfs' reads index i + 1 while 'random' and 'brute_force' read index i --
# confirm this offset is intended.
conversion_plan = [
    (0, 0, 'random'),
    (1, 0, 'brute_force'),
    (2, 1, 'bfs'),
    (3, 0, 'bidirectional_001'),
    (3, 1, 'bidirectional_5'),
    (3, 2, 'bidirectional_999'),
    (4, 0, 'combo_001'),
    (4, 1, 'combo_5'),
    (4, 2, 'combo_999'),
]

for i in range(1):
    for stype_idx, offset, label in conversion_plan:
        # Look up the '<prefix><strategy>_cfs' variable of this notebook by name
        stored_results = getattr(current_module, f'{strs_prefix}{Strategies.stypes[stype_idx]}_cfs')
        # Convert the results of this strategy variant, accumulating into cnt
        cnt, no_target_achieved_cases = convert_res_to_lists(
            stored_results[i + offset],
            cnt, no_target_achieved_cases, f'{label}_{i}', missing_target_in_topk)

In [None]:
# Dump the converted results for a quick visual check
print(cnt)

In [None]:
# Counterfactual methods to keep; compared in lower case against the keys of cnt[pos]
alpha_tags = ['001', '5', '999']
methods_supported = (
    ['Brute_Force', 'Random', 'BFS']
    + [f'BiDirectional_{tag}' for tag in alpha_tags]
    + [f'Combo_{tag}' for tag in alpha_tags]
)

# Index range [start, end) inside each result list that holds the per-cardinality stats
custom_range = [9, 29]

# Get the target positions from the keys of the 'cnt' dictionary
target_pos = list(cnt.keys())

# Define column names for the resulting DataFrame
cnames = ['user_id', 'method', 'pos', 'budget', 'init_budget', 'best_budget', 'cardinality', 'cfs_orig', 'cfs']

rows, cols = 5, 1

# One DataFrame per (col, target position); concatenated at the end
tmp_dfs = []

supported_keys = {name.lower() for name in methods_supported}
first_slot, end_slot = custom_range

for col in range(cols):
    # At most `rows` target positions are used
    for pos in target_pos[:rows]:
        records = []
        for k, v in cnt[pos].items():
            # Keep keys that contain '_<col>' and whose prefix (before the last '_') is supported
            if f'_{col}' not in k or k.rsplit('_', 1)[0] not in supported_keys:
                continue
            for slot in range(first_slot, end_slot):
                # One record per element of the zipped lists; the cardinality is the
                # 1-based offset of the slot inside custom_range
                records.extend(zip(
                    v[6], itertools.repeat(k), itertools.repeat(pos), v[slot], v[2], v[3],
                    itertools.repeat(slot - first_slot + 1), v[7], v[8]
                ))
        tmp_dfs.append(pd.DataFrame.from_records(records, columns=cnames))

df = pd.concat(tmp_dfs, ignore_index=True)
print(df.head(20))

In [None]:
# Among the rows where 'cfs_orig' equals 'cfs', flag every row that is followed by another such
# row with the same (user_id, method, pos); keep='last' leaves only the last row of each group unflagged.
# NOTE: idx is indexed on the filtered rows only, not on the whole of df.
idx = df.loc[(df['cfs_orig'] == df['cfs'])].duplicated(subset=['user_id', 'method', 'pos'], keep='last')

# For the unflagged rows (the last one per group), overwrite 'budget' with the row's 'best_budget'.
# The `& ~idx` combination relies on pandas aligning the two Series on their index.
# NOTE: this mutates df in place, so the cell is not independent of earlier runs of itself.
df.loc[(df['cfs_orig'] == df['cfs']) & ~idx, ['budget']] = df.loc[(df['cfs_orig'] == df['cfs']) & ~idx]['best_budget']

In [None]:
# Create dataframes for the plots

# 1st plot: %users per card

# Total number of distinct users, used as the denominator of every '%users' column
all_users = df['user_id'].nunique()

# Rows with a positive budget whose best budget is at most 1000 (independent of the cardinality)
within_budget = (df['budget'] > 0) & (df['best_budget'] <= 1000)

# For each cardinality bound 1..20: distinct 'user_id' and 'cfs' counts per (method, pos)
res = [
    df[within_budget & (df['cardinality'] <= max_card)]
    .groupby(['method', 'pos'], sort=False)[['user_id', 'cfs']]
    .nunique()
    .reset_index()
    for max_card in range(1, 21)
]

ddf = pd.concat(res, ignore_index=True)
ddf['%users'] = (ddf['user_id'] / all_users) * 100

# 2nd plot: mean of 'cfs' (plotted later as "Avg. Explanation Length") and number of users,
# per budget threshold


init_budget = 100
end_budget = 1001
step_budget = 100

# Initialize an empty list to store aggregated results
res = []

# Iterate over the budget thresholds 100, 200, ..., 1000
for b in range(init_budget, end_budget, step_budget):
    # Keep rows with a positive budget whose best budget is strictly below the threshold
    filtered_df = df[(df['budget'] > 0) & (df['best_budget'] < b)]

    # Group by 'method' and 'pos', calculate mean of 'cfs' and number of unique users
    aggregated = filtered_df.groupby(['method', 'pos'], sort=False)\
        .agg(card_mean=('cfs', 'mean'), no_users=('user_id', 'nunique'))\
        .reset_index()

    # Add a new column 'budget_thres' with the current budget threshold
    aggregated['budget_thres'] = b

    # Add a new column 'max_users' with the maximum number of users for each 'pos'.
    # The string alias 'max' is used because passing the builtin max to transform is deprecated.
    aggregated['max_users'] = aggregated.groupby(['pos'])['no_users'].transform('max')

    # Append the aggregated DataFrame to the list
    res.append(aggregated)

# Rows whose best budget exceeds 1000; only their count is reported
hard_case_filtered_df = df[(df['budget'] > 0) & (df['best_budget'] > 1000)]
print("size hard_cases", len(hard_case_filtered_df))

# Concatenate all DataFrames in the list into a single DataFrame
ddf2 = pd.concat(res, ignore_index=True)

# Calculate the percentage of users based on the total number of users
ddf2['%users'] = (ddf2['no_users'] / all_users) * 100


# 3rd plot :

def aggf(data):
    """
    Summarise the rows on which each user reaches their maximum budget.

    Parameters:
    - data (pd.DataFrame): must contain the columns 'user_id' and 'budget'.

    Returns:
    - pd.Series indexed by ['avg_budget', 'no_users']: the mean 'budget' over the selected
      rows and the number of selected rows. A user contributes more than one row when
      several of their rows tie for the maximum.
    """
    # Broadcast each user's maximum budget onto that user's rows. This replaces a
    # groupby(...).apply(...) that read the grouping column from the applied result,
    # behaviour that recent pandas versions deprecate.
    max_budget_per_user = data.groupby('user_id')['budget'].transform('max')
    users_grouping = data[data['budget'] == max_budget_per_user]

    # Calculate the mean budget and the count of selected rows
    return pd.Series(
        [users_grouping['budget'].mean(),
         users_grouping['user_id'].count()],
        ['avg_budget', 'no_users']
    )

def _budget_stats_up_to(max_card):
    """Apply aggf per (method, pos) to the rows with cardinality <= max_card and tag the result."""
    selected = df[(df['budget'] > 0) & (df['cardinality'] <= max_card) & (df['best_budget'] <= 1000)]
    stats = selected.groupby(['method', 'pos'], sort=False).apply(aggf).reset_index()
    return stats.assign(cardinality=max_card)


# One aggregated frame per cardinality bound, from 5 down to 1
res = [_budget_stats_up_to(max_card) for max_card in range(5, 0, -1)]

# Stack the per-cardinality frames into a single DataFrame
ddf3 = pd.concat(res, ignore_index=True)

# Share of users, relative to the total number of users
ddf3['%users'] = (ddf3['no_users'] / all_users) * 100


## Plots

In [None]:
import matplotlib.ticker as mticker


sns.set_theme(style="ticks", font_scale=1.5, )

# Create an array with the colors you want to use
colors = ['black', 'darkred', 'green', 'darkblue', 'dodgerblue', 'darkturquoise', 'darkgoldenrod', 'orange', 'chocolate']

# Build the palette first, then install it as the default.
# sns.set_palette returns None, so assigning its result left custom_palette empty and the
# plots below (which pass palette=custom_palette) only picked up the colors through global state.
custom_palette = sns.color_palette(colors)
sns.set_palette(custom_palette)

# One marker / dash pattern per method, in the order the methods appear in the plotted data
custom_markers = ['o', 'v', '^', 's', 'P', 'X', '<', '>', 'D', ]
# custom_markers = ['o', 'v', '^', 'P', 'P', 'P', 'X', 'X', 'X']
custom_dashes = [(1, 0), (4, 0), (4, 0), (4, 0), (6, 2), (1, 2), (1, 0), (6, 2), (1, 2)]

In [None]:
a4_dims = (11.7, 8.27)

# Bar chart restricted to target position 13: '%users' per 'cfs' value, one hue per method
g = sns.catplot(
    data=ddf[ddf['pos'] == 13],
    kind="bar", x="cfs", y="%users", hue="method",
    # Error bars show the standard deviation, drawn with small caps.
    # NOTE(review): newer seaborn releases replace `ci` with `errorbar` -- check the installed version.
    ci="sd", capsize=0.1,
    # Built-in "dark" palette, slightly transparent bars
    palette="dark", alpha=.8,
    # Facet size: height 6 with a 4:1 aspect ratio
    height=6, aspect=4,
)

In [None]:
# One figure per target position: mean 'cfs' (explanation length) against the budget threshold
a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

# Markers and dashes are trimmed to the number of distinct methods in ddf2
n_methods = len(ddf2['method'].unique())

for p in df.groupby('pos').groups:
    fig, ax = plt.subplots(figsize=a4_dims)

    g = sns.lineplot(
        data=ddf2[ddf2['pos'] == p], ax=ax,
        x="budget_thres", y="card_mean",
        hue="method", style='method', palette=custom_palette,
        markers=custom_markers[:n_methods], dashes=custom_dashes[:n_methods],
        markersize=12, lw=2, ls='solid',
    )

    # Set the y-axis to logarithmic scale
    # g.set(yscale='log')

    # Show y tick labels (major and minor) as whole numbers
    for axis_formatter in (ax.yaxis.set_major_formatter, ax.yaxis.set_minor_formatter):
        axis_formatter(mticker.FormatStrFormatter('%.0f'))

    # Framed three-column legend anchored near the upper-left corner
    lgnd = ax.legend(fancybox=True, frameon=True, labelspacing=0, borderpad=1, loc='upper left', bbox_to_anchor=(0, 0.9), ncol=3)

    # Rename the legend entries positionally (assumes the legend order matches new_labels)
    for legend_text, label in zip(lgnd.texts, new_labels):
        legend_text.set_text(label)

    # Axis labels and title
    ax.set(xlabel="Budget", ylabel="Avg. Explanation Length")
    ax.set_title(f'Target position {p}')

    # Tighten the layout and write the figure as a PDF into the plots folder
    fig.tight_layout()
    plt.savefig(os.path.join(plots_folder, f"avg_card_to_thres_pos_{p}.pdf"), dpi=300)

In [None]:
# One figure per target position: share of users (%) against the budget threshold
a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

for p in df.groupby('pos').groups:
    fig, ax = plt.subplots(figsize=a4_dims)

    g = sns.lineplot(
        data=ddf2[ddf2['pos'] == p], ax=ax,
        x="budget_thres", y="%users",
        hue="method", style='method', palette=custom_palette,
        markers=custom_markers, dashes=custom_dashes,
        markersize=12, lw=2,
    )

    # Framed three-column legend in the lower-right corner
    lgnd = ax.legend(fancybox=True, frameon=True, labelspacing=0, borderpad=1, loc='lower right', ncol=3)

    # Rename the legend entries positionally (assumes the legend order matches new_labels)
    for legend_text, label in zip(lgnd.texts, new_labels):
        legend_text.set_text(label)

    # Axis labels and title
    ax.set(xlabel="Budget", ylabel="Explained Recommendations (%)")
    ax.set_title(f'Target position {p}')

    # Tighten the layout and write the figure as a PDF into the plots folder
    fig.tight_layout()
    plt.savefig(os.path.join(plots_folder, f"users_percent_to_thres_pos_{p}.pdf"), dpi=300)

In [None]:
# One figure per target position: share of users (%) against the mean 'cfs' (explanation length)

# Iterate over the target positions present in the DataFrame
for p in df.groupby('pos').groups.keys():
    a4_dims = (11.7, 8.27)
    fig, ax = plt.subplots(figsize=a4_dims)

    g = sns.lineplot(
        ax=ax, data=ddf2[(ddf2['pos'] == p)],
        x="card_mean", y="%users", hue="method", style='method',
        palette=custom_palette, markers=custom_markers, markersize=12, dashes=custom_dashes, lw=2,
    )

    # Customize the legend
    lgnd = ax.legend(fancybox=True, frameon=True, labelspacing=0, borderpad=1, loc='lower right', ncol=3)

    # Rename the legend entries positionally (assumes the legend order matches new_labels)
    new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']
    for t, l in zip(lgnd.texts, new_labels):
        t.set_text(l)

    # Set axis labels and title
    ax.set(xlabel="Avg. Explanation Length", ylabel="Explained Recommendations (%)")
    ax.set_title(f'Target position {p}')

    # Adjust layout for better visualization
    fig.tight_layout()

    # Save under a name of its own: this plot used to reuse "users_percent_to_thres_pos_{p}.pdf"
    # and so overwrote the budget-threshold figure saved under that name.
    plt.savefig(os.path.join(plots_folder, f"users_percent_to_avg_card_pos_{p}.pdf"), dpi=300)

In [None]:
# One figure per target position: mean 'cfs' (explanation length) against the share of users (%)
for p in df.groupby('pos').groups.keys():
    a4_dims = (11.7, 8.27)
    fig, ax = plt.subplots(figsize=a4_dims)

    g = sns.lineplot(
        ax=ax, data=ddf2[(ddf2['pos'] == p)],
        x="%users", y="card_mean", hue="method", style='method',  # %users on x, card_mean on y
        palette=custom_palette, markers=custom_markers, markersize=12, dashes=custom_dashes, lw=2,
    )

    # Customize the legend
    lgnd = ax.legend(fancybox=True, frameon=True, labelspacing=0, borderpad=1, loc='lower right', ncol=3)

    # Rename the legend entries positionally (assumes the legend order matches new_labels)
    new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']
    for t, l in zip(lgnd.texts, new_labels):
        t.set_text(l)

    # Set axis labels (matching the axes chosen above) and title
    ax.set(xlabel="Explained Recommendations (%)", ylabel="Avg. Explanation Length")
    ax.set_title(f'Target position {p}')

    # Adjust layout for better visualization
    fig.tight_layout()

    # Save under a name of its own: this plot used to reuse "users_percent_to_thres_pos_{p}.pdf"
    # and so overwrote the budget-threshold figure saved under that name.
    plt.savefig(os.path.join(plots_folder, f"avg_card_to_users_percent_pos_{p}.pdf"), dpi=300)


In [None]:
# Sanity check: list the methods found in ddf3 next to the marker list and
# show how they pair up positionally.
unique_methods = ddf3['method'].unique()
method_marker_mapping = [(method, marker) for method, marker in zip(unique_methods, custom_markers)]

print("Methods:", unique_methods)
print("Custom Markers:", custom_markers)
print("Method to Marker Mapping:", method_marker_mapping)


In [None]:
# Nine marker / dash entries, passed positionally to seaborn's `markers` and
# `dashes` arguments by the plotting cells.
# NOTE(review): presumably one entry per plotted method, in the same order as
# the legend labels (three single methods, then two families of three) - confirm.
custom_markers = ['o', 'v', '^'] + ['P'] * 3 + ['X'] * 3
# Each dash entry is an (on, off) pair; an "off" of 0 gives a solid line.
custom_dashes = [(1, 0)] + [(4, 0)] * 3 + [(6, 2), (1, 2)] + [(1, 0), (6, 2), (1, 2)]

In [None]:
# One figure per target position: average budget spent (y) against the
# acceptable explanation length (x), one styled line per method.
a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

for i, p in enumerate(df.groupby('pos').groups.keys()):
    fig, ax = plt.subplots(figsize=a4_dims)

    pos_rows = ddf3[(ddf3['pos']==p)]
    g = sns.lineplot(
        data=pos_rows, x="cardinality", y="avg_budget",
        hue="method", style='method',
        palette=custom_palette, markers=custom_markers, dashes=custom_dashes,
        markersize=12, lw=2, ax=ax,
    )

    # y-axis: log-spaced minor tick locations, every tick label printed as an integer.
    # NOTE(review): the y-scale itself is not switched to log in this cell - confirm intended.
    locmin = mticker.LogLocator(base=10, subs=[10, 20, 50, 100, 200, 500, 700], numticks=12)
    ax.yaxis.set_minor_locator(locmin)
    ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.0f'))
    ax.yaxis.set_minor_formatter(mticker.FormatStrFormatter('%.0f'))

    # x-axis: integer ticks 1..5
    ax.set_xticks(np.arange(1, 6, 1))

    # Legend anchored on the left edge, with short display names substituted
    # positionally for the raw method names.
    lgnd = ax.legend(
        loc='center left', bbox_to_anchor=(0, 0.65), ncol=3,
        fancybox=True, frameon=True, labelspacing=0, borderpad=1,
    )
    for legend_text, short_name in zip(lgnd.get_texts(), new_labels):
        legend_text.set_text(short_name)

    ax.set_xlabel("Acceptable Explanation Length")
    ax.set_ylabel("Avg. Budget Spent")
    ax.set_title(f'Target Position {p}')

    fig.tight_layout()

    # Write the figure as a PDF into the plots folder
    out_path = os.path.join(plots_folder, f"avg_budget_to_card_pos_{p}.pdf")
    plt.savefig(out_path, dpi=300)

In [None]:
# Quick unstyled look at the same curves: one figure per target position in
# ddf3, average budget against cardinality, coloured by method.
for p in ddf3['pos'].unique():
    fig, ax = plt.subplots(figsize=(11.7, 8.27))
    pos_rows = ddf3.loc[ddf3['pos'] == p]
    sns.lineplot(data=pos_rows, x="cardinality", y="avg_budget", hue="method", ax=ax)
    plt.show()


In [None]:
# One figure per target position: percentage of explained recommendations (y)
# against the acceptable explanation length (x), one styled line per method.
a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

for i, p in enumerate(df.groupby('pos').groups.keys()):
    fig, ax = plt.subplots(figsize=a4_dims)

    pos_rows = ddf3[(ddf3['pos']==p)]
    g = sns.lineplot(
        data=pos_rows, x="cardinality", y="%users",
        hue="method", style='method',
        palette=custom_palette, markers=custom_markers, dashes=custom_dashes,
        markersize=12, lw=2, ax=ax,
    )

    # x-axis: integer ticks 1..5
    ax.set_xticks(np.arange(1, 6, 1))

    # Legend in the lower-right corner, with short display names substituted
    # positionally for the raw method names.
    lgnd = ax.legend(
        loc='lower right', ncol=3,
        fancybox=True, frameon=True, labelspacing=0, borderpad=1,
    )
    for legend_text, short_name in zip(lgnd.get_texts(), new_labels):
        legend_text.set_text(short_name)

    ax.set_xlabel("Acceptable Explanation Length")
    ax.set_ylabel("Explained Recommendations (%)")
    ax.set_title(f'Target position {p}')

    fig.tight_layout()

    # Write the figure as a PDF into the plots folder
    out_path = os.path.join(plots_folder, f"users_percent_to_card_pos_{p}.pdf")
    plt.savefig(out_path, dpi=300)

# Amazon

## Load data

In [None]:
# Optional: uncomment the next two lines to run the strategies notebook
# before restoring its stored results.
# NOTE(review): the MovieLens section runs 'clean_budget_strategies.ipynb' while
# this disabled line points at 'budget_strategies.ipynb' - confirm which is intended.
# implemented_strategies = os.path.join(base_dir, 'budget_strategies.ipynb')
# %run $implemented_strategies

# Restore the Amazon result objects from IPython's %store, one per strategy
# (random and brute force first).
%store -r amazon_random_cfs
%store -r amazon_brute_force_cfs

# BFS results; the DFS and best-first variants below are disabled.
%store -r amazon_bfs_yloss_cfs
# %store -r dfs_yloss_cfs
# %store -r bestfs_yloss_cfs

# Bidirectional and combo results; the next cell reads three entries from each.
%store -r amazon_bidirectional_cfs
%store -r amazon_combo_cfs

## Prepare data

In [None]:
from collections import Counter


# cnt = dict.fromkeys(most_sim_cfs.keys())
# no_target_achieved_cases = dict.fromkeys(most_sim_cfs.keys())
# Accumulators filled by convert_res_to_lists, one labelled entry per strategy variant.
cnt = defaultdict(dict)
no_target_achieved_cases = defaultdict(list)
strs_prefix = 'amazon_'

# Each slot is (index into Strategies.stypes, offset into the restored results, label stem).
# The order is kept exactly as listed: later cells rely on the insertion order
# when matching methods to legend labels positionally.
# NOTE(review): the 001 / 5 / 999 stems presumably correspond to the
# 0.001 / 0.5 / 0.999 legend labels - confirm.
result_slots = [
    (0, 0, 'random'),
    (1, 0, 'brute_force'),
    (2, 0, 'bfs'),
    (3, 0, 'bidirectional_001'),
    (3, 1, 'bidirectional_5'),
    (3, 2, 'bidirectional_999'),
    (4, 0, 'combo_001'),
    (4, 1, 'combo_5'),
    (4, 2, 'combo_999'),
]

for i in range(1):
    for stype_idx, offset, label_stem in result_slots:
        # Look up the restored variable, e.g. amazon_random_cfs, on this module
        stored_results = getattr(current_module, f'{strs_prefix}{Strategies.stypes[stype_idx]}_cfs')
        cnt, no_target_achieved_cases = convert_res_to_lists(
            stored_results[i + offset],
            cnt, no_target_achieved_cases, f'{label_stem}_{i}')

In [None]:
# Method families kept in the long-format frame (compared lower-cased against
# the cnt keys with their trailing "_<run>" suffix removed).
methods_supported = (
    ['Brute_Force', 'Random', 'BFS']
    + [f'BiDirectional_{m}' for m in ['001', '5', '999']]
    + [f'Combo_{m}' for m in ['001', '5', '999']]
)

strategies = [('init', 2), ('best', 3)]
# Half-open index range into each result tuple; index custom_range[0] maps to cardinality 1.
custom_range = [9, 29]
target_pos = list(cnt.keys())
cnames = ['user_id', 'method', 'pos', 'budget', 'init_budget', 'best_budget', 'cardinality', 'cfs_orig', 'cfs']

rows, cols = 5, 1

supported_lower = [name.lower() for name in methods_supported]

tmp_dfs = []
for col in range(cols):
    run_suffix = f'_{col}'
    # zip with range(rows) caps the number of target positions at `rows`
    for pos, row in zip(target_pos, range(rows)):
        records = []
        for method_key, values in cnt[pos].items():
            if run_suffix not in method_key:
                continue
            if method_key.rsplit('_', 1)[0] not in supported_lower:
                continue
            # One batch of rows per index in custom_range
            for budget_idx in range(custom_range[0], custom_range[1]):
                records.extend(zip(
                    values[6], itertools.repeat(method_key), itertools.repeat(pos),
                    values[budget_idx], values[2], values[3],
                    itertools.repeat(budget_idx - custom_range[0] + 1),
                    values[7], values[8],
                ))
        tmp_dfs.append(pd.DataFrame.from_records(records, columns=cnames))

df = pd.concat(tmp_dfs, ignore_index=True)

In [None]:
# Among the rows where cfs_orig equals cfs, flag every row except the last
# occurrence of each (user_id, method, pos) key as a duplicate.
# idx is indexed by that matching subset only, not by the whole of df.
idx = df.loc[(df['cfs_orig'] == df['cfs'])].duplicated(subset=['user_id', 'method', 'pos'], keep='last')
# For the matching rows that are not flagged (the last occurrence per key),
# overwrite 'budget' with that row's 'best_budget'.
# NOTE(review): `& ~idx` relies on pandas aligning the subset-indexed mask with
# the full-length one - confirm the handling of rows absent from idx.
df.loc[(df['cfs_orig'] == df['cfs']) & ~idx, ['budget']] = df.loc[(df['cfs_orig'] == df['cfs']) & ~idx]['best_budget']

In [None]:
# 1st plot: %users per cardinality threshold
# Denominator for every "%users" column: number of distinct users in df.
all_users = df['user_id'].nunique()

res = []
for i in range(1, 21):
    # Distinct users / cfs values per (method, pos) among rows with a positive
    # budget, cardinality up to i and best_budget within the 1000 cap.
    counts = (df[(df['budget'] > 0) & (df['cardinality'] <= i) & (df['best_budget'] <= 1000)]
              .groupby(['method', 'pos'], sort=False)[['user_id', 'cfs']]
              .nunique()
              .reset_index())
    # Record the threshold each slice was computed for; without it the 20
    # concatenated slices cannot be told apart (ddf3 already does this).
    counts['cardinality'] = i
    res.append(counts)

ddf = pd.concat(res, ignore_index=True)
ddf['%users'] = (ddf['user_id'] / all_users) * 100

# 2nd plot: average budget
max_card = 5
init = 100
end = 1001
step = 100


res = []
for b in range(init, end, step):
    # Mean of 'cfs' and distinct users per (method, pos) for rows whose
    # best_budget fits under the threshold b.
    aggregated = df[(df['budget'] > 0) & (df['best_budget'] <= b)]\
        .groupby(['method', 'pos'], sort=False)\
        .agg(card_mean=('cfs', 'mean'), no_users=('user_id', 'nunique'))\
        .reset_index()
    aggregated['budget_thres'] = b
    # String alias instead of the builtin `max`: passing the builtin callable
    # to transform is deprecated in recent pandas; the result is the same.
    aggregated['max_users'] = aggregated.groupby(['pos'])['no_users'].transform('max')
    res.append(aggregated)

ddf2 = pd.concat(res, ignore_index=True)
ddf2['%users'] = (ddf2['no_users'] / all_users) * 100
# ddf2['combined_means'] = ((ddf2['card_mean'] * ddf2['no_users']) + (20 * (ddf2['max_users'] - ddf2['no_users']))) / (ddf2['max_users'])
ddf2['combined_means'] = ddf2['card_mean']


# 3rd plot:
def aggf(data):
    """Summarise one group of rows by each user's maximum budget.

    For every ``user_id`` in ``data`` the rows whose ``budget`` equals that
    user's maximum budget are kept (ties keep every tied row).

    Parameters:
    - data (pd.DataFrame): rows with at least ``user_id`` and ``budget`` columns.

    Returns:
    - pd.Series indexed by ``['avg_budget', 'no_users']``: the mean ``budget``
      over the kept rows and the number of kept rows.

    NOTE(review): ``no_users`` counts rows, so a user with several rows tied at
    the maximum budget is counted more than once - confirm this is intended.
    """
    # Broadcast each user's maximum budget back onto that user's rows. This
    # selects the same rows as groupby('user_id').apply(...) did, without
    # relying on apply operating on the grouping column (deprecated in recent
    # pandas) and without a Python-level call per user.
    user_max = data.groupby('user_id')['budget'].transform('max')
    at_user_max = data.loc[data['budget'] == user_max]

    return pd.Series(
        [at_user_max['budget'].mean(),
         at_user_max['user_id'].count()],
        ['avg_budget', 'no_users']
    )

# Build ddf3: for each cardinality cap from 5 down to 1, aggregate the rows
# allowed under that cap per (method, pos) with aggf.
# Only the cardinality cap changes between iterations.
has_budget_within_cap = (df['budget'] > 0) & (df['best_budget'] <= 1000)

res = []
for i in range(5, 0, -1):
    eligible = df[has_budget_within_cap & (df['cardinality'] <= i)]
    aggregated = eligible.groupby(['method', 'pos'], sort=False).apply(aggf).reset_index()
    aggregated['cardinality'] = i
    res.append(aggregated)

ddf3 = pd.concat(res, ignore_index=True)
# Share of all distinct users, in percent
ddf3['%users'] = ddf3['no_users'] / all_users * 100

## Plots

In [None]:
import matplotlib.ticker as mticker


sns.set_theme(style="ticks", font_scale=1.5, )

# Colours for the nine lines drawn in each figure, in plotting order
colors = ['black', 'darkred', 'green', 'darkblue', 'dodgerblue', 'darkturquoise', 'darkgoldenrod', 'orange', 'chocolate']

# Build the palette first, then install it as the default.
# sns.set_palette() returns None, so assigning its result left custom_palette
# as None and made every later `palette=custom_palette` argument a no-op.
custom_palette = sns.color_palette(colors)
sns.set_palette(custom_palette)

# Nine marker and (on, off) dash entries, passed positionally to seaborn's
# `markers` and `dashes` arguments by the plotting cells.
custom_markers = ['o', 'v', '^', 'P', 'P', 'P', 'X', 'X', 'X']
custom_dashes = [(1, 0), (4, 0), (4, 0), (4, 0), (6, 2), (1, 2), (1, 0), (6, 2), (1, 2)]

In [None]:
# (Disabled) Nested bar plot of the mean explanation length per budget threshold, grouped by method
# g = sns.catplot(
#     data=ddf2[ddf2['pos']==3], kind="bar",
#     x="budget_thres", y='combined_means', hue="method",
#     ci=None, palette="dark", alpha=.8, height=6, aspect=4,
# )

# num_locations = len(ddf2.groupby('budget_thres').groups.keys())
# hatches = itertools.cycle(['//', '+', '-', 'x', '\\', '*', 'o', '.'])
# for i, bar in enumerate(g.ax.patches):
#     if i % num_locations == 0:
#         hatch = next(hatches)
#     bar.set_hatch(hatch)

# g.ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True, shadow=False)

# g.despine(left=True)
# g.set_axis_labels("threshold", "cardinality average")
# g.legend.set_title("")

# One figure per target position: average explanation length (y, log scale)
# against the budget threshold (x), one styled line per method.
rows = len(df.groupby('pos').groups.keys())
cols = 1

a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

for i, p in enumerate(df.groupby('pos').groups.keys()):
    fig, ax = plt.subplots(figsize=a4_dims)

    pos_rows = ddf2[(ddf2['pos'] == p)]
    g = sns.lineplot(
        data=pos_rows, x="budget_thres", y="card_mean",
        hue="method", style='method',
        palette=custom_palette,
        markers=custom_markers, dashes=custom_dashes,
        markersize=12, lw=2, ls='solid', ax=ax,
    )

    # Switch to a log y-axis first, then force plain integer tick labels
    # (changing the scale resets the formatters, so the order matters).
    g.set(yscale='log')
    ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.0f'))
    ax.yaxis.set_minor_formatter(mticker.FormatStrFormatter('%.0f'))

    # Legend in the upper-left area, with short display names substituted
    # positionally for the raw method names.
    lgnd = ax.legend(
        loc='upper left', bbox_to_anchor=(0, 0.9), ncol=3,
        fancybox=True, frameon=True, labelspacing=0, borderpad=1,
    )
    for legend_text, short_name in zip(lgnd.get_texts(), new_labels):
        legend_text.set_text(short_name)

    ax.set_xlabel("Budget")
    ax.set_ylabel("Avg. Explanation Length")
    ax.set_title(f'Target position {p}')

    fig.tight_layout()
    out_path = os.path.join(plots_folder, f"{strs_prefix}avg_card_to_thres_pos_{p}.pdf")
    plt.savefig(out_path, dpi=300)

In [None]:
# One figure per target position: percentage of explained recommendations (y)
# against the budget threshold (x), one styled line per method.
rows = len(df.groupby('pos').groups.keys())
cols = 1

a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

for i, p in enumerate(df.groupby('pos').groups.keys()):
    fig, ax = plt.subplots(figsize=a4_dims)

    pos_rows = ddf2[(ddf2['pos'] == p)]
    g = sns.lineplot(
        data=pos_rows, x="budget_thres", y="%users",
        hue="method", style='method',
        palette=custom_palette, markers=custom_markers, dashes=custom_dashes,
        markersize=12, lw=2, ax=ax,
    )

    # Legend in the lower-right corner, with short display names substituted
    # positionally for the raw method names.
    lgnd = ax.legend(
        loc='lower right', ncol=3,
        fancybox=True, frameon=True, labelspacing=0, borderpad=1,
    )
    for legend_text, short_name in zip(lgnd.get_texts(), new_labels):
        legend_text.set_text(short_name)

    ax.set_xlabel("Budget")
    ax.set_ylabel("Explained Recommendations (%)")
    ax.set_title(f'Target position {p}')

    fig.tight_layout()
    out_path = os.path.join(plots_folder, f"{strs_prefix}users_percent_to_thres_pos_{p}.pdf")
    plt.savefig(out_path, dpi=300)

In [None]:
# One figure per target position: average budget spent (y, log scale) against
# the acceptable explanation length (x), one styled line per method.
rows = len(df.groupby('pos').groups.keys())
cols = 1

a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

for i, p in enumerate(df.groupby('pos').groups.keys()):
    fig, ax = plt.subplots(figsize=a4_dims)

    pos_rows = ddf3[(ddf3['pos']==p)]
    g = sns.lineplot(
        data=pos_rows, x="cardinality", y="avg_budget",
        hue="method", style='method',
        palette=custom_palette, markers=custom_markers, dashes=custom_dashes,
        markersize=12, lw=2, ax=ax,
    )

    # Log y-axis with custom minor tick locations; every tick label is printed
    # as a plain integer. The scale is set before the locator and formatters.
    g.set(yscale='log')
    locmin = mticker.LogLocator(base=10, subs=[10, 20, 50, 100, 200, 500, 700], numticks=12)
    ax.yaxis.set_minor_locator(locmin)
    ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.0f'))
    ax.yaxis.set_minor_formatter(mticker.FormatStrFormatter('%.0f'))

    # x-axis: integer ticks 1..5
    ax.set_xticks(np.arange(1, 6, 1))

    # Legend anchored on the left edge, with short display names substituted
    # positionally for the raw method names.
    lgnd = ax.legend(
        loc='center left', bbox_to_anchor=(0, 0.65), ncol=3,
        fancybox=True, frameon=True, labelspacing=0, borderpad=1,
    )
    for legend_text, short_name in zip(lgnd.get_texts(), new_labels):
        legend_text.set_text(short_name)

    ax.set_xlabel("Acceptable Explanation Length")
    ax.set_ylabel("Avg. Budget Spent")
    ax.set_title(f'Target Position {p}')

    fig.tight_layout()
    out_path = os.path.join(plots_folder, f"{strs_prefix}avg_budget_to_card_pos_{p}.pdf")
    plt.savefig(out_path, dpi=300)

In [None]:
# One figure per target position: percentage of explained recommendations (y)
# against the acceptable explanation length (x), one styled line per method.
rows = len(df.groupby('pos').groups.keys())
cols = 1

a4_dims = (11.7, 8.27)
new_labels = ['Rnd', 'Exh', 'BFS', 'Pri_0.001', 'Pri_0.5', 'Pri_0.999', 'Hyb_0.001', 'Hyb_0.5', 'Hyb_0.999']

for i, p in enumerate(df.groupby('pos').groups.keys()):
    fig, ax = plt.subplots(figsize=a4_dims)

    pos_rows = ddf3[(ddf3['pos']==p)]
    g = sns.lineplot(
        data=pos_rows, x="cardinality", y="%users",
        hue="method", style='method',
        palette=custom_palette, markers=custom_markers, dashes=custom_dashes,
        markersize=12, lw=2, ax=ax,
    )

    # x-axis: integer ticks 1..5
    ax.set_xticks(np.arange(1, 6, 1))

    # Legend in the lower-right corner, with short display names substituted
    # positionally for the raw method names.
    lgnd = ax.legend(
        loc='lower right', ncol=3,
        fancybox=True, frameon=True, labelspacing=0, borderpad=1,
    )
    for legend_text, short_name in zip(lgnd.get_texts(), new_labels):
        legend_text.set_text(short_name)

    ax.set_xlabel("Acceptable Explanation Length")
    ax.set_ylabel("Explained Recommendations (%)")
    ax.set_title(f'Target position {p}')

    fig.tight_layout()
    out_path = os.path.join(plots_folder, f"{strs_prefix}users_percent_to_card_pos_{p}.pdf")
    plt.savefig(out_path, dpi=300)