In [None]:
import warnings
warnings.filterwarnings("ignore", message=r"Passing", category=FutureWarning)
warnings.filterwarnings("ignore", message=r"Implicit", category=UserWarning)
warnings.filterwarnings("ignore", message=r"cannot", category=UserWarning)

In [None]:
import os
import sys

# Hide every CUDA device so that everything below runs on the CPU. The
# variable is set BEFORE the project imports so that it is already in place
# should any of them initialise a GPU backend while being imported.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Make the parent directory importable (resolved relative to the current
# working directory). The guard keeps sys.path free of duplicates when this
# cell is re-run in the same kernel.
if '..' not in sys.path:
    sys.path.append('..')

from carla.recourse_methods import Dice, Wachter
from copy import deepcopy
from model_shifts import (DynamicCsvCatalog,
                          DynamicOnlineCatalog,
                          RecourseExperiment,
                          RecourseGenerator,
                          train_model,
                          plot_experiment,
                          generate_gif)

We load the dataset as a `DynamicCsvCatalog`. We provide it with the lists of categorical and continuous features, a list of immutable features that should not be taken into consideration by the recourse generator, the target, the encoding of the positive and negative classes, the size of the test set (which is used to assess the error of the classifier), and a name.

In [None]:
# Build the catalog for the balanced "Give Me Some Credit" CSV file.
dataset = DynamicCsvCatalog(
    file_path="../datasets/give_me_some_credit_balanced.csv",
    # No categorical features are declared; every listed feature is continuous.
    categorical=[],
    continuous=[
        'RevolvingUtilizationOfUnsecuredLines',
        'age',
        'NumberOfTime30-59DaysPastDueNotWorse',
        'DebtRatio',
        'MonthlyIncome',
        'NumberOfOpenCreditLinesAndLoans',
        'NumberOfTimes90DaysLate',
        'NumberRealEstateLoansOrLines',
        'NumberOfTime60-89DaysPastDueNotWorse',
        'NumberOfDependents',
    ],
    # Features the recourse generator should not take into consideration.
    immutables=['age'],
    target='SeriousDlqin2yrs',
    # Encoding of the positive and negative classes in the target column.
    positive=1,
    negative=0,
    # Size of the test set used to assess the error of the classifier.
    test_size=0.3,
    name="GMSC",
)

Our framework supports the simultaneous assessment of multiple generators. To that end, we pass a list of `RecourseGenerator` objects, which are tested under the same experimental conditions.

Data about the dynamics of recourse is collected for all of the declared generators. It can be saved to a `.csv` file.

In [None]:
# Classifier settings; the same dict is handed to train_model, to every
# RecourseGenerator and to the RecourseExperiment below.
hyper_params = {
    'learning_rate': 0.01,
    'epochs': 10,
    'batch_size': 1,
    'hidden_size': [5],
}

# SET NAME BEFORE THE EXPERIMENT
name = 'GMSC_experiment'

# Repeat the full experiment five times. `model`, `generators` and
# `experiment` are rebound on every pass, so after the loop they refer to the
# final run only.
# NOTE(review): every run is created with the same `name` -- confirm that
# save_data() does not overwrite the output of earlier runs.
for index in range(5):
    # Recourse generated by DICE is compared with the Wachter generator. As
    # they may modify data differently, each generator receives its own deep
    # copy of the dataset and of the freshly trained model so that they can be
    # updated independently.
    model = train_model(deepcopy(dataset), hyper_params, model_type='ann')

    # The trailing positional argument (120) is forwarded unchanged; its
    # meaning is defined by RecourseGenerator -- TODO confirm and name it.
    wachter_generator = RecourseGenerator(
        'Wachter', deepcopy(dataset), deepcopy(model),
        Wachter, {"loss_type": "BCE", "t_max_min": 2}, hyper_params, 120,
    )
    dice_generator = RecourseGenerator(
        'DICE_3', deepcopy(dataset), deepcopy(model),
        Dice, {"num": 3}, hyper_params, 120,
    )
    generators = [wachter_generator, dice_generator]

    # NOTE(review): unlike the generators, the experiment is given `dataset`
    # and `model` themselves rather than copies -- verify that it does not
    # mutate them, otherwise state could leak into later runs.
    experiment = RecourseExperiment(dataset, model, generators, name, hyper_params)
    experiment.run(epochs=15, recourse_per_epoch=10, calculate_p=1500)
    experiment.save_data()

We also provide multiple functions that simplify the visualization of collected data.

In [None]:
# Uses `experiment` and `generators` as left by the experiment cell, i.e. the
# objects from its final loop iteration.
experiment_path = '../experiment_data/{}'.format(experiment.experiment_name)

# One GIF per generator, looked up by the generator's name.
for generator in generators:
    generate_gif(experiment_path, generator.name)

In [None]:
# One (plot type, key path) pair per figure. The key path is presumably the
# sequence of nested keys under which the metric is stored in the saved
# experiment data -- verify against plot_experiment.
metric_specs = (
    ('pos_MMD', ['MMD', 'positive', 'value']),
    ('neg_MMD', ['MMD', 'negative', 'value']),
    ('pos_distance', ['decisiveness', 'positive']),
    ('neg_distance', ['decisiveness', 'negative']),
    ('disagreement', ['disagreement']),
    ('model_MMD', ['model_MMD', 'value']),
)
config = [
    {'type': plot_type, 'dict_path': dict_path}
    for plot_type, dict_path in metric_specs
]

# Produce every configured plot for all generators; show_plot=False is passed
# so the figures are not displayed inline.
for entry in config:
    generator_names = [g.name for g in generators]
    plot_experiment(
        experiment_path,
        generator_names,
        entry['type'],
        entry['dict_path'],
        show_plot=False,
    )