In [1]:
import warnings
warnings.filterwarnings("ignore", message=r"Passing", category=FutureWarning)
warnings.filterwarnings("ignore", message=r"Implicit", category=UserWarning)
warnings.filterwarnings("ignore", message=r"cannot", category=UserWarning)

In [2]:
import os
import sys
sys.path.append("..")

from carla.recourse_methods import Dice, Wachter, Revise, Clue
from copy import deepcopy
from src.python import (
    DynamicCsvCatalog,
    DynamicOnlineCatalog,
    RecourseExperiment,
    RecourseGenerator,
    train_model,
    plot_experiment,
    generate_gif
)

# Directory (relative to the notebook's working directory) that holds the CSVs.
data_path = "../data"

# An empty CUDA_VISIBLE_DEVICES is set for this process; presumably this is
# meant to keep the deep-learning backends on the CPU -- TODO confirm.
os.environ.update({"CUDA_VISIBLE_DEVICES": ""})

  from .autonotebook import tqdm as notebook_tqdm
Using TensorFlow backend.


[INFO] Using Python-MIP package version 1.12.0 [model.py <module>]


We load the dataset as a `DynamicCsvCatalog` and provide it with: the lists of categorical and continuous features; a list of immutable features that should not be taken into consideration by the recourse generator; the target; the encoding of the positive and negative classes; the size of the test set, which is used to assess the error of the classifier; and a name.

In [3]:
# Load the balanced "Give Me Some Credit" CSV as a dynamic catalog.
dataset = DynamicCsvCatalog(
    file_path=os.path.join(data_path, "give_me_some_credit_balanced.csv"),
    # This dataset is configured with no categorical features.
    categorical=[],
    continuous=[
        'RevolvingUtilizationOfUnsecuredLines',
        'age',
        'NumberOfTime30-59DaysPastDueNotWorse',
        'DebtRatio',
        'MonthlyIncome',
        'NumberOfOpenCreditLinesAndLoans',
        'NumberOfTimes90DaysLate',
        'NumberRealEstateLoansOrLines',
        'NumberOfTime60-89DaysPastDueNotWorse',
        'NumberOfDependents',
    ],
    # Features the recourse generator should not take into consideration.
    immutables=['age'],
    target='SeriousDlqin2yrs',
    # Encoding of the positive and negative classes.
    positive=1,
    negative=0,
    # Size of the test set used to assess the error of the classifier.
    test_size=0.3,
    name="GMSC",
)

[INFO] Balance: train set 0.5, test set 0.5 [dynamic_csv_catalog.py __init__]


Our framework supports the simultaneous assessment of multiple generators. To that end, we pass a list of `RecourseGenerator` objects, all of which are put to the test under the same experimental conditions.

Data about the dynamics of recourse is collected for all of the declared generators. It can be saved to a `.csv` file.

In [4]:
# Experiment-wide settings.
name = 'GMSC_experiment'
n_folds = 5
epochs = 1
rounds_per_epoch = 10
timeout_seconds = 120

# Hyper-parameters handed to `train_model` and to every `RecourseGenerator`.
hyper_params = {
  'learning_rate': 0.01,
  'epochs': 10,
  'batch_size': 1,
  'hidden_size': [5],
}

# REVISE params
def revise_params(model, name=name):
  """Build the parameter dictionary passed to the REVISE generator.

  Args:
    model: object exposing `feature_input_order`; its length becomes the
      first entry of the VAE layer list.
    name: value stored under "data_name". The default is the module-level
      `name` as it was when this function was defined.

  Returns:
    A new dict with the keys "data_name", "optimizer" and "vae_params".
  """
  n_inputs = len(model.feature_input_order)
  vae_settings = {"train": True, "layers": [n_inputs, 512, 256, 8]}
  return {"data_name": name, "optimizer": "adam", "vae_params": vae_settings}

# CLUE params: the dictionary handed to the CLUE generator in every fold.
clue_params = dict(
  data_name="custom",
  train_vae=True,
  width=10,
  depth=3,
  latent_dim=12,
  batch_size=4,
  epochs=5,
  lr=0.0001,
  early_stop=20,
)

# SET NAME BEFORE THE EXPERIMENT
# NOTE(review): the recorded output of this cell ends with
# `IndexError: tensors used as indices must be long, byte or bool tensors`.
# The traceback is truncated, so the failing call cannot be identified from
# the notebook alone -- TODO reproduce on a fresh kernel.
for index in range(n_folds):

  # Each fold trains a fresh classifier on a copy of the dataset. The
  # generators may modify data differently, so every generator receives its
  # own deep copy of the dataset and of the model and is updated independently.
  # `index` is not used below: every fold reuses the same `name`.
  model = train_model(deepcopy(dataset), hyper_params, model_type='ann')

  # One generator per (label, recourse method, method parameters) entry,
  # constructed in the order listed.
  generators = [
    RecourseGenerator(
      label, deepcopy(dataset), deepcopy(model),
      method, method_params, hyper_params, timeout_seconds
    )
    for label, method, method_params in (
      ('Wachter', Wachter, {"loss_type": "BCE", "t_max_min": 2}),
      ('DICE_3', Dice, {"num": 3}),
      ('REVISE', Revise, revise_params(model)),
      ('CLUE', Clue, clue_params),
    )
  ]

  # After the loop, `generators` and `experiment` refer to the last fold only.
  experiment = RecourseExperiment(dataset, model, generators, name, hyper_params)
  experiment.run(
    epochs=epochs,
    recourse_per_epoch=rounds_per_epoch,
    calculate_p=1500,
  )
  experiment.save_data()

[INFO] Training the initial model [recourse_generator.py train_model]
[INFO] Start training of Variational Autoencoder... [models.py fit]
[INFO] [Epoch: 0/5] [objective: 2.446] [models.py fit]
[INFO] [ELBO train: 2.45] [models.py fit]
[INFO] [ELBO train: 0.43] [models.py fit]
[INFO] [ELBO train: 0.33] [models.py fit]
[INFO] [ELBO train: 0.26] [models.py fit]
[INFO] [ELBO train: 0.23] [models.py fit]
[INFO] ... finished training of Variational Autoencoder. [models.py fit]
[INFO] 
Net: [utils.py __init__]
[INFO] VAE_gauss_net [fc_gauss_cat.py __init__]
[INFO] Total params: 0.00M [fc_gauss_cat.py create_net]
[INFO] 
Network: [train.py train_VAE]
[INFO] 
Train: [train.py train_VAE]
[INFO] init cost variables: [train.py train_VAE]


	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:882.)
  exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)


[INFO] it 0/5, vlb -9.243020,  [train.py train_VAE]
[INFO] time: 3.333032 seconds
 [train.py train_VAE]
[INFO] vlb -950.682777 (-inf)
 [train.py train_VAE]
[INFO] Writting /Users/FA31DU/carla/models/autoencoders/clue/fc_VAE_custom_models/theta_best.dat
 [utils.py save]
[INFO] it 1/5, vlb -7.037175,  [train.py train_VAE]
[INFO] time: 3.069075 seconds
 [train.py train_VAE]
[INFO] vlb -97.270361 (-950.682777)
 [train.py train_VAE]
[INFO] Writting /Users/FA31DU/carla/models/autoencoders/clue/fc_VAE_custom_models/theta_best.dat
 [utils.py save]
[INFO] it 2/5, vlb -5.259840,  [train.py train_VAE]
[INFO] time: 3.103199 seconds
 [train.py train_VAE]
[INFO] vlb -102.960527 (-97.270361)
 [train.py train_VAE]
[INFO] it 3/5, vlb -4.108694,  [train.py train_VAE]
[INFO] time: 2.995562 seconds
 [train.py train_VAE]
[INFO] vlb -103.550050 (-97.270361)
 [train.py train_VAE]
[INFO] it 4/5, vlb -3.228565,  [train.py train_VAE]
[INFO] time: 3.140887 seconds
 [train.py train_VAE]
[INFO] vlb -77.880078 (-97

IndexError: tensors used as indices must be long, byte or bool tensors

We also provide multiple functions that simplify the visualization of collected data.

In [None]:
# Uses `experiment` and `generators` as left behind by the fold loop, i.e. the
# objects created in its final iteration. The directory is presumably where
# `experiment.save_data()` wrote its files -- TODO confirm.
experiment_path = f'../experiment_data/{experiment.experiment_name}'
for generator_name in (g.name for g in generators):
    generate_gif(experiment_path, generator_name)

In [None]:
# Plot specifications: each entry pairs a plot 'type' label with the
# 'dict_path' list that is passed to `plot_experiment` alongside it.
config = [
    {'type': plot_type, 'dict_path': dict_path}
    for plot_type, dict_path in (
        ('pos_MMD', ['MMD', 'positive', 'value']),
        ('neg_MMD', ['MMD', 'negative', 'value']),
        ('pos_distance', ['decisiveness', 'positive']),
        ('neg_distance', ['decisiveness', 'negative']),
        ('disagreement', ['disagreement']),
        ('model_MMD', ['model_MMD', 'value']),
    )
]

# One `plot_experiment` call per entry in `config`, covering every generator.
# A fresh list of generator names is built for each call; show_plot=False is
# passed on every call.
for plot_spec in config:
    plot_experiment(
        experiment_path,
        [generator.name for generator in generators],
        plot_spec['type'],
        plot_spec['dict_path'],
        show_plot=False,
    )