In [1]:
import sys
import yaml
import argparse
import pandas as pd
import numpy as np

from carla.data.catalog import OnlineCatalog
from carla.models.catalog import MLModelCatalog
from carla.models.negative_instances import predict_negative_instances
import carla.recourse_methods.catalog as recourse_catalog

import torch

from mcce import MCCE

# Experiment configuration read by the cells below.
#
# `dataset` is the list of CARLA dataset names the fitting cell iterates over.
# NOTE(review): the fitting loop rebinds `dataset` to an OnlineCatalog object,
# so this cell has to be re-run before the fitting cell can be run again.
dataset = ["adult"]
# Alternative datasets handled by the fitting cell (uncomment one to switch):
# dataset = ['give_me_some_credit']
# dataset = ['compas']
# Passed as `k` to mcce.generate() and embedded in the results CSV file name.
K = 10000
# Number of rows taken from the predicted negative instances as test factuals.
n_test = 100
# Random seed for the experiment.
seed = 1
# Initialised to None; not referenced by the cells shown here.
results_all = None

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from .autonotebook import tqdm as notebook_tqdm
Using TensorFlow backend.


[INFO] Using Python-MIP package version 1.12.0 [model.py <module>]


In [None]:


# Use CARLA to load each dataset, train the predictive model, pick the test
# factuals, and fit MCCE / generate candidate counterfactuals.
#
# Names left in the namespace for the following cells: `dataset` (rebound to
# the OnlineCatalog of the last processed dataset), `data_name`, `ml_model`,
# `test_factual`, `y_col`, `df`, `mcce`, `synth_df` and `start`.
import time  # hoisted out of the loop; used to time fitting + generation

print("Loading data from Carla...")

# Training hyperparameters of the ANN, keyed by dataset name.
TRAIN_PARAMS = {
    "adult": dict(learning_rate=0.002, epochs=20, batch_size=1024, hidden_size=[18, 9, 3]),
    "give_me_some_credit": dict(learning_rate=0.002, epochs=20, batch_size=2048, hidden_size=[18, 9, 3]),
    "compas": dict(learning_rate=0.002, epochs=25, batch_size=25, hidden_size=[18, 9, 3]),
}

# Columns handed to MCCE as `fixed_features`, keyed by dataset name.
FIXED_FEATURES = {
    "adult": ["age", "sex_Male"],
    "give_me_some_credit": ["age"],
    "compas": ["age", "sex_Male", "race_Other"],
}

# The loop below rebinds `dataset`, so a second run of this cell would try to
# iterate over a catalog object. Fail early with an actionable message.
if not isinstance(dataset, (list, tuple)):
    raise TypeError(
        "`dataset` must be a list of dataset names; re-run the configuration "
        "cell before running this cell again."
    )

for data_name in dataset:
    # Previously an unknown name fell through both if/elif chains, leaving the
    # model untrained and `fixed_features` undefined.
    if data_name not in TRAIN_PARAMS:
        raise ValueError(
            f"Unsupported dataset {data_name!r}; expected one of {sorted(TRAIN_PARAMS)}"
        )

    dataset = OnlineCatalog(data_name)

    # (1) Load predictive model and predict probabilities

    torch.manual_seed(0)
    ml_model = MLModelCatalog(
        dataset,
        model_type="ann",
        load_online=False,
        backend="pytorch",
    )

    ml_model.train(
        force_train=True,  # don't forget to add this or it might load an older model from disk
        **TRAIN_PARAMS[data_name],
    )

    # (2) Find unhappy customers and choose which ones to make counterfactuals for

    factuals = predict_negative_instances(ml_model, dataset.df)
    test_factual = factuals.iloc[:n_test]

    y_col = dataset.target
    features_and_response = dataset.df.columns
    cont_feat = dataset.continuous
    # Every column that is not continuous is treated as categorical.
    # NOTE(review): the original comment says these have new names since
    # encode_normalize_order_factuals() - confirm against CARLA.
    cat_feat = [x for x in features_and_response if x not in cont_feat]

    fixed_features = FIXED_FEATURES[data_name]

    # Create dtypes for MCCE(): continuous columns first, then categorical ones.
    dtypes = {x: "float" for x in cont_feat}
    dtypes.update({x: "category" for x in cat_feat})
    df = (dataset.df).astype(dtypes)

    start = time.time()
    # (3) Fit MCCE object
    print("Fitting MCCE model...")
    # Use the configured `seed` instead of a duplicated literal.
    mcce = MCCE(fixed_features=fixed_features, model=ml_model, seed=seed)
    mcce.fit(df.drop(y_col, axis=1), dtypes)
    print("Generating counterfactuals with MCCE...")
    synth_df = mcce.generate(test_factual.drop(y_col, axis=1), k=K)

In [None]:
# (4) Postprocess generated counterfactuals
print("Postprocessing counterfactuals with MCCE...")
mcce.postprocess(
    df,
    synth_df,
    test_factual,
    y_col,
    scaler=dataset.inverse_transform,
    cutoff=0.5,
)

# Seconds elapsed since `start` was recorded in the fitting cell.
# NOTE(review): any pause between running the two cells is included.
timing = time.time() - start
print(timing)

# Store the elapsed time alongside the results.
mcce.results_sparse["time (seconds)"] = timing
# 20 minutes for give_me_some_credit

In [2]:
# Write the sparse MCCE results to CSV; the file name encodes dataset and K.
# `index=False` means the DataFrame index is not written to the file.
results_path = f"/nr/samba/user/anr/pkg/MCCE_Python/{data_name}_mcce_results_k_{K}.csv"
mcce.results_sparse.to_csv(results_path, index=False)

NameError: name 'mcce' is not defined

In [16]:
# Reload the saved adult results and show the first row on the original
# (inverse-transformed) feature scale.
dataset = OnlineCatalog("adult")

results = pd.read_csv(f"/nr/samba/user/anr/pkg/MCCE_Python/adult_mcce_results_k_{K}.csv")

# Columns to display, in display order.
adult_columns = [
    'age',
    'workclass',
    'fnlwgt',
    'education-num',
    'marital-status',
    'relationship',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
]

first_row_original_scale = dataset.inverse_transform(results.iloc[0:1])
first_row_original_scale[adult_columns].iloc[0]

age                        39
workclass         Non-Private
fnlwgt                 112731
education-num              13
marital-status    Non-Married
relationship      Non-Husband
capital-gain             7430
capital-loss                0
hours-per-week             40
native-country             US
Name: 0, dtype: object

In [47]:
# Reload the saved give_me_some_credit results and show the row with the
# smallest index on the original (inverse-transformed) feature scale.
pd.set_option('display.max_columns', None)
dataset = OnlineCatalog("give_me_some_credit")

results = pd.read_csv(f"/nr/samba/user/anr/pkg/MCCE_Python/give_me_some_credit_mcce_results_k_{K}.csv")

# A CSV written with an unnamed index comes back with an 'Unnamed: 0' column,
# while one written with index=False has no such column. Handle both layouts
# so a file without the column no longer raises KeyError in set_index.
if 'Unnamed: 0' in results.columns:
    results = results.rename(columns={'Unnamed: 0': 'index'})
if 'index' in results.columns:
    results = results.set_index(['index'])
else:
    # No stored index: fall back to the row position, labelled 'index'.
    results = results.rename_axis('index')

temp = results.sort_values([results.index.name]).iloc[0:1]

# Columns to display, in display order.
gmsc_columns = [
    'age',
    'RevolvingUtilizationOfUnsecuredLines',
    'NumberOfTime30-59DaysPastDueNotWorse',
    'DebtRatio',
    'MonthlyIncome',
    'NumberOfOpenCreditLinesAndLoans',
    'NumberOfTimes90DaysLate',
    'NumberRealEstateLoansOrLines',
    'NumberOfTime60-89DaysPastDueNotWorse',
    'NumberOfDependents',
]
dataset.inverse_transform(temp)[gmsc_columns].iloc[0]

age                                       40.000000
RevolvingUtilizationOfUnsecuredLines       0.941073
NumberOfTime30-59DaysPastDueNotWorse       3.000000
DebtRatio                                  0.924615
MonthlyIncome                           5305.000000
NumberOfOpenCreditLinesAndLoans           10.000000
NumberOfTimes90DaysLate                    0.000000
NumberRealEstateLoansOrLines               1.000000
NumberOfTime60-89DaysPastDueNotWorse       1.000000
NumberOfDependents                         2.000000
Name: 9, dtype: float64

In [None]:
# Print the mean of the L0, L1 and L2 columns of the sparse results, in that order.
for distance_column in ("L0", "L1", "L2"):
    print(getattr(mcce.results_sparse, distance_column).mean())

In [None]:
# Count how many result rows share each index value, largest groups first.
results_sparse = mcce.results_sparse
# Name the index in place so it can be used as the group key below
# (this also names the index of `mcce.results_sparse`, which is the same object).
results_sparse.index.rename('index', inplace=True)
rows_per_index = results_sparse.groupby('index').size()
rows_per_index.sort_values(ascending=False)