## Use MCCE method with CARLA package

In [None]:
import os
os.chdir("/Users/Redelmeier/Dropbox/pkg/MCCE_paper/mccepy/")
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [40]:
import pandas as pd

from carla.data.catalog import OnlineCatalog
from carla.models.catalog import MLModelCatalog
from carla.models.negative_instances import predict_negative_instances

from mcce.mcce import MCCE
from mcce.metrics import distance, feasibility, constraint_violation, success_rate

## Load data use CARLA OnlineCatalog class

In [41]:
dataset = OnlineCatalog('adult')

## Train multi-layer perceptron

In [42]:
ml_model = MLModelCatalog(dataset, 
                          model_type="ann", 
                          load_online=False, 
                          backend="pytorch"
                          )

ml_model.train(learning_rate=0.002,
               epochs=20,
               batch_size=1024,
               hidden_size=[18, 9, 3],
               force_train=False, # Will not train a new model
               )

Loaded model from /nr/samba/user/anr/carla/models/adult/ann_layers_18_9_3.pt
test accuracy for model: 0.8458387942332897


  x = self.softmax(x)


## Select observations to generate counterfactuals for

In [43]:
factuals = predict_negative_instances(ml_model, dataset.df)
test_factual = factuals.iloc[:5]

  x = self.softmax(x)


## Create objects to feed into MCCE method

In [44]:
y_col = dataset.target
cont_feat = dataset.continuous

cat_feat = dataset.categorical
cat_feat_encoded = dataset.encoder.get_feature_names(dataset.categorical)

dtypes = dict([(x, "float") for x in cont_feat])
for x in cat_feat_encoded:
    dtypes[x] = "category"
df = (dataset.df).astype(dtypes)

## Fit MCCE method

In [45]:
mcce = MCCE(dataset=dataset,
            model=ml_model)

print("Fit trees")
mcce.fit(df.drop(dataset.target, axis=1), dtypes)

print("Sample observations for the specific test observations")
cfs = mcce.generate(test_factual.drop(dataset.target, axis=1), k=100)

print("Process the sampled observations")
mcce.postprocess(cfs=cfs, test_factual=test_factual, cutoff=0.5)

Fit trees
Sample observations for the specific test observations
Process the sampled observations


  x = self.softmax(x)


## Print counterfactuals

In [51]:
cfs = mcce.results_sparse
cfs['income'] = test_factual['income'] # add back the original response

# invert the features to their original form
print("Original factuals:")
decoded_factuals = dataset.inverse_transform(test_factual)[dataset.inverse_transform(ml_model.get_ordered_features(test_factual)).columns]


Original factuals:


Index(['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss',
       'hours-per-week', 'marital-status', 'native-country', 'occupation',
       'race', 'relationship', 'sex', 'workclass'],
      dtype='object')

In [47]:
print("Generated counterfactuals:")
decoded_cfs = dataset.inverse_transform(cfs)[dataset.inverse_transform(ml_model.get_ordered_features(test_factual)).columns]
decoded_cfs

Generated counterfactuals:


Unnamed: 0,age,fnlwgt,education-num,capital-gain,capital-loss,...,occupation,race,relationship,sex,workclass
0,39.0,98975.0,13.0,8614.0,0.0,...,Managerial-Specialist,White,Non-Husband,Male,Non-Private
1,50.0,117496.0,13.0,0.0,0.0,...,Managerial-Specialist,White,Non-Husband,Male,Non-Private
2,38.0,50149.0,10.0,0.0,1887.0,...,Other,White,Non-Husband,Male,Private
3,53.0,287927.0,13.0,0.0,0.0,...,Managerial-Specialist,White,Non-Husband,Male,Private
4,28.0,129460.0,13.0,0.0,0.0,...,Managerial-Specialist,White,Non-Husband,Female,Private


## Calculate some metrics

In [49]:
distance_pd = pd.DataFrame(distance(cfs, test_factual, dataset))

feasibility_pd = pd.DataFrame(feasibility(cfs, df, dataset.df.columns), columns=['feasibility'])

const_pd = pd.DataFrame(constraint_violation(decoded_cfs, decoded_factuals, dataset), columns=['violation'])

success_pd = pd.DataFrame(success_rate(cfs[dataset.df.columns], ml_model), columns=['success'])


  x = self.softmax(x)


In [50]:
results = pd.concat([decoded_cfs, distance_pd, feasibility_pd, const_pd, success_pd], axis=1)
results

Unnamed: 0,age,fnlwgt,education-num,capital-gain,capital-loss,...,L1,L2,feasibility,violation,success
0,39.0,98975.0,13.0,8614.0,0.0,...,0.078918,0.004358,0.082174,0,1
1,50.0,117496.0,13.0,0.0,0.0,...,1.298638,1.076441,0.23734,0,1
2,38.0,50149.0,10.0,0.0,1887.0,...,1.611827,1.204639,0.449476,0,1
3,53.0,287927.0,13.0,0.0,0.0,...,3.435996,3.161296,0.303281,0,1
4,28.0,129460.0,13.0,0.0,0.0,...,2.141362,2.019983,0.033575,0,1
