# Generate Explainability Report with CashApp dataset using XAI

This notebook demonstrates how to generate an explainability report using the compiler implemented in the XAI library.


## Steps
1. Create required input from the Cashapp model_data pkl
2. Evaluate the model performance with XAI report and generate a local explainer pkl
3. Load the explainer pkl at inference time and explain an instance


***

In [1]:
import pickle
import os
from copy import deepcopy
from pprint import pprint
import numpy as np
import pandas as pd

### 1. Create required input from `_sample_data.pkl`

In [2]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone `joblib` package and only fall back to the
# legacy location on old environments that do not ship it separately.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# The bundle is indexed by string keys in the cells below (training data,
# labels, feature metadata and the fitted model).
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
resource = joblib.load('_sample_data.pkl')



#### 1.1 Load Data

In [3]:
# Features: pull the training frame out of the resource bundle and export it.
training_data = resource['training_data']
print(training_data.shape)
training_data.to_csv('train_data.csv', index=False)
print("feature data dumped : %s/train_data.csv" % (os.getcwd(),))

# Labels: coerce to a NumPy array so they can be written with np.savetxt.
training_label = np.array(resource['training_labels'])
print(training_label.shape)
np.savetxt('y_true.csv', training_label, delimiter=',')
print("label dumped : %s/y_true.csv" % (os.getcwd(),))

# Features + labels: work on a deep copy so that `training_data` itself stays
# label-free (a later cell passes it to the model unchanged).
all_data = deepcopy(training_data)
all_data['matching'] = training_label
print(all_data.shape)
all_data.to_csv('data.csv', index=False)
print("all data dumped : %s/data.csv" % (os.getcwd(),))

(229953, 2)
feature data dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/train_data.csv
(229953,)
label dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/y_true.csv
(229953, 3)
all data dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/data.csv


#### 1.2 Load Meta

In [4]:
import json

# Materialise the metadata stored in the resource bundle as plain containers.
feature_names = list(resource['feature_names'])
categorical_features = list(resource['categorical_features'])
categorical_names = dict(resource['categorical_names'])
class_names = list(resource['class_names'])

# One descriptor per feature, in column order. A feature is categorical when
# its positional index is listed in `categorical_features`; the optional
# 'mapping' entry is only attached when `categorical_names` has that index.
feature_meta = []
for idx, feature_name in enumerate(feature_names):
    if idx in categorical_features:
        descriptor = {'name': feature_name, 'type': 'categorical'}
        if idx in categorical_names:
            descriptor['mapping'] = categorical_names[idx]
    else:
        descriptor = {'name': feature_name, 'type': 'numerical'}
    feature_meta.append(descriptor)

meta_data = {'class_names': class_names, 'feature_types': feature_meta}

# Persist the feature metadata and the class labels as separate JSON files.
with open('feature_meta.json', 'w') as f:
    json.dump(meta_data, f)
    print("feature_meta dumped : %s/feature_meta.json" % (os.getcwd(),))

with open('labels.json', 'w') as f:
    json.dump(class_names, f)
    print("class_label dumped : %s/labels.json" % (os.getcwd(),))

feature_meta dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/feature_meta.json
class_label dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/labels.json


#### 1.3 Load Model

In [5]:
# Extract the fitted model from the resource bundle and persist it on its own.
model = resource['model_instance']
print(model)
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)
    print("model dumped : %s/model.pkl" % (os.getcwd(),))


def predict_fn(x):
    """Return the model's class probabilities for the rows in `x`.

    `x` is wrapped in a DataFrame whose columns are labelled with
    `feature_names` before being handed to `model.predict_proba`; the
    result is cast to float.
    """
    frame = pd.DataFrame(x, columns=feature_names)
    return model.predict_proba(frame).astype(float)


# NOTE(review): pickle serialises a plain function by its qualified name, not
# by value, so func.pkl holds only a reference to `predict_fn` -- neither its
# code nor `model` / `feature_names`. Whoever loads it must have the same
# function importable under the same module name.
with open('func.pkl', 'wb') as func_pkl:
    pickle.dump(predict_fn, func_pkl)
    print("function call dumped : %s/func.pkl" % (os.getcwd(),))

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=None, colsample_bytree=1, gamma=1.0,
              learning_rate=0.2, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=nan, n_estimators=100, n_jobs=1,
              nthread=1, objective='multi:softprob', random_state=97,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=True, subsample=1, verbosity=None)
model dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/model.pkl
function call dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/func.pkl


#### 1.4 Perform Training Evaluation

In [6]:
# Score the training set with the model and store the predicted probabilities;
# the compiler config shown below reads this file as `y_pred_file`.
y_conf = model.predict_proba(training_data)
np.savetxt("y_conf.csv", y_conf, delimiter=",")
print("y_conf dumped : %s/y_conf.csv" % (os.getcwd(),))

y_conf dumped : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/y_conf.csv


### 2. Invoke XAI compiler

In [7]:
import os
import sys
from pprint import pprint
sys.path.append('../../../')
from xai.compiler.base import Configuration, Controller

#### 2.1 Specify config file

In [8]:
# Path (relative to the notebook's working directory) of the compiler
# configuration that describes the report to build.
json_config = "basic-report-explainer.json"

#### 2.2 Initialize compiler controller with config

In [9]:
# Wrap the JSON config file in a Configuration object and hand it to the
# compiler Controller that will drive report generation.
controller = Controller(config=Configuration(json_config))
# Show the configuration held by the controller so the report layout can be
# checked before rendering.
pprint(controller.config)

{'content_table': True,
 'contents': [{'desc': 'This section summarized the training performance',
               'sections': [{'component': {'attr': {'labels_file': 'labels.json',
                                                    'y_pred_file': 'y_conf.csv',
                                                    'y_true_file': 'y_true.csv'},
                                           'class': 'ClassificationEvaluationResult',
                                           'module': 'compiler',
                                           'package': 'xai'},
                             'title': 'Training Result'}],
               'title': 'Training Result'},
              {'desc': 'This section provides the analysis on feature',
               'sections': [{'component': {'_comment': 'refer to document '
                                                       'section xxxx',
                                           'attr': {'method': 'shap',
                                                   

#### 2.3 Render the report with the compiler

In [10]:
 # Run the compiler on the loaded configuration.
 # NOTE(review): presumably this is the step that writes
 # cashapp-basic-report.pdf and explainer.pkl referenced by the cells below --
 # confirm against basic-report-explainer.json.
 controller.render()

  train_data = train_data.as_matrix()
  plt.tight_layout()
  plt.xlim([0, xlimit])


***

### Result

In [11]:
# Announce where the PDF report is expected to be.
# NOTE: this only formats the path; it does not check that the file exists.
print(
    "report generated : %s/cashapp-basic-report.pdf"
    % (os.getcwd(),)
)

report generated : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/cashapp-basic-report.pdf


In [12]:
# Announce where the pickled explainer is expected to be.
# NOTE: this only formats the path; it does not check that the file exists.
print(
    "explainer generated : %s/explainer.pkl"
    % (os.getcwd(),)
)

explainer generated : /Users/i309943/workspace/Explainable_AI/tutorials/compiler/cashapp/explainer.pkl


### Inference Explainer

In [13]:
import xai
from xai.explainer.explainer_factory import ExplainerFactory
from pprint import pprint

# Ask the factory for a LIME explainer for tabular data, then restore its
# state from the pickle file in the working directory.
explainer = ExplainerFactory.get_explainer(
    domain=xai.DOMAIN.TABULAR,
    algorithm=xai.ALG.LIME,
)
explainer.load_explainer('explainer.pkl')

# Explain the first training row. num_features=5 is requested, but the saved
# output lists two entries per class (the training frame has two columns).
explanations = explainer.explain_instance(
    instance=training_data.values[0, :],
    num_features=5,
)
pprint(explanations)

{0: {'confidence': 0.999993085861206,
     'explanation': [{'feature': 'NUMSUFFIXSAME:MEMOLINE:ACCOUNTINGDOCUMENT_TRUNCATED '
                                 '> 0.00',
                      'score': -0.13365043117719663},
                     {'feature': 'WORDSAMESCORE:MEMOLINE:ORGANIZATIONBPNAME > '
                                 '0.00',
                      'score': -0.0012361622311423371}]},
 2: {'confidence': 3.849634140351554e-06,
     'explanation': [{'feature': 'WORDSAMESCORE:MEMOLINE:ORGANIZATIONBPNAME > '
                                 '0.00',
                      'score': 0.032739598722119255},
                     {'feature': 'NUMSUFFIXSAME:MEMOLINE:ACCOUNTINGDOCUMENT_TRUNCATED '
                                 '> 0.00',
                      'score': 0.024314783489174884}]}}
