Copyright (c) 2020. Cognitive Scale Inc. All rights reserved.
Licensed under CognitiveScale Example Code [License](https://github.com/CognitiveScale/cortex-certifai-examples/blob/7998b8a481fccd467463deb1fc46d19622079b0e/LICENSE.md)


## Running Cortex Certifai explanation scans on an xgboost model that predicts adult income

- Description: Each dataset row represents the attribute values for a de-identified individual. The model predicts the income bracket of the person as <=50K or >50K
- Dataset Source: UCI [Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/census+income)
- This example uses an `XGBClassifier` from the xgboost package
- Example can be worked locally by installing the dependencies listed below
- dependencies
    - installed Certifai toolkit
    - xgboost (`conda install -c conda-forge xgboost`)
    - ipython
    - matplotlib
    - jupyter


In [1]:
# necessary imports
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# imports for building certifai scan
from certifai.scanner.builder import (CertifaiScanBuilder, CertifaiPredictorWrapper, CertifaiModel, CertifaiModelMetric,
                                      CertifaiDataset, CertifaiGroupingFeature, CertifaiDatasetSource,
                                      CertifaiPredictionTask, CertifaiTaskOutcomes, CertifaiOutcomeValue)
from certifai.scanner.report_utils import scores, construct_scores_dataframe
from certifai.common.utils.encoding import CatEncoder

### prepare data for model training

In [2]:
# Load the prepared adult-income CSV into a dataframe.
# The path is relative, so the file must be in the notebook's working directory.
df = pd.read_csv('adult_income-prepped.csv')

In [3]:
# Separate the ground-truth outcome column from the model inputs.
label_column = 'income'
y = df[label_column]
X_raw = df.drop(columns=[label_column])

# Columns that are not used as model features.
rm = ["fnlwgt", "capital-loss"]

# Positions of the removed columns within X_raw (kept for reference; nothing else
# in the visible notebook reads it).
dropped_indexes_list = [i for i, col in enumerate(X_raw.columns) if col in rm]

# Remaining feature columns, in their original order.
final_list = [col for col in X_raw.columns if col not in rm]

# `drop` raises a KeyError if one of the listed columns is missing, so a change in
# the CSV layout fails loudly here instead of silently training on other features.
X = X_raw.drop(columns=rm)

In [4]:
# Hold out 20% of the cleaned rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [5]:
# Names of the categorical feature columns.
cat_columns = [
    'workclass', 'education', 'marital-status', 'occupation',
    'relationship', 'race', 'gender', 'native-country',
]

# Build the categorical encoder from the training split only; the same encoder is
# reused for the test data and inside the Certifai model wrappers.
encoder = CatEncoder(cat_columns, X_train)

### set hyperparameters and start model training

In [6]:
# Encode the train and test features with the encoder built from the training split.
# The encoder is called on the raw numpy values, not on the dataframe itself.
encoded_X_train = encoder(X_train.values)
encoded_X_test = encoder(X_test.values)

In [7]:
# Train the xgboost classifier on the encoded training data.
# Only `use_label_encoder` and `eval_metric` are set explicitly; every other
# hyperparameter keeps its library default (see the estimator repr in the output).
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(encoded_X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eval_metric='logloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=12,
              num_parallel_tree=1, random_state=0, reg_alpha=0, reg_lambda=1,
              scale_pos_weight=1, subsample=1, tree_method='exact',
              use_label_encoder=False, validate_parameters=1, verbosity=None)

### calculate model accuracy on test set

In [8]:
# Calculate accuracy on the held-out test set (encoded with the same encoder as the
# training data) and display it as the cell output.
acc = xgb.score(encoded_X_test, y_test)
acc

0.8687685535878801

### using the Cortex Certifai scan APIs to set up model scanning

- Before running the section below, make sure the packages required by Cortex Certifai are installed from your Certifai toolkit directory.
- The cell below only checks which Certifai version is installed; it does not install anything, and this notebook does not define a `certifai_toolkit_path` variable, so the installation has to be done beforehand.

In [9]:
# Display the installed Certifai version so the scan results below can be tied to
# a specific toolkit release.
from certifai.scanner.version import  get_version
get_version()

'1.3.8'

## Run scans with hard predict

Set up a scan definition that does not use soft output. Run a preflight check and an explanation scan.

In [10]:
# Wrap the trained model together with its encoder so Certifai can pass it raw rows.
xgb_wrapped_model = CertifaiPredictorWrapper(xgb, encoder=encoder)

# Sanity check: the wrapped model fed raw test rows must predict exactly what the
# bare model predicts on the pre-encoded test rows.
wrapped_predictions = xgb_wrapped_model.model.predict(X_test.values)
raw_predictions = xgb.predict(encoded_X_test)
assert (wrapped_predictions == raw_predictions).all()

In [11]:
# Create the scan object from scratch using the ScanBuilder class

# First define the possible prediction outcomes: 1 is labelled as income above 50K
# (the favorable outcome) and 0 as income of 50K or less.
task = CertifaiPredictionTask(
    CertifaiTaskOutcomes.classification(
        [
            CertifaiOutcomeValue(1, name='income > 50K', favorable=True),
            CertifaiOutcomeValue(0, name='income <= 50K'),
        ]),
    prediction_description='Determine whether income greater than 50K or less')

scan = CertifaiScanBuilder.create('test_use_case',
                                  prediction_task=task)

# Add our local model
first_model = CertifaiModel('XGB',
                            local_predictor=xgb_wrapped_model)
scan.add_model(first_model)

# Add the evaluation dataset: the full feature frame X (label column already removed)
eval_dataset = CertifaiDataset('evaluation',
                               CertifaiDatasetSource.dataframe(X))
scan.add_dataset(eval_dataset)

# The explanation dataset is just the first 10 rows of X
explan_dataset = CertifaiDataset('explanation',
                                 CertifaiDatasetSource.dataframe(X.iloc[:10]))
scan.add_dataset(explan_dataset)

# Set up an explanation scan
scan.add_evaluation_type('explanation')
scan.evaluation_dataset_id = 'evaluation'
scan.explanation_dataset_id = 'explanation'

# Name the ground truth outcome column in the dataset schema, so that it is not
# treated as a model input when a dataset carries it.
# NOTE(review): X was built by dropping 'income', so the dataframes registered above
# do not contain this column -- confirm whether this setting is still required here.
scan.dataset_schema.outcome_feature_name = 'income'

# Run the preflight checks before starting the scan itself. The recorded output shows
# a model nondeterminism check, a scan time estimate and an unknown outcome class check.
#
# The scan is started separately with `scan.run()`. By default that writes the results
# into individual report files (one per model and evaluation type) in the 'reports'
# directory relative to the Jupyter root; this may be disabled by passing
# `write_reports=False`. The result of a scan is a dictionary of dictionaries of
# reports: the top level key is the evaluation type and the second level key is the
# model id. Reports saved as JSON may be visualized in the console app.
scan.run_preflight()

Starting Preflight Scan
[--------------------] 2021-04-19 14:19:28.068847 - 0 of 3 checks (0.0% complete) - Running model nondeterminism preflight check for model XGB
[######--------------] 2021-04-19 14:19:28.110753 - 1 of 3 checks (33.33% complete) - Running scan time estimate preflight check for model XGB
[#############-------] 2021-04-19 14:20:58.970944 - 2 of 3 checks (66.67% complete) - Running unknown outcome class preflight check for model XGB
[####################] 2021-04-19 14:20:58.977491 - 3 of 3 checks (100.0% complete) - Finished all preflight checks for model XGB


  'messages': ['Passed model non determinism check',
   'Expected time for explanation analysis is 17 seconds',
   'Model XGB passed time estimation check',
   'Passed unknown outcome classes check'],
  'errors': []}}

In [12]:
# Run the explanation scan defined above for the hard-prediction model.
# `write_reports` is not passed here, so its default applies.
result=scan.run()

Starting scan with model_use_case_id: 'test_use_case' and scan_id: '39e1da7675b5', total estimated time is 1 minutes
[--------------------] 2021-04-19 14:20:59.144896 - 0 of 1 reports (0.0% complete) - Running explanation evaluation for model: XGB, estimated time is 17 seconds
[####################] 2021-04-19 14:21:10.651839 - 1 of 1 reports (100.0% complete) - Completed all evaluations


### Run scans using soft output

Modify the scan definition to run a scan using soft output. We'll run a preflight check and then an explanation scan that includes SHAP, which requires soft output.


In [13]:
# Set up a wrapper that exposes soft outputs.
import sys

# The soft-output helper lives in the shared ../utils directory (relative to the
# notebook's working directory), so that directory must be on the import path first.
sys.path.append("../utils")
from sklearn_soft_wrapper import SkLearnSoftWrapper

soft_model = SkLearnSoftWrapper(xgb)
xgb_soft_wrapped_model = CertifaiPredictorWrapper(
    soft_model,
    soft_predictions=True,
    encoder=encoder,
)

In [14]:
# Second scan definition: same use case and prediction task, but with the
# soft-output model wrapper.
scan2 = CertifaiScanBuilder.create('test_use_case', prediction_task=task)

soft_xgb_model = CertifaiModel('SoftXGB', local_predictor=xgb_soft_wrapped_model)
scan2.add_model(soft_xgb_model)

# Reuse the evaluation and explanation datasets created for the first scan.
scan2.add_dataset(eval_dataset)
scan2.add_dataset(explan_dataset)

# Explanation scan over the same dataset ids as before.
scan2.add_evaluation_type('explanation')
scan2.evaluation_dataset_id = 'evaluation'
scan2.explanation_dataset_id = 'explanation'

# Also produce Shap explanations, then run the preflight checks.
scan2.add_explanation_type('shap')
scan2.run_preflight()


[--------------------] 2021-04-19 14:21:10.811406 - 0 of 3 checks (0.0% complete) - Starting Preflight Scan
[--------------------] 2021-04-19 14:21:10.811527 - 0 of 3 checks (0.0% complete) - Running model nondeterminism preflight check for model SoftXGB
[######--------------] 2021-04-19 14:21:10.855993 - 1 of 3 checks (33.33% complete) - Running scan time estimate preflight check for model SoftXGB
[#############-------] 2021-04-19 14:22:32.730645 - 2 of 3 checks (66.67% complete) - Running unknown outcome class preflight check for model SoftXGB
[####################] 2021-04-19 14:22:32.737630 - 3 of 3 checks (100.0% complete) - Finished all preflight checks for model SoftXGB


  'messages': ['Passed model non determinism check',
   'Expected time for explanation analysis is 16 seconds',
   'Model SoftXGB passed time estimation check',
   'Passed unknown outcome classes check'],
  'errors': []}}

In [15]:
# Run the soft-output scan and explicitly write the report files.
# Note: this rebinds `result`, replacing the value returned by the first scan's run.
result = scan2.run(write_reports=True)

[--------------------] 2021-04-19 14:22:32.890377 - 0 of 1 reports (0.0% complete) - Starting scan with model_use_case_id: 'test_use_case' and scan_id: '46e887c03ae5', total estimated time is 1 minutes
[--------------------] 2021-04-19 14:22:32.890530 - 0 of 1 reports (0.0% complete) - Running explanation evaluation for model: SoftXGB, estimated time is 16 seconds
[####################] 2021-04-19 14:22:50.017508 - 1 of 1 reports (100.0% complete) - Completed all evaluations


In [16]:
# Show the soft outputs of the wrapped model for the first 10 rows of X.
# The recorded output has one row per input and two columns
# (presumably one score per outcome class -- confirm the column order against the wrapper).
xgb_soft_wrapped_model.model.soft_predict(X[:10].values)

array([[9.98465240e-01, 1.53476745e-03],
       [8.64778340e-01, 1.35221660e-01],
       [4.46438372e-01, 5.53561628e-01],
       [7.61495471e-01, 2.38504559e-01],
       [6.94274902e-04, 9.99305725e-01],
       [7.51609027e-01, 2.48390958e-01],
       [9.99894857e-01, 1.05137420e-04],
       [9.95557725e-01, 4.44229366e-03],
       [9.98467922e-01, 1.53206184e-03],
       [2.37099528e-02, 9.76290047e-01]])