In [None]:
# Standard library
import os

# Third-party
import boto3
import numpy as np
import pandas as pd
from sagemaker import Session, get_execution_role

# SageMaker session, plus the bucket and region read from it.
session = Session()
bucket = session.default_bucket()
region = session.boto_region_name

# Key prefix used for every S3 path built in this notebook.
prefix = "sagemaker/bias_explain"

# IAM role for the SageMaker calls below, and a boto3 S3 client.
role = get_execution_role()
s3_client = boto3.client("s3")

In [None]:
# Read the churn CSV and discard every row that contains a missing value.
training_data = (
    pd.read_csv("data/churn.csv")
    .dropna()
)

# Display the first rows as a quick sanity check.
training_data.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set. The fixed random_state makes the
# split -- and therefore the files and baseline derived from it later in the
# notebook -- reproducible across "Restart Kernel -> Run All".
churn_train, churn_test = train_test_split(
    training_data,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn import preprocessing
def number_encode_features(df):
    """Return a label-encoded copy of ``df`` and the encoders fitted for it.

    Each column whose dtype is ``object`` is replaced by the integer codes
    produced by a freshly fitted ``preprocessing.LabelEncoder``. Missing
    values in such a column are replaced with the string ``"None"`` before
    fitting. Columns of any other dtype are left unchanged, and ``df`` itself
    is not modified (the work is done on a copy).

    Args:
        df: pandas DataFrame to encode.

    Returns:
        A tuple ``(result, encoders)`` where ``result`` is the encoded copy
        and ``encoders`` maps each encoded column name to its fitted encoder
        (empty when ``df`` has no ``object`` columns).
    """
    result = df.copy()
    encoders = {}

    for column in result.columns:
        # Compare against the builtin ``object``. The ``np.object`` alias was
        # deprecated in NumPy 1.20 and removed in 1.24, where merely
        # referencing it raises AttributeError.
        if result.dtypes[column] == object:
            encoders[column] = preprocessing.LabelEncoder()
            result[column] = encoders[column].fit_transform(result[column].fillna("None"))

    return result, encoders

In [None]:
# The training CSV is written with the label ("Exited") as the first column
# and no header row.
label_first = ["Exited"] + [col for col in churn_train.columns if col != "Exited"]

# Encode train and test in a single pass. Fitting a separate LabelEncoder on
# each split can assign different integer codes to the same category whenever
# the two splits do not contain exactly the same set of values, which would
# make the test features (and the SHAP baseline taken from them) inconsistent
# with the data the model is trained on.
n_train = len(churn_train)
churn_all, _ = number_encode_features(
    pd.concat([churn_train[label_first], churn_test[label_first]], axis=0)
)

# Split the encoded frame back into its two parts by position.
churn_train = churn_all.iloc[:n_train]
churn_test = churn_all.iloc[n_train:]
churn_train.to_csv("data/train_churn.csv", index=False, header=False)

# The test file holds features only; the label is kept separately.
churn_features = churn_test.drop(["Exited"], axis=1)
churn_target = churn_test["Exited"]
churn_features.to_csv("data/test_churn.csv", index=False, header=False)

In [None]:
from sagemaker.s3 import S3Uploader
from sagemaker.inputs import TrainingInput

# Upload the training CSV under s3://<bucket>/<prefix> and wrap the returned
# URI as a CSV training input.
train_uri = S3Uploader.upload(
    local_path="data/train_churn.csv",
    desired_s3_uri=f"s3://{bucket}/{prefix}",
)
train_input = TrainingInput(s3_data=train_uri, content_type="csv")

# Upload the test features to the same S3 location.
test_uri = S3Uploader.upload(
    local_path="data/test_churn.csv",
    desired_s3_uri=f"s3://{bucket}/{prefix}",
)

In [None]:
from sagemaker.image_uris import retrieve
from sagemaker.estimator import Estimator

# Look up the XGBoost 1.2-1 container image for the session's region.
container = retrieve(framework="xgboost", region=region, version="1.2-1")

# Single-instance estimator built on that image, with the profiler disabled.
xgb = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    disable_profiler=True,
    sagemaker_session=session,
)

# Hyperparameters for a binary-logistic objective.
xgb.set_hyperparameters(
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    objective="binary:logistic",
    num_round=800,
)

# Fit on the "train" channel; logs=False is passed to suppress log output.
xgb.fit(inputs={"train": train_input}, logs=False)

In [None]:
# Name under which the trained model is registered; the Clarify ModelConfig
# later in the notebook refers to the model by this name.
model_name = "churn-clarify-model"

# Build a model object from the fitted estimator and get its container
# definition.
model = xgb.create_model(name=model_name)
container_def = model.prepare_container_def()

# Create the model through the session using that container definition.
session.create_model(
    name=model_name,
    role=role,
    container_defs=container_def,
)

In [None]:
from sagemaker import clarify

# Processor that runs both the bias and the explainability jobs below.
clarify_processor = clarify.SageMakerClarifyProcessor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    sagemaker_session=session,
)

In [None]:
# S3 location passed as the output path of the bias job.
bias_report_output_path = f"s3://{bucket}/{prefix}/clarify-bias"

# Input data description for the bias job: the uploaded training CSV, with
# column names supplied from the training frame and "Exited" as the label.
bias_data_config = clarify.DataConfig(
    s3_data_input_path=train_uri,
    s3_output_path=bias_report_output_path,
    label="Exited",
    headers=churn_train.columns.to_list(),
    dataset_type="text/csv",
)

In [None]:
# Model description shared by the bias and explainability jobs: the model
# registered under `model_name`, one ml.m5.xlarge instance, CSV in and out.
model_config = clarify.ModelConfig(
    model_name=model_name,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    accept_type="text/csv",
    content_type="text/csv",
)

In [None]:
# Predicted-label settings for the bias job.
# NOTE(review): presumably scores above 0.8 are treated as the positive
# label -- confirm against the Clarify documentation.
predictions_config = clarify.ModelPredictedLabelConfig(
    probability_threshold=0.8,
)

In [None]:
# Bias analysis settings: label value 1, facet column "Gender", facet value 0.
# NOTE(review): 0 is an integer code produced by the label encoding step --
# verify which gender it corresponds to before interpreting the report.
bias_config = clarify.BiasConfig(
    label_values_or_threshold=[1],
    facet_name="Gender",
    facet_values_or_threshold=[0],
)

In [None]:
# Run the bias job with every pre-training and post-training method.
clarify_processor.run_bias(
    data_config=bias_data_config,
    bias_config=bias_config,
    model_config=model_config,
    model_predicted_label_config=predictions_config,
    pre_training_methods="all",
    post_training_methods="all",
)

In [None]:
# SHAP settings. The baseline is a single row: the first row of the test
# features, converted to a plain Python list.
shap_config = clarify.SHAPConfig(
    baseline=[
        churn_features.iloc[0].values.tolist(),
    ],
    num_samples=15,
    agg_method="mean_abs",
    save_local_shap_values=True,
)

In [None]:
# S3 location passed as the output path of the explainability job.
explainability_output_path = f"s3://{bucket}/{prefix}/clarify-explainability"

# Input data description for the explainability job: the uploaded training
# CSV, with column names supplied from the training frame and "Exited" as the
# label.
explainability_data_config = clarify.DataConfig(
    s3_data_input_path=train_uri,
    s3_output_path=explainability_output_path,
    label="Exited",
    headers=churn_train.columns.to_list(),
    dataset_type="text/csv",
)

In [None]:
# Run the explainability job with the SHAP configuration defined above.
clarify_processor.run_explainability(
    data_config=explainability_data_config,
    model_config=model_config,
    explainability_config=shap_config,
)