# Detecting post-training bias with SageMaker Clarify

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code to help readers work through one of the recipes in the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030).

### How to do it...

In [None]:
# Restore variables previously persisted with `%store` (presumably by an
# earlier notebook in this series -- confirm they exist before running).
%store -r s3_bucket_name
%store -r prefix
%store -r training_data_path

In [None]:
import sagemaker

# Execution role used by the training, model and Clarify resources below.
role = sagemaker.get_execution_role()

# SageMaker session shared by every SDK object created in this notebook,
# and the AWS region that session is bound to.
session = sagemaker.Session()
region = session.boto_region_name

In [None]:
# S3 location of the training CSV (restored via %store above).
s3_training_data_path = training_data_path
# S3 prefix passed to DataConfig below as the analysis output location.
s3_output_path = f"s3://{s3_bucket_name}/{prefix}/output"

In [None]:
# Copy the training CSV from S3 to a local file so it can be inspected with pandas.
!aws s3 cp {s3_training_data_path} tmp/training_data.csv

In [None]:
import pandas as pd

# Load the local copy of the training set; its column names are reused
# later as the `headers` of the Clarify DataConfig.
training_data = pd.read_csv(filepath_or_buffer="tmp/training_data.csv")
training_data

In [None]:
from sagemaker.image_uris import retrieve

# Resolve the XGBoost (version 1.2-1) container image URI for the current region.
container = retrieve(framework='xgboost', region=region, version='1.2-1')

In [None]:
from sagemaker.estimator import Estimator

# Generic estimator that trains with the XGBoost container image on a
# single ml.m5.large instance.
estimator = Estimator(
    image_uri=container,
    role=role,
    instance_type='ml.m5.large',
    instance_count=1,
    sagemaker_session=session,
)

In [None]:
# Hyperparameters forwarded to the training container: a binary
# classifier with logistic output, trained for 500 rounds.
estimator.set_hyperparameters(
    objective='binary:logistic',
    max_depth=8, eta=0.1, min_child_weight=4,
    num_round=500,
)

In [None]:
from sagemaker.inputs import TrainingInput

# Training channel definition: the CSV dataset stored in S3.
train_input = TrainingInput(s3_data=s3_training_data_path, content_type='csv')

In [None]:
%%time

estimator.fit({'train': train_input}, wait='True')

In [None]:
import random
from string import ascii_uppercase

def generate_model_name():
    """Return a random model name of the form ``model-XXXXX``.

    The suffix consists of five characters drawn (with replacement) from
    the uppercase ASCII letters using the ``random`` module.
    """
    suffix = ''.join(random.choices(ascii_uppercase, k=5))
    return 'model-' + suffix

In [None]:
# Pick a random name for the model; it is reused below when the model is
# created and referenced from the Clarify ModelConfig.
model_name = generate_model_name()
model_name

In [None]:
# Build a model object from the trained estimator under the generated name.
model = estimator.create_model(name=model_name)

In [None]:
# Inspect which class `estimator.create_model` returned.
type(model)

In [None]:
# Show the attributes stored on the model object.
vars(model)

In [None]:
# Container definition for the model; passed to `session.create_model` below.
container_def = model.prepare_container_def()
container_def

In [None]:
# Create the model through the session under `model_name`, the same name
# the Clarify ModelConfig refers to later.
session.create_model(
    name=model_name,
    role=role,
    container_defs=container_def,
)

In [None]:
from sagemaker.clarify import SageMakerClarifyProcessor

# Processor that runs the Clarify analysis job on a single ml.m5.large instance.
processor = SageMakerClarifyProcessor(
    role=role,
    instance_type='ml.m5.large',
    instance_count=1,
    sagemaker_session=session,
)

In [None]:
from sagemaker.clarify import DataConfig

# Describe the dataset to analyse: the CSV training data in S3, its column
# headers, the 'approved' label column, and where the results are written.
data_config = DataConfig(
    s3_data_input_path=s3_training_data_path,
    s3_output_path=s3_output_path,
    dataset_type='text/csv',
    headers=list(training_data.columns),
    label='approved',
)

vars(data_config)

In [None]:
from sagemaker.clarify import ModelConfig

# Configuration of the model used for predictions during the analysis:
# the model created above, on one ml.c5.xlarge instance, returning CSV.
model_config = ModelConfig(
    model_name=model_name,
    instance_count=1,
    instance_type='ml.c5.xlarge',
    accept_type='text/csv',
)

In [None]:
from sagemaker.clarify import ModelPredictedLabelConfig

# Probability threshold of 0.5 for turning model scores into predicted labels.
predictions_config = ModelPredictedLabelConfig(probability_threshold=0.5)

In [None]:
from sagemaker.clarify import BiasConfig

# Bias is measured for the 'sex' facet, with label value 1 as the
# label value of interest.
bias_config = BiasConfig(
    facet_name='sex',
    label_values_or_threshold=[1],
)

In [None]:
%%time

processor.run_post_training_bias(
    data_config=data_config, 
    data_bias_config=bias_config,
    methods=['DPPL', 'RD'],
    model_config=model_config,
    model_predicted_label_config=predictions_config
)

In [None]:
# Take the first output of the most recent processing job and read the
# destination it was written to.
output = processor.latest_job.outputs[0]
output_destination = output.destination
output_destination

In [None]:
# Recursively copy everything under the job's output destination into tmp/.
!aws s3 cp {output_destination}/ tmp/ --recursive

In [None]:
# List the files now present in tmp/.
!ls -lahF tmp/

In [None]:
# Print tmp/analysis.json (presumably the bias report written by the Clarify job -- confirm).
!cat tmp/analysis.json

In [None]:
# Persist the model name with %store so it can be restored elsewhere via `%store -r`.
%store model_name
model_name