In [52]:
%store -r s3_bucket_name
%store -r prefix
%store -r training_data_path
%store -r test_data_path
%store -r model_name

In [54]:
import sagemaker

# Session object that is later handed to the Clarify processor.
session = sagemaker.Session()
# Execution role that the Clarify processor is created with.
role = sagemaker.get_execution_role()
# Region name reported by the session.
region = session.boto_region_name

In [55]:
# Re-expose the restored input locations under s3_* names.
s3_training_data_path, s3_test_data_path = training_data_path, test_data_path

# Location under <prefix>/output in the bucket that the Clarify job writes to.
s3_output_path = f"s3://{s3_bucket_name}/{prefix}/output"

In [56]:
!aws s3 cp {s3_training_data_path} tmp/training_data.csv
!aws s3 cp {s3_test_data_path} tmp/test_data.csv

download: s3://sagemaker-cookbook-bucket/chapter07/input/training_data.csv to tmp/training_data.csv
download: s3://sagemaker-cookbook-bucket/chapter07/input/test_data.csv to tmp/test_data.csv


In [57]:
import pandas as pd

# Read the two CSVs that were copied from S3 into tmp/.
training_data = pd.read_csv("tmp/training_data.csv")
test_data = pd.read_csv("tmp/test_data.csv")

# Separate the test set into its 'label' column and all remaining columns.
target = test_data.loc[:, 'label']
features = test_data.drop('label', axis=1)

# Persist the label-free rows as bare CSV: no index column, no header line.
features.to_csv(path_or_buf='tmp/test_features.csv', header=False, index=False)

In [60]:
# Show the label-free test features as this cell's output.
features

Unnamed: 0,a,b,c,d
0,-10.488853,0.632800,-79,5
1,8.455742,4.183267,-7,-83
2,-15.466566,-0.372287,19,39
3,-3.134794,-4.258036,9,39
4,-8.362027,-9.802120,23,75
...,...,...,...,...
995,0.659784,1.414005,50,-36
996,-12.388167,-3.860623,2,-49
997,-4.294561,-8.472893,-41,-14
998,-6.044883,-5.261760,-49,-97


In [61]:
base = f"s3://{s3_bucket_name}/{prefix}/input"
s3_feature_path = f"{base}/test_features.csv"

!aws s3 cp tmp/test_features.csv {s3_feature_path}

upload: tmp/test_features.csv to s3://sagemaker-cookbook-bucket/chapter07/input/test_features.csv


In [62]:
from sagemaker.clarify import ModelConfig

# Model-side settings for the Clarify job: which model to query, the
# instance type/count to serve it with, and the response format to request.
model_config = ModelConfig(
    model_name=model_name,
    instance_count=1,
    instance_type='ml.c5.xlarge',
    accept_type='text/csv',
)

In [63]:
from sagemaker.clarify import SageMakerClarifyProcessor

# Processor for the Clarify job: a single ml.m5.large instance, created
# with the notebook's role and session.
processor = SageMakerClarifyProcessor(
    sagemaker_session=session,
    role=role,
    instance_type='ml.m5.large',
    instance_count=1,
)

In [64]:
# Single-row SHAP baseline: the first row of the test features converted to
# a plain Python list and wrapped in an outer list.
baseline = [features.iloc[0].to_numpy().tolist()]
baseline

[[-10.488853298702775, 0.6328000699807923, -79.0, 5.0]]

In [65]:
from sagemaker.clarify import SHAPConfig

# SHAP settings for the explainability job: the baseline row built earlier,
# 30 samples, and 'median' as the aggregation method.
shap_config = SHAPConfig(
    agg_method='median',
    num_samples=30,
    baseline=baseline,
)

In [66]:
# Column names of the training frame, in order, as a plain list.
headers = list(training_data.columns)

In [67]:
from sagemaker.clarify import DataConfig

# Data-side settings for the Clarify job: the training CSV in S3 as input,
# the output location, the column names, and 'label' as the label column.
data_config = DataConfig(
    dataset_type='text/csv',
    headers=headers,
    label='label',
    s3_data_input_path=s3_training_data_path,
    s3_output_path=s3_output_path,
)

In [68]:
%%time

processor.run_explainability(
    data_config=data_config,       
    model_config=model_config,                                 
    explainability_config=shap_config
)


Job Name:  Clarify-Explainability-2021-05-24-14-53-00-659
Inputs:  [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-cookbook-bucket/chapter07/input/training_data.csv', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-cookbook-bucket/chapter07/output/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-cookbook-bucket/chapter07/output', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]
................................[34mINFO:sagemaker-clarify-processing:Starting SageMaker Clar

In [69]:
# Take the first output of the most recent job and read its destination;
# the trailing bare expression displays that destination.
output = processor.latest_job.outputs[0]
output_destination = output.destination
output_destination

's3://sagemaker-cookbook-bucket/chapter07/output'

In [70]:
!aws s3 cp {output_destination}/ tmp/ --recursive

download: s3://sagemaker-cookbook-bucket/chapter07/output/analysis_config.json to tmp/analysis_config.json
download: s3://sagemaker-cookbook-bucket/chapter07/output/analysis.json to tmp/analysis.json
download: s3://sagemaker-cookbook-bucket/chapter07/output/explanations_shap/baseline.csv to tmp/explanations_shap/baseline.csv
download: s3://sagemaker-cookbook-bucket/chapter07/output/report.ipynb to tmp/report.ipynb
download: s3://sagemaker-cookbook-bucket/chapter07/output/explanations_shap/out.csv to tmp/explanations_shap/out.csv
download: s3://sagemaker-cookbook-bucket/chapter07/output/report.html to tmp/report.html
download: s3://sagemaker-cookbook-bucket/chapter07/output/report.pdf to tmp/report.pdf


In [71]:
# List tmp/ (long format, human-readable sizes) to inspect the downloaded files.
!ls -lahF tmp/

total 876K
drwxr-xr-x 3 root root 6.0K May 24 15:06 ./
drwxr-xr-x 4 root root 6.0K May 24 15:04 ../
-rw-r--r-- 1 root root  423 May 24 15:05 analysis.json
-rw-r--r-- 1 root root  466 May 24 14:53 analysis_config.json
drwxr-xr-x 2 root root 6.0K May 24 15:06 explanations_shap/
-rw-r--r-- 1 root root 280K May 24 15:05 report.html
-rw-r--r-- 1 root root  14K May 24 15:05 report.ipynb
-rw-r--r-- 1 root root  37K May 24 15:05 report.pdf
-rw-r--r-- 1 root root  46K May 24 13:26 test_data.csv
-rw-r--r-- 1 root root  46K May 24 13:26 test_data_no_header.csv
-rw-r--r-- 1 root root  45K May 24 14:52 test_features.csv
-rw-r--r-- 1 root root 139K May 24 13:26 training_data.csv
-rw-r--r-- 1 root root 139K May 24 13:26 training_data_no_header.csv
-rw-r--r-- 1 root root  47K May 24 13:26 validation_data.csv
-rw-r--r-- 1 root root  47K May 24 13:26 validation_data_no_header.csv


In [72]:
# Print the analysis.json file that was downloaded from the job output.
!cat tmp/analysis.json

{
    "version": "1.0",
    "explanations": {
        "kernel_shap": {
            "label0": {
                "global_shap_values": {
                    "a": -0.23615066885249675,
                    "b": 0.23931414559483516,
                    "c": -0.16817792626097805,
                    "d": -0.0255478889434015
                },
                "expected_value": 0.8622586131095886
            }
        }
    }
}