# Pipeline

In [82]:
import os
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession, LocalPipelineSession

role = sagemaker.get_execution_role()
bucket = "pochingto-testing"
pipeline_session = PipelineSession(default_bucket=bucket)

In [83]:
sagemaker.__version__

'2.183.0'

In [84]:
# %%bash

# pip install -q --upgrade pip
# pip install -q --upgrade awscli boto3
# pip install -q --upgrade scikit-learn==1.3.1
# pip install -q --upgrade PyYAML==6.0
# pip install -q --upgrade sagemaker
# pip install -q --upgrade ipytest

In [85]:
# import sagemaker
# import importlib
# importlib.reload(sagemaker)
# sagemaker.__version__

In [86]:
import boto3

sagemaker_session = sagemaker.session.Session()
sagemaker_client = boto3.client("sagemaker")
iam_client = boto3.client("iam")
region = boto3.Session().region_name
bucket = "pochingto-testing"

In [87]:
ENDPOINT = "dogBreeds-endpoint"
DATA_CAPTURE_DESTINATION = f"s3://{bucket}/monitoring/data-capture"
MODEL_PACKAGE_GROUP = "dogBreeds"

In [88]:
config = {
    "session": pipeline_session,
    "instance_type": "ml.m5.xlarge",
    "image": None,
    "framework_version": "1.12",
    "py_version": "py38",
}

In [89]:
from sagemaker.workflow.steps import CacheConfig

cache_config = CacheConfig(enable_caching=True, expire_after="15d")

## Data preprocessing

In [90]:
# import importlib
# from preprocessing import preprocess_data

# importlib.reload(preprocess_data)

# import tempfile
# import shutil

# from pathlib import Path

# directory = tempfile.mkdtemp()
# data_dir = "all/"
# train_ratio = 0.8
# output_dir = Path(directory) / "output"

# preprocess_data.preprocess_data(data_dir, output_dir, train_ratio)

In [91]:
# shutil.rmtree(directory)

In [92]:
from sagemaker.workflow.pipeline_context import LocalPipelineSession
local_pipeline_session = LocalPipelineSession()

In [93]:
from sagemaker.workflow.parameters import ParameterString

dataset_location = ParameterString(
    name="dataset_location",
    default_value=f"s3://{bucket}/all",
)

In [94]:
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_definition_config import PipelineDefinitionConfig

pipeline_definition_config = PipelineDefinitionConfig(use_custom_job_prefix=True)

In [95]:
len(sorted(os.listdir('all/')))

134

In [96]:
dataset_location

ParameterString(name='dataset_location', parameter_type=<ParameterTypeEnum.STRING: 'String'>, default_value='s3://pochingto-testing/all')

In [97]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.pytorch.processing import PyTorchProcessor

# pytorch_processor = PyTorchProcessor(
#     base_job_name="evaluation-processor",
#     image_uri=config["image"],
#     framework_version=config["framework_version"],
#     py_version=config["py_version"],
#     instance_type=config["instance_type"],
#     instance_count=1,
#     role=role,
#     sagemaker_session=config["session"],
# )

# Define the script processor
pytorch_preprocessor = PyTorchProcessor(
    base_job_name="preprocess-data",
    image_uri=config["image"],
    framework_version=config["framework_version"],
    py_version=config["py_version"],
    instance_type=config["instance_type"],
    instance_count=1,
    role=role,
    sagemaker_session=config["session"],
    # sagemaker_session=local_pipeline_session
)

preprocessing_step = ProcessingStep(
    name="preprocess-data",
    step_args=pytorch_preprocessor.run(
        code='preprocess_data.py',
        source_dir='preprocessing', 
        inputs=[
            ProcessingInput(
                source=dataset_location,
                destination='/opt/ml/processing/input'
            )
        ],
        outputs=[
            ProcessingOutput(
                output_name="train",
                source='/opt/ml/processing/output/train',
                destination=f's3://{bucket}/output/train'
            ),
            ProcessingOutput(
                output_name="test",
                source='/opt/ml/processing/output/test',
                destination=f's3://{bucket}/output/test'
            ),
            ProcessingOutput(
                output_name="data-baseline",
                source='/opt/ml/processing/output/data-baseline',
                destination=f's3://{bucket}/output/data-baseline'
            )
        ]
    ),
    cache_config=cache_config
)

In [98]:
type(preprocessing_step)

sagemaker.workflow.steps.ProcessingStep

In [99]:
# dogbreed_pipeline = Pipeline(
#     name="dogbreeds-preprocessing-pipeline",
#     parameters=[dataset_location],
#     steps=[
#         preprocessing_step,
#     ],
#     pipeline_definition_config=pipeline_definition_config,
#     sagemaker_session=config["session"],
#     # sagemaker_session=local_pipeline_session
# )

# dogbreed_pipeline.upsert(role_arn=role)

In [100]:
# dogbreed_pipeline.create(
#     role_arn=sagemaker.get_execution_role(), 
#     description="local pipeline example"
# )

# execution = dogbreed_pipeline.start()

# steps = execution.list_steps()

# training_job_name = steps['PipelineExecutionSteps'][0]['Metadata']['TrainingJob']['Arn']

# step_outputs = local_pipeline_session.sagemaker_client.describe_training_job(TrainingJobName = training_job_name)

## Training

In [101]:
# !pip install --upgrade torch torchvision

In [102]:
# import importlib
# import train

# importlib.reload(train)

# import tempfile
# import shutil

# from pathlib import Path
# from train import main

# directory = tempfile.mkdtemp()
# data_dir = "all/"
# model_dir = Path(directory) / "model"
# output_dir = Path(directory) / "output"

# model_dir.mkdir(parents=True, exist_ok=True)
# output_dir.mkdir(parents=True, exist_ok=True)

# num_epochs = 1
# batch_size = 16
# debug = True

# main(str(data_dir), str(model_dir), str(output_dir), num_epochs, batch_size, debug)

In [103]:
from sagemaker.pytorch import PyTorch

estimator = PyTorch(
    base_job_name="dogbreeds-training",
    entry_point=f"train.py",

    hyperparameters={
        "epochs": 5,
        "batch_size": 32,
    },
    
    metric_definitions=[
        {"Name": "loss", "Regex": "Loss: ([0-9\\.]+)"},
        {"Name": "accuracy", "Regex": "Validation Accuracy: ([0-9\\.]+)"},
    ],
    image_uri=config["image"],
    framework_version=config["framework_version"],
    py_version=config["py_version"],
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    disable_profiler=True,
    sagemaker_session=config["session"],
    role=role
)

In [104]:
# print(preprocessing_step.properties.to_string()) #.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri.to_string())

In [105]:
from sagemaker.workflow.steps import TrainingStep
from sagemaker.inputs import TrainingInput

train_model_step = TrainingStep(
    name="train-model",
    step_args=estimator.fit(
        inputs={
            "train": TrainingInput(
                s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri
            )
        }
    ),
    cache_config=cache_config,
)

## Evaluation

In [106]:
# # if 'autoreload' not in get_ipython().extension_manager.loaded:
# #     %load_ext autoreload
# import importlib

# import tempfile
# import shutil

# from pathlib import Path
# from evaluate import evaluation

# importlib.reload(evaluation)
# # generate model.tar.gz
# import tarfile
# import os

# def tar_sagemaker_style(source_dir, output_filename):
#     with tarfile.open(output_filename, "w:gz") as tar:
#         for item in os.listdir(source_dir):
#             item_path = os.path.join(source_dir, item)
#             tar.add(item_path, arcname=item)

# # Tar the 'model/' folder
# output_file = 'model.tar.gz'
# tar_sagemaker_style(str(model_dir), model_dir / output_file)
# print("tared data")

# baseline_dir = output_dir.parent / "baseline"
# baseline_dir.mkdir(exist_ok=True)

# with tarfile.open(Path(directory) / "model.tar.gz") as tar:
#     tar.extractall(path=Path(model_dir))
# evaluation.main(str(model_dir), str(data_dir), str(output_dir), str(baseline_dir), debug=True)

In [107]:
from sagemaker.workflow.properties import PropertyFile

evaluation_report = PropertyFile(
    name="evaluation-report", output_name="evaluation", path="evaluation.json"
)

In [108]:
model_assets = train_model_step.properties.ModelArtifacts.S3ModelArtifacts
model_assets

<sagemaker.workflow.properties.Properties at 0x7f5fb30aaad0>

In [109]:
config

{'session': <sagemaker.workflow.pipeline_context.PipelineSession at 0x7f5fb1aeaed0>,
 'instance_type': 'ml.m5.xlarge',
 'image': None,
 'framework_version': '1.12',
 'py_version': 'py38'}

In [110]:
from sagemaker.pytorch.processing import PyTorchProcessor

pytorch_processor = PyTorchProcessor(
    base_job_name="evaluation-processor",
    image_uri=config["image"],
    framework_version=config["framework_version"],
    py_version=config["py_version"],
    instance_type=config["instance_type"],
    instance_count=1,
    role=role,
    sagemaker_session=config["session"],
)


In [111]:
evaluate_model_step = ProcessingStep(
    name="evaluate-model",
    step_args=pytorch_processor.run(
        code=f"evaluation.py",
        source_dir='evaluate',
        inputs=[
            ProcessingInput(
                source=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
            ProcessingInput(
                source=model_assets,
                destination="/opt/ml/processing/model",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="evaluation", source="/opt/ml/processing/evaluation"
            ),
            ProcessingOutput(
                output_name="model-baseline", source="/opt/ml/processing/baseline"
            ),
        ],
    ),
    property_files=[evaluation_report],
    cache_config=cache_config,
)

## Data & Model Quality Check Step

In [112]:
GROUND_TRUTH_LOCATION = f"s3://{bucket}/monitoring/groundtruth"
DATA_QUALITY_LOCATION = f"s3://{bucket}/monitoring/data-quality"
MODEL_QUALITY_LOCATION = f"s3://{bucket}/monitoring/model-quality"

In [113]:
from sagemaker.workflow.quality_check_step import (
    QualityCheckStep,
    DataQualityCheckConfig,
)
from sagemaker.workflow.check_job_config import CheckJobConfig
from sagemaker.model_monitor.dataset_format import DatasetFormat

data_quality_baseline_step = QualityCheckStep(
    name="generate-data-quality-baseline",
    check_job_config=CheckJobConfig(
        instance_type="ml.c5.xlarge",
        instance_count=1,
        volume_size_in_gb=20,
        sagemaker_session=pipeline_session,
        role=role,
    ),
    quality_check_config=DataQualityCheckConfig(
        baseline_dataset=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
            "data-baseline"
        ].S3Output.S3Uri,
        dataset_format=DatasetFormat.csv(header=True, output_columns_position="START"),
        output_s3_uri=DATA_QUALITY_LOCATION,
    ),
    model_package_group_name=MODEL_PACKAGE_GROUP,
    skip_check=True,
    register_new_baseline=True,
    cache_config=cache_config,
)

In [114]:
from sagemaker.workflow.quality_check_step import ModelQualityCheckConfig

model_quality_baseline_step = QualityCheckStep(
    name="generate-model-quality-baseline",
    check_job_config=CheckJobConfig(
        instance_type="ml.c5.xlarge",
        instance_count=1,
        volume_size_in_gb=20,
        sagemaker_session=pipeline_session,
        role=role,
    ),
    quality_check_config=ModelQualityCheckConfig(
        # We are going to use the output of the Transform Step to generate
        # the model quality baseline.
        baseline_dataset=evaluate_model_step.properties.ProcessingOutputConfig.Outputs[
            "model-baseline"
        ].S3Output.S3Uri,
        dataset_format=DatasetFormat.csv(header=True),

        # We need to specify the problem type and the fields where the prediction
        # and groundtruth are so the process knows how to interpret the results.
        problem_type="MulticlassClassification",
        
        # Since the data doesn't have headers, SageMaker will autocreate headers for it.
        # _c0 corresponds to the first column, and _c1 corresponds to the second column.
        ground_truth_attribute="Label",
        inference_attribute="Predicted",
        probability_attribute="Confidence",
        output_s3_uri=MODEL_QUALITY_LOCATION,
    ),
    model_package_group_name=MODEL_PACKAGE_GROUP,
    skip_check=True,
    register_new_baseline=True,
    cache_config=cache_config,
)

## Registering Model

In [115]:
config

{'session': <sagemaker.workflow.pipeline_context.PipelineSession at 0x7f5fb1aeaed0>,
 'instance_type': 'ml.m5.xlarge',
 'image': None,
 'framework_version': '1.12',
 'py_version': 'py38'}

In [116]:
MODEL_PACKAGE_GROUP = "dogBreeds"

In [117]:
from sagemaker.pytorch.model import PyTorchModel

pytorch_model = PyTorchModel(
    model_data=model_assets,
    entry_point="inference.py",
    image_uri=config["image"],
    py_version=config["py_version"],
    framework_version=config["framework_version"],
    sagemaker_session=config["session"],
    role=role,
)

In [118]:
from sagemaker.model_metrics import ModelMetrics, MetricsSource
from sagemaker.workflow.functions import Join

# model_metrics = ModelMetrics(
#     model_statistics=MetricsSource(
#         s3_uri=Join(
#             on="/",
#             values=[
#                 evaluate_model_step.properties.ProcessingOutputConfig.Outputs[
#                     "evaluation"
#                 ].S3Output.S3Uri,
#                 "evaluation.json",
#             ],
#         ),
#         content_type="application/json",
#     )
# )

In [119]:
from sagemaker.drift_check_baselines import DriftCheckBaselines

model_metrics = ModelMetrics(
    model_data_statistics=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.CalculatedBaselineStatistics,
        content_type="application/json",
    ),
    model_data_constraints=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.CalculatedBaselineConstraints,
        content_type="application/json",
    ),
    model_statistics=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.CalculatedBaselineStatistics,
        content_type="application/json",
    ),
    model_constraints=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.CalculatedBaselineConstraints,
        content_type="application/json",
    ),
)

drift_check_baselines = DriftCheckBaselines(
    model_data_statistics=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.BaselineUsedForDriftCheckStatistics,
        content_type="application/json",
    ),
    model_data_constraints=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.BaselineUsedForDriftCheckConstraints,
        content_type="application/json",
    ),
    model_statistics=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.BaselineUsedForDriftCheckStatistics,
        content_type="application/json",
    ),
    model_constraints=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.BaselineUsedForDriftCheckConstraints,
        content_type="application/json",
    ),
)

In [120]:
from sagemaker.workflow.model_step import ModelStep

register_model_step = ModelStep(
    name="register-model",
    step_args=pytorch_model.register(
        model_package_group_name=MODEL_PACKAGE_GROUP,
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        approval_status="PendingManualApproval",
        content_types=["application/json", "application/x-image"],
        response_types=["application/json"],
        inference_instances=["ml.m5.xlarge"],
        transform_instances=["ml.g4dn.xlarge"],
        domain="MACHINE_LEARNING",
        task="CLASSIFICATION",
        framework="PYTORCH",
        framework_version=config["framework_version"],
    ),
)

In [121]:
from sagemaker.workflow.parameters import ParameterFloat

accuracy_threshold = ParameterFloat(name="accuracy_threshold", default_value=0.50)

In [122]:
from sagemaker.workflow.fail_step import FailStep

fail_step = FailStep(
    name="fail",
    error_message=Join(
        on=" ",
        values=[
            "Execution failed because the model's accuracy was lower than",
            accuracy_threshold,
        ],
    ),
)

In [123]:
from sagemaker.workflow.functions import JsonGet
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo

condition = ConditionGreaterThanOrEqualTo(
    left=JsonGet(
        step_name=evaluate_model_step.name,
        property_file=evaluation_report,
        json_path="metrics.accuracy.value",
    ),
    right=accuracy_threshold,
)

In [124]:
from sagemaker.workflow.condition_step import ConditionStep

condition_step = ConditionStep(
    name="check-model-accuracy",
    conditions=[condition],
    if_steps=[
        model_quality_baseline_step, 
        register_model_step
    ],
    else_steps=[fail_step],
)

In [125]:
training_pipeline = Pipeline(
    name="dogBreeds-training-pipeline",
    parameters=[dataset_location, accuracy_threshold],
    steps=[
        preprocessing_step,
        train_model_step,
        data_quality_baseline_step,
        evaluate_model_step,
        condition_step,
    ],
    pipeline_definition_config=pipeline_definition_config,
    sagemaker_session=config["session"],
)

In [134]:
training_pipeline.upsert(role_arn=role)

Using provided s3_resource
Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.processing:Uploaded evaluate to s3://pochingto-testing/dogBreeds-training-pipeline/code/4db941f04ed748882e10300736cad13a/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://pochingto-testing/dogBreeds-training-pipeline/code/2c207c809cb0e0e9a1d77e5247f961f9/runproc.sh


Using provided s3_resource
Using provided s3_resource


INFO:sagemaker.processing:Uploaded preprocessing to s3://pochingto-testing/dogBreeds-training-pipeline/code/af7855aa5c91b75932b386e381849af6/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://pochingto-testing/dogBreeds-training-pipeline/code/0c8137ea235a6debf66cba8d901e144c/runproc.sh
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


Using provided s3_resource
Using provided s3_resource


INFO:sagemaker.processing:Uploaded evaluate to s3://pochingto-testing/dogBreeds-training-pipeline/code/4db941f04ed748882e10300736cad13a/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://pochingto-testing/dogBreeds-training-pipeline/code/2c207c809cb0e0e9a1d77e5247f961f9/runproc.sh


Using provided s3_resource
Using provided s3_resource


{'PipelineArn': 'arn:aws:sagemaker:us-east-1:681340771742:pipeline/dogBreeds-training-pipeline',
 'ResponseMetadata': {'RequestId': 'bbb87247-8f78-4bf9-b009-792fc6370c96',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'bbb87247-8f78-4bf9-b009-792fc6370c96',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '95',
   'date': 'Wed, 22 Nov 2023 03:17:05 GMT'},
  'RetryAttempts': 0}}

# Setup lambda

In [37]:
# setup role for lambda to deploy endpoint
import json

lambda_role_name = "lambda-deployment-role"
lambda_role_arn = None

try:
    response = iam_client.create_role(
        RoleName=lambda_role_name,
        AssumeRolePolicyDocument=json.dumps(
            {
                "Version": "2012-10-17",
                "Statement": [
                    {
                        "Effect": "Allow",
                        "Principal": {
                            "Service": ["lambda.amazonaws.com", "events.amazonaws.com"]
                        },
                        "Action": "sts:AssumeRole",
                    }
                ],
            }
        ),
        Description="Lambda Endpoint Deployment",
    )

    lambda_role_arn = response["Role"]["Arn"]

    iam_client.attach_role_policy(
        PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
        RoleName=lambda_role_name,
    )

    iam_client.attach_role_policy(
        PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess",
        RoleName=lambda_role_name,
    )

    print(f'Role "{lambda_role_name}" created with ARN "{lambda_role_arn}".')
except iam_client.exceptions.EntityAlreadyExistsException:
    response = iam_client.get_role(RoleName=lambda_role_name)
    lambda_role_arn = response["Role"]["Arn"]
    print(f'Role "{lambda_role_name}" already exists with ARN "{lambda_role_arn}".')

ClientError: An error occurred (AccessDenied) when calling the CreateRole operation: User: arn:aws:sts::681340771742:assumed-role/AmazonSageMaker-ExecutionRole-20230916T122655/SageMaker is not authorized to perform: iam:CreateRole on resource: arn:aws:iam::681340771742:role/lambda-deployment-role because no identity-based policy allows the iam:CreateRole action

In [40]:
bucket

'pochingto-testing'

In [41]:
from sagemaker.predictor import Predictor

ENDPOINT = "dogbreeds-endpoint"
DATA_CAPTURE_DESTINATION = f"s3://{bucket}/monitoring/data-capture"

In [43]:
DATA_CAPTURE_DESTINATION

'pochingto-testing/monitoring/data-capture'

In [44]:
role

'arn:aws:iam::681340771742:role/service-role/AmazonSageMaker-ExecutionRole-20230916T122655'

In [42]:
from sagemaker.lambda_helper import Lambda

lambda_role_arn = "arn:aws:iam::681340771742:role/lambda-deployment-role"
deploy_lambda_fn = Lambda(
    function_name="deploy_fn",
    execution_role_arn=lambda_role_arn,
    script="lambda.py",
    handler="lambda.lambda_handler",
    timeout=600,
    session=sagemaker_session,
    runtime="python3.11",
    environment={
        "Variables": {
            "ENDPOINT": ENDPOINT,
            "DATA_CAPTURE_DESTINATION": DATA_CAPTURE_DESTINATION,
            "ROLE": role,
        }
    },
)

lambda_response = deploy_lambda_fn.upsert()
lambda_response

ValueError: {'Message': 'User: arn:aws:sts::681340771742:assumed-role/AmazonSageMaker-ExecutionRole-20230916T122655/SageMaker is not authorized to perform: lambda:CreateFunction on resource: arn:aws:lambda:us-east-1:681340771742:function:deploy_fn because no identity-based policy allows the lambda:CreateFunction action', 'Code': 'AccessDeniedException'}

## Setup Eventbridge

In [46]:
MODEL_PACKAGE_GROUP

'dogBreeds'

In [47]:
event_pattern = f"""
{{
  "source": ["aws.sagemaker"],
  "detail-type": ["SageMaker Model Package State Change"],
  "detail": {{
    "ModelPackageGroupName": ["{MODEL_PACKAGE_GROUP}"],
    "ModelApprovalStatus": ["Approved"]
  }}
}}
"""

In [50]:
event_pattern

'\n{\n  "source": ["aws.sagemaker"],\n  "detail-type": ["SageMaker Model Package State Change"],\n  "detail": {\n    "ModelPackageGroupName": ["dogBreeds"],\n    "ModelApprovalStatus": ["Approved"]\n  }\n}\n'

In [48]:
events_client = boto3.client("events")
rule_response = events_client.put_rule(
    Name="PipelineModelApprovedRule",
    EventPattern=event_pattern,
    State="ENABLED",
    RoleArn=role,
)

ClientError: An error occurred (AccessDeniedException) when calling the PutRule operation: User: arn:aws:sts::681340771742:assumed-role/AmazonSageMaker-ExecutionRole-20230916T122655/SageMaker is not authorized to perform: events:PutRule on resource: arn:aws:events:us-east-1:681340771742:rule/PipelineModelApprovedRule because no identity-based policy allows the events:PutRule action

# Deploy

In [4]:
bucket = "pochingto-testing"

In [8]:
from sagemaker.predictor import Predictor

ENDPOINT = "dogBreeds-endpoint"
DATA_CAPTURE_DESTINATION = f"{bucket}/monitoring/data-capture"
MODEL_PACKAGE_GROUP = "dogBreeds"

In [13]:
import boto3
import sagemaker

sagemaker_client = boto3.client("sagemaker")
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [14]:
response = sagemaker_client.list_model_packages(
    ModelPackageGroupName=MODEL_PACKAGE_GROUP,
    ModelApprovalStatus="Approved",
    SortBy="CreationTime",
    MaxResults=1,
)

package = (
    response["ModelPackageSummaryList"][0]
    if response["ModelPackageSummaryList"]
    else None
)
package

{'ModelPackageGroupName': 'dogBreeds',
 'ModelPackageVersion': 5,
 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:681340771742:model-package/dogBreeds/5',
 'CreationTime': datetime.datetime(2023, 11, 3, 2, 56, 10, 102000, tzinfo=tzlocal()),
 'ModelPackageStatus': 'Completed',
 'ModelApprovalStatus': 'Approved'}

In [15]:
from sagemaker import ModelPackage

model_package = ModelPackage(
    model_package_arn=package["ModelPackageArn"],
    sagemaker_session=sagemaker_session,
    role=role,
)

In [16]:
# model_package.deploy(
#     endpoint_name=ENDPOINT, 
#     initial_instance_count=1, 
#     instance_type=config["instance_type"]
# )

In [44]:
DATA_CAPTURE_LOCATION

NameError: name 'DATA_CAPTURE_LOCATION' is not defined

In [17]:
import time
import boto3

sagemaker_client = boto3.client("sagemaker")

endpoint_name = "dogBreeds-endpoint"
data_capture_destination = DATA_CAPTURE_DESTINATION # f"s3://{bucket}/monitoring/data-capture"

timestamp = time.strftime("%m%d%H%M%S", time.localtime())
model_name = f"{endpoint_name}-model-{timestamp}"
endpoint_config_name = f"{endpoint_name}-config-{timestamp}"
model_package_arn=package["ModelPackageArn"]

sagemaker_client.create_model(
    ModelName=model_name, 
    ExecutionRoleArn=role, 
    Containers=[{
        "ModelPackageName": model_package_arn
    }] 
)
sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[{
        "ModelName": model_name,
        "InstanceType": "ml.m5.xlarge",
        "InitialVariantWeight": 1,
        "InitialInstanceCount": 1,
        "VariantName": "AllTraffic",
    }],

    DataCaptureConfig={
        "EnableCapture": True,
        "InitialSamplingPercentage": 100,
        "DestinationS3Uri": data_capture_destination,
        "CaptureOptions": [
            {
                "CaptureMode": "Input"
            },
            {
                "CaptureMode": "Output"
            },
        ],
        "CaptureContentTypeHeader": {
            "JsonContentTypes": [
                "application/json",
                "application/x-image"
            ]
        }
    },
)

{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:681340771742:endpoint-config/dogbreeds-endpoint-config-1103183233',
 'ResponseMetadata': {'RequestId': 'e58dde11-b8f3-4020-a9f3-b5fed768e9ff',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e58dde11-b8f3-4020-a9f3-b5fed768e9ff',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '117',
   'date': 'Fri, 03 Nov 2023 18:32:34 GMT'},
  'RetryAttempts': 0}}

In [18]:
response = sagemaker_client.list_endpoints(NameContains=endpoint_name, MaxResults=1)

if len(response["Endpoints"]) == 0:
    sagemaker_client.create_endpoint(
        EndpointName=endpoint_name, 
        EndpointConfigName=endpoint_config_name,
    )
else:
    sagemaker_client.update_endpoint(
        EndpointName=endpoint_name, 
        EndpointConfigName=endpoint_config_name,
    )

## Evaluate

In [12]:
from PIL import Image
import io
import base64

def load_and_preprocess_image(image_path):
    # Load the image
    image = Image.open(image_path)
    # image = image.resize((224, 224))

    # Convert the image to bytes
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format='JPEG')  # Adjust format if needed
    img_byte_arr = img_byte_arr.getvalue()
    image_data = base64.b64encode(img_byte_arr).decode('utf-8')

    return image_data

image_path = './test_images/chow-chow.jpg'
image_data = load_and_preprocess_image(image_path)

In [13]:
image_data

'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAWGA68DASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDnC/bIpC4x1qEtx/Kk3H1ryLHoXHu+M5qAuM4yaVmyuM1CxxnHNUkBJvznFNY56Coi3pQrHp2piAqe1KFp4Xoc08Lx04qWwSGbMryKayVZC5HFMZSOn60JjsVsU0nFSsMcEVE3WtExWEB6VKgJ+lRKuT61ciTOPWmVFDRFuH9

In [47]:
# Decode the base64 string to binary
image_data = base64.b64decode(image_data)

# Convert binary data to an image object using PIL
image = Image.open(io.BytesIO(image_data))

In [65]:
# image
import json
json.dumps({"image_data": image_data})

'{"image_data": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAW+A9QDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDqPLNL5ftWmLX2pfsvtXs+0R5vs2ZXln3oKGtU2ntTTae1HtEHs2Zm00bTWibWmm2PpT50LkZQ20m2rxtz6U0wGjmQcrKgQ0oWrPkH0oEB9KOYXKyttpQtWDCfSm+WR2p8wcpGFqZEBoC4PSpVGKTY0hV

In [14]:
import boto3
import json

endpoint_name = "dogbreeds-endpoint"

client = boto3.client('sagemaker-runtime')

# content_type = "application/x-image"

# response = client.invoke_endpoint(
#     EndpointName=endpoint_name,
#     ContentType=content_type,
#     Body=image_data
# )

content_type = "application/json"

response = client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=json.dumps({"image_data": image_data})
)

# Parse the response
response_body = response['Body'].read()
predictions = json.loads(response_body)
print(predictions)

{'prediction': 'Chow_chow', 'confidence': 0.7634961009025574}


# Setup Monitoring

In [126]:
from time import sleep
from sagemaker.model_monitor import MonitoringExecution


def describe_monitoring_schedules(endpoint_name):
    schedules = []
    response = sagemaker_client.list_monitoring_schedules(EndpointName=endpoint_name)[
        "MonitoringScheduleSummaries"
    ]
    for item in response:
        name = item["MonitoringScheduleName"]
        schedule = {
            "Name": name,
            "Type": item["MonitoringType"],
        }

        description = sagemaker_client.describe_monitoring_schedule(
            MonitoringScheduleName=name
        )

        schedule["Status"] = description["MonitoringScheduleStatus"]

        last_execution_status = description["LastMonitoringExecutionSummary"][
            "MonitoringExecutionStatus"
        ]

        schedule["Last Execution Status"] = last_execution_status
        schedule["Last Execution Date"] = str(description["LastMonitoringExecutionSummary"]["LastModifiedTime"])

        if last_execution_status == "Failed":
            schedule["Failure Reason"] = description["LastMonitoringExecutionSummary"][
                "FailureReason"
            ]
        elif last_execution_status == "CompletedWithViolations":
            processing_job_arn = description["LastMonitoringExecutionSummary"][
                "ProcessingJobArn"
            ]
            execution = MonitoringExecution.from_processing_arn(
                sagemaker_session=sagemaker_session,
                processing_job_arn=processing_job_arn,
            )
            execution_destination = execution.output.destination

            violations_filepath = os.path.join(
                execution_destination, "constraint_violations.json"
            )
            violations = json.loads(S3Downloader.read_file(violations_filepath))[
                "violations"
            ]

            schedule["Violations"] = violations

        schedules.append(schedule)

    return schedules


def describe_monitoring_schedule(endpoint_name, monitoring_type):
    found = False

    schedules = describe_monitoring_schedules(endpoint_name)
    for schedule in schedules:
        if schedule["Type"] == monitoring_type:
            found = True
            print(json.dumps(schedule, indent=2))

    if not found:
        print(f"There's no {monitoring_type} Monitoring Schedule.")


def describe_data_monitoring_schedule(endpoint_name):
    describe_monitoring_schedule(endpoint_name, "DataQuality")


def describe_model_monitoring_schedule(endpoint_name):
    describe_monitoring_schedule(endpoint_name, "ModelQuality")


def delete_monitoring_schedule(endpoint_name, monitoring_type):
    attempts = 30
    found = False

    response = sagemaker_client.list_monitoring_schedules(EndpointName=endpoint_name)[
        "MonitoringScheduleSummaries"
    ]
    for item in response:
        if item["MonitoringType"] == monitoring_type:
            found = True
            
            summary = sagemaker_client.describe_monitoring_schedule(
                MonitoringScheduleName=item["MonitoringScheduleName"]
            )
            status = summary["MonitoringScheduleStatus"]

            if status == "Scheduled" and "LastMonitoringExecutionSummary" in summary and "MonitoringExecutionStatus" in summary["LastMonitoringExecutionSummary"]:
                status = summary["LastMonitoringExecutionSummary"]["MonitoringExecutionStatus"]

            while status in ("Pending", "InProgress") and attempts > 0:
                attempts -= 1
                print(
                    f"Monitoring schedule status: {status}. Waiting for it to finish."
                )
                sleep(30)

                status = sagemaker_client.describe_monitoring_schedule(
                    MonitoringScheduleName=item["MonitoringScheduleName"]
                )["MonitoringScheduleStatus"]

            if status not in ("Pending", "InProgress"):
                sagemaker_client.delete_monitoring_schedule(
                    MonitoringScheduleName=item["MonitoringScheduleName"]
                )
                print("Monitoring schedule deleted.")
            else:
                print("Waiting for monitoring schedule timed out")

    if not found:
        print(f"There's no {monitoring_type} Monitoring Schedule.")


def delete_data_monitoring_schedule(endpoint_name):
    delete_monitoring_schedule(endpoint_name, "DataQuality")


def delete_model_monitoring_schedule(endpoint_name):
    delete_monitoring_schedule(endpoint_name, "ModelQuality")

In [127]:
GROUND_TRUTH_LOCATION = f"s3://{bucket}/monitoring/groundtruth"
DATA_QUALITY_LOCATION = f"s3://{bucket}/monitoring/data-quality"
MODEL_QUALITY_LOCATION = f"s3://{bucket}/monitoring/model-quality"

In [128]:
GROUND_TRUTH_LOCATION

's3://pochingto-testing/monitoring/groundtruth'

## Data Monitoring

In [129]:
import json
import numpy as np
import io
import base64

from PIL import Image

def extract_rgb_features(image_data):
    img = Image.open(io.BytesIO(image_data))
    width, height = img.size
    img_array = np.array(img)
    
    columns = ['width', 'height', 'red_mean', 'red_std', 'green_mean', 'green_std', 'blue_mean', 'blue_std']
    if img_array.ndim == 3:
        red_channel = img_array[:, :, 0]
        green_channel = img_array[:, :, 1]
        blue_channel = img_array[:, :, 2]

        red_mean, red_std = np.mean(red_channel), np.std(red_channel)
        green_mean, green_std = np.mean(green_channel), np.std(green_channel)
        blue_mean, blue_std = np.mean(blue_channel), np.std(blue_channel)

        features = [width, height, red_mean, red_std, green_mean, green_std, blue_mean, blue_std]
    else:
        features = [width, height] + [-1.0] * 6
        
    return dict(zip(columns, features))

def preprocess_handler(inference_record):
    data = json.loads(inference_record.endpoint_input.data)
    image_data = data["image_data"]
    image_data = base64.b64decode(image_data)
    return extract_rgb_features(image_data)

In [130]:
DATA_QUALITY_PREPROCESSOR = "data_quality_preprocessor.py"

In [131]:
bucket = boto3.Session().resource("s3").Bucket(pipeline_session.default_bucket())
prefix = "dogbreeds/monitoring"
bucket.Object(os.path.join(prefix, DATA_QUALITY_PREPROCESSOR)).upload_file(
    str(DATA_QUALITY_PREPROCESSOR)
)
data_quality_preprocessor = (
    f"s3://{os.path.join(bucket.name, prefix, DATA_QUALITY_PREPROCESSOR)}"
)

In [132]:
bucket.name

'pochingto-testing'

In [133]:
from sagemaker.model_monitor import CronExpressionGenerator, DefaultModelMonitor

data_monitor = DefaultModelMonitor(
    instance_type=config["instance_type"],
    instance_count=1,
    max_runtime_in_seconds=3600,
    role=role,
)

data_monitor.create_monitoring_schedule(
    monitor_schedule_name="penguins-data-monitoring-schedule",
    endpoint_input=ENDPOINT,
    record_preprocessor_script=data_quality_preprocessor,
    statistics=f"{DATA_QUALITY_LOCATION}/statistics.json",
    constraints=f"{DATA_QUALITY_LOCATION}/constraints.json",
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    output_s3_uri=DATA_QUALITY_LOCATION,
    enable_cloudwatch_metrics=True,
)


Could not retrieve statistics file at location 's3://pochingto-testing/monitoring/data-quality/statistics.json'. To manually retrieve Statistics object from a given uri, use 'my_model_monitor.statistics(my_s3_uri)' or 'Statistics.from_s3_uri(my_s3_uri)'


NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.

In [None]:
describe_data_monitoring_schedule(ENDPOINT)

## Model Monitoring

In [None]:
from sagemaker.model_monitor import ModelQualityMonitor, EndpointInput

model_monitor = ModelQualityMonitor(
    instance_type=config["instance_type"],
    instance_count=1,
    max_runtime_in_seconds=1800,
    role=role
)

model_monitor.create_monitoring_schedule(
    monitor_schedule_name="penguins-model-monitoring-schedule",
    
    endpoint_input = EndpointInput(
        endpoint_name=ENDPOINT,
        inference_attribute="prediction",
        destination="/opt/ml/processing/input_data",
    ),
    
    problem_type="MulticlassClassification",
    ground_truth_input=GROUND_TRUTH_LOCATION,
    constraints=f"{MODEL_QUALITY_LOCATION}/constraints.json",
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    output_s3_uri=MODEL_QUALITY_LOCATION,
    enable_cloudwatch_metrics=True,
)

In [None]:
describe_model_monitoring_schedule(ENDPOINT)

## Generate Fake Ground Truth

In [None]:
import random
from datetime import datetime
from sagemaker.s3 import S3Uploader

records = []
for inference_id in range(len(data)):
    random.seed(inference_id)

    records.append(json.dumps({
        "groundTruthData": {
            "data": random.choice(["Adelie", "Chinstrap", "Gentoo"]),
            "encoding": "CSV",
        },
        "eventMetadata": {
            "eventId": str(inference_id),
        },
        "eventVersion": "0",
    }))

groundtruth_payload = "\n".join(records)
upload_time = datetime.utcnow()
uri = f"{GROUND_TRUTH_LOCATION}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
S3Uploader.upload_string_as_file_body(groundtruth_payload, uri)