# Pipeline

In [2]:
import os
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession, LocalPipelineSession

role = sagemaker.get_execution_role()
bucket = "pochingto-testing"
pipeline_session = PipelineSession(default_bucket=bucket)

In [3]:
sagemaker.__version__

'2.183.0'

In [4]:
# %%bash

# pip install -q --upgrade pip
# pip install -q --upgrade awscli boto3
# pip install -q --upgrade scikit-learn==1.3.1
# pip install -q --upgrade PyYAML==6.0
# pip install -q --upgrade sagemaker
# pip install -q --upgrade ipytest

In [5]:
# import sagemaker
# import importlib
# importlib.reload(sagemaker)
# sagemaker.__version__

In [6]:
import boto3

sagemaker_session = sagemaker.session.Session()
sagemaker_client = boto3.client("sagemaker")
iam_client = boto3.client("iam")
region = boto3.Session().region_name
bucket = "pochingto-testing"



In [7]:
ENDPOINT = "dogBreeds-endpoint"
DATA_CAPTURE_DESTINATION = f"{bucket}/monitoring/data-capture"
MODEL_PACKAGE_GROUP = "dogBreeds"

In [8]:
config = {
    "session": pipeline_session,
    "instance_type": "ml.m5.xlarge",
    "image": None,
    "framework_version": "1.12",
    "py_version": "py38",
}

In [9]:
from sagemaker.workflow.steps import CacheConfig

cache_config = CacheConfig(enable_caching=True, expire_after="15d")

## Data preprocessing

In [73]:
import importlib
from preprocessing import preprocess_data

importlib.reload(preprocess_data)

import tempfile
import shutil

from pathlib import Path

directory = tempfile.mkdtemp()
data_dir = "all/"
train_ratio = 0.8
output_dir = Path(directory) / "output"

preprocess_data.preprocess_data(data_dir, output_dir, train_ratio)

Splitting data...
Splitting class folder: Affenpinscher
Splitting class folder: Afghan_hound
Splitting class folder: Airedale_terrier
Splitting class folder: Akita
Splitting class folder: Alaskan_malamute
Splitting class folder: American_eskimo_dog
Splitting class folder: American_foxhound
Splitting class folder: American_staffordshire_terrier
Splitting class folder: American_water_spaniel
Splitting class folder: Anatolian_shepherd_dog
Splitting class folder: Australian_cattle_dog
Splitting class folder: Australian_shepherd
Splitting class folder: Australian_terrier
Splitting class folder: Basenji
Splitting class folder: Basset_hound
Splitting class folder: Beagle
Splitting class folder: Bearded_collie
Splitting class folder: Beauceron
Splitting class folder: Bedlington_terrier
Splitting class folder: Belgian_malinois
Splitting class folder: Belgian_sheepdog
Splitting class folder: Belgian_tervuren
Splitting class folder: Bernese_mountain_dog
Splitting class folder: Bichon_frise
Splitt

In [52]:
# !cat /tmp/tmpp0tanr90/output/data-baseline/image_stats.csv
shutil.rmtree(directory)

In [10]:
from sagemaker.workflow.pipeline_context import LocalPipelineSession
local_pipeline_session = LocalPipelineSession()

In [11]:
from sagemaker.workflow.parameters import ParameterString

dataset_location = ParameterString(
    name="dataset_location",
    default_value=f"s3://{bucket}/all",
)

In [12]:
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_definition_config import PipelineDefinitionConfig

pipeline_definition_config = PipelineDefinitionConfig(use_custom_job_prefix=True)

In [13]:
len(sorted(os.listdir('all/')))

134

In [14]:
dataset_location

ParameterString(name='dataset_location', parameter_type=<ParameterTypeEnum.STRING: 'String'>, default_value='s3://pochingto-testing/all')

In [15]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.pytorch.processing import PyTorchProcessor

# pytorch_processor = PyTorchProcessor(
#     base_job_name="evaluation-processor",
#     image_uri=config["image"],
#     framework_version=config["framework_version"],
#     py_version=config["py_version"],
#     instance_type=config["instance_type"],
#     instance_count=1,
#     role=role,
#     sagemaker_session=config["session"],
# )

# Define the script processor
pytorch_preprocessor = PyTorchProcessor(
    base_job_name="preprocess-data",
    image_uri=config["image"],
    framework_version=config["framework_version"],
    py_version=config["py_version"],
    instance_type=config["instance_type"],
    instance_count=1,
    role=role,
    sagemaker_session=config["session"],
    # sagemaker_session=local_pipeline_session
)

preprocessing_step = ProcessingStep(
    name="preprocess-data",
    step_args=pytorch_preprocessor.run(
        code='preprocess_data.py',
        source_dir='preprocessing', 
        inputs=[
            ProcessingInput(
                source=dataset_location,
                destination='/opt/ml/processing/input'
            )
        ],
        outputs=[
            ProcessingOutput(
                output_name="train",
                source='/opt/ml/processing/output/train',
                destination=f's3://{bucket}/output/train'
            ),
            ProcessingOutput(
                output_name="test",
                source='/opt/ml/processing/output/test',
                destination=f's3://{bucket}/output/test'
            ),
            ProcessingOutput(
                output_name="data-baseline",
                source='/opt/ml/processing/output/data-baseline',
                destination=f's3://{bucket}/output/data-baseline'
            )
        ]
    ),
    cache_config=cache_config
)



In [16]:
type(preprocessing_step)

sagemaker.workflow.steps.ProcessingStep

In [17]:
# dogbreed_pipeline = Pipeline(
#     name="dogbreeds-preprocessing-pipeline",
#     parameters=[dataset_location],
#     steps=[
#         preprocessing_step,
#     ],
#     pipeline_definition_config=pipeline_definition_config,
#     sagemaker_session=config["session"],
#     # sagemaker_session=local_pipeline_session
# )

# dogbreed_pipeline.upsert(role_arn=role)

In [18]:
# dogbreed_pipeline.create(
#     role_arn=sagemaker.get_execution_role(), 
#     description="local pipeline example"
# )

# execution = dogbreed_pipeline.start()

# steps = execution.list_steps()

# training_job_name = steps['PipelineExecutionSteps'][0]['Metadata']['TrainingJob']['Arn']

# step_outputs = local_pipeline_session.sagemaker_client.describe_training_job(TrainingJobName = training_job_name)

## Training

In [23]:
!pip install --upgrade torch torchvision

Collecting torchvision
  Downloading torchvision-0.14.1-cp37-cp37m-manylinux1_x86_64.whl (24.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.2/24.2 MB[0m [31m33.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0mInstalling collected packages: torchvision
Successfully installed torchvision-0.14.1
[0m

In [16]:
import importlib
import train

importlib.reload(train)

import tempfile
import shutil

from pathlib import Path
from train import main

directory = tempfile.mkdtemp()
data_dir = "all/"
model_dir = Path(directory) / "model"
output_dir = Path(directory) / "output"

model_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)

num_epochs = 1
batch_size = 16
debug = True

main(str(data_dir), str(model_dir), str(output_dir), num_epochs, batch_size, debug)

ModuleNotFoundError: No module named 'torch'

In [57]:
shutil.rmtree(directory)

In [19]:
from sagemaker.pytorch import PyTorch

estimator = PyTorch(
    base_job_name="dogbreeds-training",
    entry_point=f"train.py",

    hyperparameters={
        "epochs": 5,
        "batch_size": 32,
    },
    
    metric_definitions=[
        {"Name": "loss", "Regex": "Loss: ([0-9\\.]+)"},
        {"Name": "accuracy", "Regex": "Validation Accuracy: ([0-9\\.]+)"},
    ],
    image_uri=config["image"],
    framework_version=config["framework_version"],
    py_version=config["py_version"],
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    disable_profiler=True,
    sagemaker_session=config["session"],
    role=role
)

In [21]:
# print(preprocessing_step.properties.to_string()) #.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri.to_string())

In [22]:
from sagemaker.workflow.steps import TrainingStep
from sagemaker.inputs import TrainingInput

train_model_step = TrainingStep(
    name="train-model",
    step_args=estimator.fit(
        inputs={
            "train": TrainingInput(
                s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri
            )
        }
    ),
    cache_config=cache_config,
)

## Evaluation

In [12]:
!ls /tmp/tmpjg2v2xz9/model/

data_classes.txt  model.pt


In [17]:
with open(model_dir / "data_classes.txt", "r") as f:
    classes = [cls.strip() for cls in f.readlines()]
classes

['Affenpinscher',
 'Afghan_hound',
 'Airedale_terrier',
 'Akita',
 'Alaskan_malamute',
 'American_eskimo_dog',
 'American_foxhound',
 'American_staffordshire_terrier',
 'American_water_spaniel',
 'Anatolian_shepherd_dog',
 'Australian_cattle_dog',
 'Australian_shepherd',
 'Australian_terrier',
 'Basenji',
 'Basset_hound',
 'Beagle',
 'Bearded_collie',
 'Beauceron',
 'Bedlington_terrier',
 'Belgian_malinois',
 'Belgian_sheepdog',
 'Belgian_tervuren',
 'Bernese_mountain_dog',
 'Bichon_frise',
 'Black_and_tan_coonhound',
 'Black_russian_terrier',
 'Bloodhound',
 'Bluetick_coonhound',
 'Border_collie',
 'Border_terrier',
 'Borzoi',
 'Boston_terrier',
 'Bouvier_des_flandres',
 'Boxer',
 'Boykin_spaniel',
 'Briard',
 'Brittany',
 'Brussels_griffon',
 'Bull_terrier',
 'Bulldog',
 'Bullmastiff',
 'Cairn_terrier',
 'Canaan_dog',
 'Cane_corso',
 'Cardigan_welsh_corgi',
 'Cavalier_king_charles_spaniel',
 'Chesapeake_bay_retriever',
 'Chihuahua',
 'Chinese_crested',
 'Chinese_shar-pei',
 'Chow_cho

In [26]:
from torchvision import datasets
all_data = datasets.ImageFolder(root=data_dir)
all_data.classes = classes
all_data.class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
all_data.class_to_idx

{'Affenpinscher': 0,
 'Afghan_hound': 1,
 'Airedale_terrier': 2,
 'Akita': 3,
 'Alaskan_malamute': 4,
 'American_eskimo_dog': 5,
 'American_foxhound': 6,
 'American_staffordshire_terrier': 7,
 'American_water_spaniel': 8,
 'Anatolian_shepherd_dog': 9,
 'Australian_cattle_dog': 10,
 'Australian_shepherd': 11,
 'Australian_terrier': 12,
 'Basenji': 13,
 'Basset_hound': 14,
 'Beagle': 15,
 'Bearded_collie': 16,
 'Beauceron': 17,
 'Bedlington_terrier': 18,
 'Belgian_malinois': 19,
 'Belgian_sheepdog': 20,
 'Belgian_tervuren': 21,
 'Bernese_mountain_dog': 22,
 'Bichon_frise': 23,
 'Black_and_tan_coonhound': 24,
 'Black_russian_terrier': 25,
 'Bloodhound': 26,
 'Bluetick_coonhound': 27,
 'Border_collie': 28,
 'Border_terrier': 29,
 'Borzoi': 30,
 'Boston_terrier': 31,
 'Bouvier_des_flandres': 32,
 'Boxer': 33,
 'Boykin_spaniel': 34,
 'Briard': 35,
 'Brittany': 36,
 'Brussels_griffon': 37,
 'Bull_terrier': 38,
 'Bulldog': 39,
 'Bullmastiff': 40,
 'Cairn_terrier': 41,
 'Canaan_dog': 42,
 'Cane

In [28]:
output_dir.parent

PosixPath('/tmp/tmp_vofh1d4')

In [35]:
# if 'autoreload' not in get_ipython().extension_manager.loaded:
#     %load_ext autoreload
import importlib

import tempfile
import shutil

from pathlib import Path
from evaluate import evaluation

importlib.reload(evaluation)
# generate model.tar.gz
import tarfile
import os

def tar_sagemaker_style(source_dir, output_filename):
    with tarfile.open(output_filename, "w:gz") as tar:
        for item in os.listdir(source_dir):
            item_path = os.path.join(source_dir, item)
            tar.add(item_path, arcname=item)

# Tar the 'model/' folder
output_file = 'model.tar.gz'
tar_sagemaker_style(str(model_dir), model_dir / output_file)
print("tared data")

baseline_dir = output_dir.parent / "baseline"
baseline_dir.mkdir(exist_ok=True)

# with tarfile.open(Path(directory) / "model.tar.gz") as tar:
#     tar.extractall(path=Path(model_dir))
evaluation.main(str(model_dir), str(data_dir), str(output_dir), str(baseline_dir), debug=True)

tared data
Extracting model_dir...
extracted tar.
Loading model...
Test Accuracy: 0.0%
Test weighted precision: 0.0
Test weighted recall: 0.0
Written evaluation.json .
Writing baseline csv...
Wrote /tmp/tmp_vofh1d4/baseline/evaluation_baseline.csv


In [42]:
!cat /tmp/tmpjg2v2xz9/output/evaluation_baseline.csv

Predicted,Label,Confidence
123,0,0.058272272
128,0,0.042744916
43,0,0.040311657
112,0,0.043824345
112,0,0.043053087
17,0,0.034501385
17,0,0.03910346
43,0,0.049315706
78,0,0.02734374
46,0,0.054395616
46,0,0.05734981
123,0,0.037385754
123,0,0.04410225
17,0,0.02573959
43,0,0.033720877
123,0,0.041246813
112,0,0.047379896
123,0,0.065215066
112,0,0.051347382
46,0,0.049985155
0,0,0.030542245
128,0,0.084609374
112,0,0.057425596
112,0,0.036075264
46,0,0.03006444
43,0,0.051023073
112,0,0.052787412
46,0,0.031601362
43,0,0.03916564
112,0,0.030664597
123,0,0.02805526
112,0,0.04982964


In [74]:
!ls /tmp/tmpnclu3kzg/model/

data_classes.txt  model.pt


In [22]:
print(directory)
!cat /tmp/tmpf8kvn5ak/output/evaluation.json

/tmp/tmpdtjbje2o
cat: /tmp/tmpf8kvn5ak/output/evaluation.json: No such file or directory


In [23]:
shutil.rmtree(directory)

In [25]:
from sagemaker.workflow.properties import PropertyFile

evaluation_report = PropertyFile(
    name="evaluation-report", output_name="evaluation", path="evaluation.json"
)

In [26]:
model_assets = train_model_step.properties.ModelArtifacts.S3ModelArtifacts
model_assets

<sagemaker.workflow.properties.Properties at 0x7f79150cf150>

In [27]:
config

{'session': <sagemaker.workflow.pipeline_context.PipelineSession at 0x7f7950533b90>,
 'instance_type': 'ml.m5.xlarge',
 'image': None,
 'framework_version': '1.12',
 'py_version': 'py38'}

In [28]:
from sagemaker.pytorch.processing import PyTorchProcessor

pytorch_processor = PyTorchProcessor(
    base_job_name="evaluation-processor",
    image_uri=config["image"],
    framework_version=config["framework_version"],
    py_version=config["py_version"],
    instance_type=config["instance_type"],
    instance_count=1,
    role=role,
    sagemaker_session=config["session"],
)


In [29]:
evaluate_model_step = ProcessingStep(
    name="evaluate-model",
    step_args=pytorch_processor.run(
        code=f"evaluation.py",
        source_dir='evaluate',
        inputs=[
            ProcessingInput(
                source=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
            ProcessingInput(
                source=model_assets,
                destination="/opt/ml/processing/model",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="evaluation", source="/opt/ml/processing/evaluation"
            ),
            ProcessingOutput(
                output_name="model-baseline", source="/opt/ml/processing/baseline"
            ),
        ],
    ),
    property_files=[evaluation_report],
    cache_config=cache_config,
)

## Data & Model Quality Check Step

In [30]:
GROUND_TRUTH_LOCATION = f"{bucket}/monitoring/groundtruth"
DATA_QUALITY_LOCATION = f"{bucket}/monitoring/data-quality"
MODEL_QUALITY_LOCATION = f"{bucket}/monitoring/model-quality"

In [31]:
from sagemaker.workflow.quality_check_step import (
    QualityCheckStep,
    DataQualityCheckConfig,
)
from sagemaker.workflow.check_job_config import CheckJobConfig
from sagemaker.model_monitor.dataset_format import DatasetFormat

data_quality_baseline_step = QualityCheckStep(
    name="generate-data-quality-baseline",
    check_job_config=CheckJobConfig(
        instance_type="ml.c5.xlarge",
        instance_count=1,
        volume_size_in_gb=20,
        sagemaker_session=pipeline_session,
        role=role,
    ),
    quality_check_config=DataQualityCheckConfig(
        baseline_dataset=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
            "data-baseline"
        ].S3Output.S3Uri,
        dataset_format=DatasetFormat.csv(header=True, output_columns_position="START"),
        output_s3_uri=DATA_QUALITY_LOCATION,
    ),
    model_package_group_name=MODEL_PACKAGE_GROUP,
    skip_check=True,
    register_new_baseline=True,
    cache_config=cache_config,
)

In [32]:
from sagemaker.workflow.quality_check_step import ModelQualityCheckConfig

model_quality_baseline_step = QualityCheckStep(
    name="generate-model-quality-baseline",
    check_job_config=CheckJobConfig(
        instance_type="ml.c5.xlarge",
        instance_count=1,
        volume_size_in_gb=20,
        sagemaker_session=pipeline_session,
        role=role,
    ),
    quality_check_config=ModelQualityCheckConfig(
        # We are going to use the output of the Transform Step to generate
        # the model quality baseline.
        baseline_dataset=evaluate_model_step.properties.ProcessingOutputConfig.Outputs[
            "model-baseline"
        ].S3Output.S3Uri,
        dataset_format=DatasetFormat.csv(header=True),

        # We need to specify the problem type and the fields where the prediction
        # and groundtruth are so the process knows how to interpret the results.
        problem_type="MulticlassClassification",
        
        # Since the data doesn't have headers, SageMaker will autocreate headers for it.
        # _c0 corresponds to the first column, and _c1 corresponds to the second column.
        ground_truth_attribute="Label",
        inference_attribute="Predicted",
        probability_attribute="Confidence",
        output_s3_uri=MODEL_QUALITY_LOCATION,
    ),
    model_package_group_name=MODEL_PACKAGE_GROUP,
    skip_check=True,
    register_new_baseline=True,
    cache_config=cache_config,
)

## Registering Model

In [33]:
config

{'session': <sagemaker.workflow.pipeline_context.PipelineSession at 0x7f7950533b90>,
 'instance_type': 'ml.m5.xlarge',
 'image': None,
 'framework_version': '1.12',
 'py_version': 'py38'}

In [34]:
MODEL_PACKAGE_GROUP = "dogBreeds"

In [35]:
from sagemaker.pytorch.model import PyTorchModel

pytorch_model = PyTorchModel(
    model_data=model_assets,
    entry_point="inference.py",
    image_uri=config["image"],
    py_version=config["py_version"],
    framework_version=config["framework_version"],
    sagemaker_session=config["session"],
    role=role,
)

In [36]:
from sagemaker.model_metrics import ModelMetrics, MetricsSource
from sagemaker.workflow.functions import Join

# model_metrics = ModelMetrics(
#     model_statistics=MetricsSource(
#         s3_uri=Join(
#             on="/",
#             values=[
#                 evaluate_model_step.properties.ProcessingOutputConfig.Outputs[
#                     "evaluation"
#                 ].S3Output.S3Uri,
#                 "evaluation.json",
#             ],
#         ),
#         content_type="application/json",
#     )
# )

In [37]:
from sagemaker.drift_check_baselines import DriftCheckBaselines

model_metrics = ModelMetrics(
    model_data_statistics=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.CalculatedBaselineStatistics,
        content_type="application/json",
    ),
    model_data_constraints=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.CalculatedBaselineConstraints,
        content_type="application/json",
    ),
    model_statistics=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.CalculatedBaselineStatistics,
        content_type="application/json",
    ),
    model_constraints=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.CalculatedBaselineConstraints,
        content_type="application/json",
    ),
)

drift_check_baselines = DriftCheckBaselines(
    model_data_statistics=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.BaselineUsedForDriftCheckStatistics,
        content_type="application/json",
    ),
    model_data_constraints=MetricsSource(
        s3_uri=data_quality_baseline_step.properties.BaselineUsedForDriftCheckConstraints,
        content_type="application/json",
    ),
    model_statistics=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.BaselineUsedForDriftCheckStatistics,
        content_type="application/json",
    ),
    model_constraints=MetricsSource(
        s3_uri=model_quality_baseline_step.properties.BaselineUsedForDriftCheckConstraints,
        content_type="application/json",
    ),
)

In [38]:
from sagemaker.workflow.model_step import ModelStep

register_model_step = ModelStep(
    name="register-model",
    step_args=pytorch_model.register(
        model_package_group_name=MODEL_PACKAGE_GROUP,
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        approval_status="PendingManualApproval",
        content_types=["application/x-image"],
        response_types=["application/json"],
        inference_instances=["ml.m5.xlarge"],
        transform_instances=["ml.g4dn.xlarge"],
        domain="MACHINE_LEARNING",
        task="CLASSIFICATION",
        framework="PYTORCH",
        framework_version=config["framework_version"],
    ),
)

In [39]:
from sagemaker.workflow.parameters import ParameterFloat

accuracy_threshold = ParameterFloat(name="accuracy_threshold", default_value=0.50)

In [40]:
from sagemaker.workflow.fail_step import FailStep

fail_step = FailStep(
    name="fail",
    error_message=Join(
        on=" ",
        values=[
            "Execution failed because the model's accuracy was lower than",
            accuracy_threshold,
        ],
    ),
)

In [41]:
from sagemaker.workflow.functions import JsonGet
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo

condition = ConditionGreaterThanOrEqualTo(
    left=JsonGet(
        step_name=evaluate_model_step.name,
        property_file=evaluation_report,
        json_path="metrics.accuracy.value",
    ),
    right=accuracy_threshold,
)

In [42]:
from sagemaker.workflow.condition_step import ConditionStep

condition_step = ConditionStep(
    name="check-model-accuracy",
    conditions=[condition],
    if_steps=[
        model_quality_baseline_step, 
        register_model_step
    ],
    else_steps=[fail_step],
)

In [None]:
training_pipeline = Pipeline(
    name="dogBreeds-training-pipeline",
    parameters=[dataset_location, accuracy_threshold],
    steps=[
        preprocessing_step,
        train_model_step,
        data_quality_baseline_step,
        evaluate_model_step,
        condition_step,
    ],
    pipeline_definition_config=pipeline_definition_config,
    sagemaker_session=config["session"],
)

training_pipeline.upsert(role_arn=role)

INFO:sagemaker.processing:Uploaded preprocessing to s3://pochingto-testing/dogBreeds-training-pipeline/code/af7855aa5c91b75932b386e381849af6/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://pochingto-testing/dogBreeds-training-pipeline/code/0c8137ea235a6debf66cba8d901e144c/runproc.sh


Using provided s3_resource
Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.processing:Uploaded evaluate to s3://pochingto-testing/dogBreeds-training-pipeline/code/4db941f04ed748882e10300736cad13a/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://pochingto-testing/dogBreeds-training-pipeline/code/2c207c809cb0e0e9a1d77e5247f961f9/runproc.sh


# Setup lambda

In [37]:
# setup role for lambda to deploy endpoint
import json

lambda_role_name = "lambda-deployment-role"
lambda_role_arn = None

try:
    response = iam_client.create_role(
        RoleName=lambda_role_name,
        AssumeRolePolicyDocument=json.dumps(
            {
                "Version": "2012-10-17",
                "Statement": [
                    {
                        "Effect": "Allow",
                        "Principal": {
                            "Service": ["lambda.amazonaws.com", "events.amazonaws.com"]
                        },
                        "Action": "sts:AssumeRole",
                    }
                ],
            }
        ),
        Description="Lambda Endpoint Deployment",
    )

    lambda_role_arn = response["Role"]["Arn"]

    iam_client.attach_role_policy(
        PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
        RoleName=lambda_role_name,
    )

    iam_client.attach_role_policy(
        PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess",
        RoleName=lambda_role_name,
    )

    print(f'Role "{lambda_role_name}" created with ARN "{lambda_role_arn}".')
except iam_client.exceptions.EntityAlreadyExistsException:
    response = iam_client.get_role(RoleName=lambda_role_name)
    lambda_role_arn = response["Role"]["Arn"]
    print(f'Role "{lambda_role_name}" already exists with ARN "{lambda_role_arn}".')

ClientError: An error occurred (AccessDenied) when calling the CreateRole operation: User: arn:aws:sts::681340771742:assumed-role/AmazonSageMaker-ExecutionRole-20230916T122655/SageMaker is not authorized to perform: iam:CreateRole on resource: arn:aws:iam::681340771742:role/lambda-deployment-role because no identity-based policy allows the iam:CreateRole action

In [40]:
bucket

'pochingto-testing'

In [41]:
from sagemaker.predictor import Predictor

ENDPOINT = "dogbreeds-endpoint"
DATA_CAPTURE_DESTINATION = f"{bucket}/monitoring/data-capture"

In [43]:
DATA_CAPTURE_DESTINATION

'pochingto-testing/monitoring/data-capture'

In [44]:
role

'arn:aws:iam::681340771742:role/service-role/AmazonSageMaker-ExecutionRole-20230916T122655'

In [42]:
from sagemaker.lambda_helper import Lambda

lambda_role_arn = "arn:aws:iam::681340771742:role/lambda-deployment-role"
deploy_lambda_fn = Lambda(
    function_name="deploy_fn",
    execution_role_arn=lambda_role_arn,
    script="lambda.py",
    handler="lambda.lambda_handler",
    timeout=600,
    session=sagemaker_session,
    runtime="python3.11",
    environment={
        "Variables": {
            "ENDPOINT": ENDPOINT,
            "DATA_CAPTURE_DESTINATION": DATA_CAPTURE_DESTINATION,
            "ROLE": role,
        }
    },
)

lambda_response = deploy_lambda_fn.upsert()
lambda_response

ValueError: {'Message': 'User: arn:aws:sts::681340771742:assumed-role/AmazonSageMaker-ExecutionRole-20230916T122655/SageMaker is not authorized to perform: lambda:CreateFunction on resource: arn:aws:lambda:us-east-1:681340771742:function:deploy_fn because no identity-based policy allows the lambda:CreateFunction action', 'Code': 'AccessDeniedException'}

## Setup Eventbridge

In [46]:
MODEL_PACKAGE_GROUP

'dogBreeds'

In [47]:
event_pattern = f"""
{{
  "source": ["aws.sagemaker"],
  "detail-type": ["SageMaker Model Package State Change"],
  "detail": {{
    "ModelPackageGroupName": ["{MODEL_PACKAGE_GROUP}"],
    "ModelApprovalStatus": ["Approved"]
  }}
}}
"""

In [50]:
event_pattern

'\n{\n  "source": ["aws.sagemaker"],\n  "detail-type": ["SageMaker Model Package State Change"],\n  "detail": {\n    "ModelPackageGroupName": ["dogBreeds"],\n    "ModelApprovalStatus": ["Approved"]\n  }\n}\n'

In [48]:
events_client = boto3.client("events")
rule_response = events_client.put_rule(
    Name="PipelineModelApprovedRule",
    EventPattern=event_pattern,
    State="ENABLED",
    RoleArn=role,
)

ClientError: An error occurred (AccessDeniedException) when calling the PutRule operation: User: arn:aws:sts::681340771742:assumed-role/AmazonSageMaker-ExecutionRole-20230916T122655/SageMaker is not authorized to perform: events:PutRule on resource: arn:aws:events:us-east-1:681340771742:rule/PipelineModelApprovedRule because no identity-based policy allows the events:PutRule action

# Deploy

In [4]:
bucket = "pochingto-testing"

In [8]:
from sagemaker.predictor import Predictor

ENDPOINT = "dogBreeds-endpoint"
DATA_CAPTURE_DESTINATION = f"{bucket}/monitoring/data-capture"
MODEL_PACKAGE_GROUP = "dogBreeds"

In [13]:
import boto3
import sagemaker

sagemaker_client = boto3.client("sagemaker")
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [14]:
response = sagemaker_client.list_model_packages(
    ModelPackageGroupName=MODEL_PACKAGE_GROUP,
    ModelApprovalStatus="Approved",
    SortBy="CreationTime",
    MaxResults=1,
)

package = (
    response["ModelPackageSummaryList"][0]
    if response["ModelPackageSummaryList"]
    else None
)
package

{'ModelPackageGroupName': 'dogBreeds',
 'ModelPackageVersion': 5,
 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:681340771742:model-package/dogBreeds/5',
 'CreationTime': datetime.datetime(2023, 11, 3, 2, 56, 10, 102000, tzinfo=tzlocal()),
 'ModelPackageStatus': 'Completed',
 'ModelApprovalStatus': 'Approved'}

In [15]:
from sagemaker import ModelPackage

model_package = ModelPackage(
    model_package_arn=package["ModelPackageArn"],
    sagemaker_session=sagemaker_session,
    role=role,
)

In [16]:
# model_package.deploy(
#     endpoint_name=ENDPOINT, 
#     initial_instance_count=1, 
#     instance_type=config["instance_type"]
# )

In [44]:
DATA_CAPTURE_LOCATION

NameError: name 'DATA_CAPTURE_LOCATION' is not defined

In [17]:
import time
import boto3

sagemaker_client = boto3.client("sagemaker")

endpoint_name = "dogBreeds-endpoint"
data_capture_destination = DATA_CAPTURE_DESTINATION # f"s3://{bucket}/monitoring/data-capture"

timestamp = time.strftime("%m%d%H%M%S", time.localtime())
model_name = f"{endpoint_name}-model-{timestamp}"
endpoint_config_name = f"{endpoint_name}-config-{timestamp}"
model_package_arn=package["ModelPackageArn"]

sagemaker_client.create_model(
    ModelName=model_name, 
    ExecutionRoleArn=role, 
    Containers=[{
        "ModelPackageName": model_package_arn
    }] 
)
sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[{
        "ModelName": model_name,
        "InstanceType": "ml.m5.xlarge",
        "InitialVariantWeight": 1,
        "InitialInstanceCount": 1,
        "VariantName": "AllTraffic",
    }],

    DataCaptureConfig={
        "EnableCapture": True,
        "InitialSamplingPercentage": 100,
        "DestinationS3Uri": data_capture_destination,
        "CaptureOptions": [
            {
                "CaptureMode": "Input"
            },
            {
                "CaptureMode": "Output"
            },
        ],
        "CaptureContentTypeHeader": {
            "JsonContentTypes": [
                "application/json",
                "application/x-image"
            ]
        }
    },
)

{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:681340771742:endpoint-config/dogbreeds-endpoint-config-1103183233',
 'ResponseMetadata': {'RequestId': 'e58dde11-b8f3-4020-a9f3-b5fed768e9ff',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e58dde11-b8f3-4020-a9f3-b5fed768e9ff',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '117',
   'date': 'Fri, 03 Nov 2023 18:32:34 GMT'},
  'RetryAttempts': 0}}

In [18]:
response = sagemaker_client.list_endpoints(NameContains=endpoint_name, MaxResults=1)

if len(response["Endpoints"]) == 0:
    sagemaker_client.create_endpoint(
        EndpointName=endpoint_name, 
        EndpointConfigName=endpoint_config_name,
    )
else:
    sagemaker_client.update_endpoint(
        EndpointName=endpoint_name, 
        EndpointConfigName=endpoint_config_name,
    )

## Evaluate

In [45]:
from PIL import Image
import io

def load_and_preprocess_image(image_path):
    # Load the image
    image = Image.open(image_path)
    # image = image.resize((224, 224))

    # Convert the image to bytes
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format='JPEG')  # Adjust format if needed
    img_byte_arr = img_byte_arr.getvalue()

    return img_byte_arr

image_path = './test_images/australian-shepherd.jpg'
image_data = load_and_preprocess_image(image_path)

In [46]:
# image_data

In [48]:
import boto3
import json

endpoint_name = "dogbreeds-endpoint"

client = boto3.client('sagemaker-runtime')

content_type = "application/x-image"

response = client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=image_data
)

# Parse the response
response_body = response['Body'].read()
predictions = json.loads(response_body)
print(predictions)

ValidationError: An error occurred (ValidationError) when calling the InvokeEndpoint operation: Endpoint dogbreeds-endpoint of account 681340771742 not found.

# Setup Monitoring

In [10]:
from time import sleep
from sagemaker.model_monitor import MonitoringExecution


def describe_monitoring_schedules(endpoint_name):
    schedules = []
    response = sagemaker_client.list_monitoring_schedules(EndpointName=endpoint_name)[
        "MonitoringScheduleSummaries"
    ]
    for item in response:
        name = item["MonitoringScheduleName"]
        schedule = {
            "Name": name,
            "Type": item["MonitoringType"],
        }

        description = sagemaker_client.describe_monitoring_schedule(
            MonitoringScheduleName=name
        )

        schedule["Status"] = description["MonitoringScheduleStatus"]

        last_execution_status = description["LastMonitoringExecutionSummary"][
            "MonitoringExecutionStatus"
        ]

        schedule["Last Execution Status"] = last_execution_status
        schedule["Last Execution Date"] = str(description["LastMonitoringExecutionSummary"]["LastModifiedTime"])

        if last_execution_status == "Failed":
            schedule["Failure Reason"] = description["LastMonitoringExecutionSummary"][
                "FailureReason"
            ]
        elif last_execution_status == "CompletedWithViolations":
            processing_job_arn = description["LastMonitoringExecutionSummary"][
                "ProcessingJobArn"
            ]
            execution = MonitoringExecution.from_processing_arn(
                sagemaker_session=sagemaker_session,
                processing_job_arn=processing_job_arn,
            )
            execution_destination = execution.output.destination

            violations_filepath = os.path.join(
                execution_destination, "constraint_violations.json"
            )
            violations = json.loads(S3Downloader.read_file(violations_filepath))[
                "violations"
            ]

            schedule["Violations"] = violations

        schedules.append(schedule)

    return schedules


def describe_monitoring_schedule(endpoint_name, monitoring_type):
    found = False

    schedules = describe_monitoring_schedules(endpoint_name)
    for schedule in schedules:
        if schedule["Type"] == monitoring_type:
            found = True
            print(json.dumps(schedule, indent=2))

    if not found:
        print(f"There's no {monitoring_type} Monitoring Schedule.")


def describe_data_monitoring_schedule(endpoint_name):
    describe_monitoring_schedule(endpoint_name, "DataQuality")


def describe_model_monitoring_schedule(endpoint_name):
    describe_monitoring_schedule(endpoint_name, "ModelQuality")


def delete_monitoring_schedule(endpoint_name, monitoring_type):
    attempts = 30
    found = False

    response = sagemaker_client.list_monitoring_schedules(EndpointName=endpoint_name)[
        "MonitoringScheduleSummaries"
    ]
    for item in response:
        if item["MonitoringType"] == monitoring_type:
            found = True
            
            summary = sagemaker_client.describe_monitoring_schedule(
                MonitoringScheduleName=item["MonitoringScheduleName"]
            )
            status = summary["MonitoringScheduleStatus"]

            if status == "Scheduled" and "LastMonitoringExecutionSummary" in summary and "MonitoringExecutionStatus" in summary["LastMonitoringExecutionSummary"]:
                status = summary["LastMonitoringExecutionSummary"]["MonitoringExecutionStatus"]

            while status in ("Pending", "InProgress") and attempts > 0:
                attempts -= 1
                print(
                    f"Monitoring schedule status: {status}. Waiting for it to finish."
                )
                sleep(30)

                status = sagemaker_client.describe_monitoring_schedule(
                    MonitoringScheduleName=item["MonitoringScheduleName"]
                )["MonitoringScheduleStatus"]

            if status not in ("Pending", "InProgress"):
                sagemaker_client.delete_monitoring_schedule(
                    MonitoringScheduleName=item["MonitoringScheduleName"]
                )
                print("Monitoring schedule deleted.")
            else:
                print("Waiting for monitoring schedule timed out")

    if not found:
        print(f"There's no {monitoring_type} Monitoring Schedule.")


def delete_data_monitoring_schedule(endpoint_name):
    delete_monitoring_schedule(endpoint_name, "DataQuality")


def delete_model_monitoring_schedule(endpoint_name):
    delete_monitoring_schedule(endpoint_name, "ModelQuality")

In [None]:
from sagemaker.workflow.quality_check_step import (
    QualityCheckStep,
    DataQualityCheckConfig,
)
from sagemaker.workflow.check_job_config import CheckJobConfig
from sagemaker.model_monitor.dataset_format import DatasetFormat

data_quality_baseline_step = QualityCheckStep(
    name="generate-data-quality-baseline",
    check_job_config=CheckJobConfig(
        instance_type="ml.c5.xlarge",
        instance_count=1,
        volume_size_in_gb=20,
        sagemaker_session=pipeline_session,
        role=role,
    ),
    quality_check_config=DataQualityCheckConfig(
        baseline_dataset=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
            "train"
        ].S3Output.S3Uri,
        dataset_format=DatasetFormat.csv(header=True, output_columns_position="START"),
        output_s3_uri=DATA_QUALITY_LOCATION,
    ),
    model_package_group_name=MODEL_PACKAGE_GROUP,
    skip_check=True,
    register_new_baseline=True,
    cache_config=cache_config,
)