## Config

In [None]:
import boto3
import sagemaker
import sagemaker.session

# Shared AWS / SageMaker context reused by the cells below.
region = boto3.Session().region_name  # region of the default boto3 session
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()  # IAM role handed to every processor and to the pipeline
default_bucket = sagemaker_session.default_bucket()
model_package_group_name = "SagemakerWorkshop"

In [None]:
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.parameters import ParameterString,ParameterInteger
from sagemaker.model_metrics import MetricsSource, ModelMetrics

## Steps

### Data processing

In [None]:
# Generic script processor (custom container image) that runs the preprocessing script.
script_preprocess = ScriptProcessor(
    role=role,
    base_job_name="script-workshop-preprocess",
    image_uri="057384803197.dkr.ecr.eu-central-1.amazonaws.com/mlops-training-image:latest",
    command=["python3"],
    instance_count=1,
    instance_type="ml.c4.xlarge",
)

# Two named outputs: later steps look them up by output name ("train" / "test").
preprocess_outputs = [
    ProcessingOutput(source="/opt/ml/processing/train", output_name="train"),
    ProcessingOutput(source="/opt/ml/processing/test", output_name="test"),
]

# Pipeline step wrapping the processor; it declares no inputs, only the two outputs above.
step_preprocess = ProcessingStep(
    name="workshop-preprocess-data",
    code="steps/preprocess.py",
    processor=script_preprocess,
    outputs=preprocess_outputs,
)

### Training

In [None]:
# Pipeline parameters, declared as strings; the training step forwards them
# to its script as command-line arguments.
processing_estimator_count = ParameterString(default_value="100", name="processing_estimator_count")
processing_max_depth = ParameterString(default_value="10", name="processing_max_depth")

In [None]:
# Processor that runs the training script inside the custom workshop image.
script_train = ScriptProcessor(
    role=role,
    base_job_name="script-workshop-train",
    image_uri="057384803197.dkr.ecr.eu-central-1.amazonaws.com/mlops-training-image:latest",
    command=["python3"],
    instance_count=1,
    instance_type="ml.c4.xlarge",
)

# S3 location of the "train" output emitted by the preprocessing step.
train_split_uri = step_preprocess.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri

# The two pipeline parameters are passed to the script as --flag value pairs.
train_job_arguments = [
    "--processing_estimator_count",
    processing_estimator_count,
    "--processing_max_depth",
    processing_max_depth,
]

# Training step: consumes the train split and publishes a "model" output.
step_train = ProcessingStep(
    name="workshop-train-model",
    code="steps/train.py",
    processor=script_train,
    inputs=[
        ProcessingInput(destination="/opt/ml/processing/train", source=train_split_uri),
    ],
    outputs=[
        ProcessingOutput(source="/opt/ml/processing/model", output_name="model"),
    ],
    job_arguments=train_job_arguments,
    property_files=[],
)

### Eval

In [None]:
# Processor dedicated to the evaluation job (custom workshop image).
script_eval = ScriptProcessor(
    image_uri="057384803197.dkr.ecr.eu-central-1.amazonaws.com/mlops-training-image:latest",
    command=["python3"],
    instance_type="ml.c4.xlarge",
    instance_count=1,
    base_job_name="script-workshop-eval",
    role=role,
)

# Declares evaluation.json (inside the "evaluation" output) as a property file
# so that the condition step can read values from it via JsonGet.
evaluation_report = PropertyFile(
    name="EvaluationReport",
    output_name="evaluation",
    path="evaluation.json"
)

# Evaluation step: receives the test split from preprocessing and the model
# artifact from training, and publishes an "evaluation" output.
step_eval = ProcessingStep(
    name="workshop-eval-model",
    # Fix: use the evaluation processor defined above. The original passed
    # script_train here, leaving script_eval unused and running the job under
    # the training base job name.
    processor=script_eval,
    inputs=[
        ProcessingInput(
            source=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                "test"
            ].S3Output.S3Uri,
            destination="/opt/ml/processing/test",
        ),
        ProcessingInput(
            source=step_train.properties.ProcessingOutputConfig.Outputs[
                "model"
            ].S3Output.S3Uri,
            destination="/opt/ml/processing/model",
        )
    ],
    outputs=[
        ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
    ],
    # NOTE(review): this step runs the *training* script. That looks like a
    # copy-paste leftover; presumably a dedicated evaluation script that writes
    # evaluation.json is intended - confirm the file name and update.
    code="steps/train.py",
    property_files=[evaluation_report],
)

# Model metrics pointing at evaluation.json under the step's first (and only)
# declared output, i.e. the "evaluation" output.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri="{}/evaluation.json".format(
            step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
        ),
        content_type="application/json",
    )
)

### Deploy model

In [None]:
# Processor that runs the deployment script inside the custom workshop image.
script_deploy = ScriptProcessor(
    role=role,
    base_job_name="script-workshop-deploy",
    image_uri="057384803197.dkr.ecr.eu-central-1.amazonaws.com/mlops-training-image:latest",
    command=["python3"],
    instance_count=1,
    instance_type="ml.c4.xlarge",
)

# Deployment step: declares no inputs, outputs or property files of its own.
step_deploy = ProcessingStep(
    name="workshop-deploy-model",
    code="steps/deploy.py",
    processor=script_deploy,
    inputs=[],
    outputs=[],
    property_files=[],
)

### Condition

In [None]:
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.functions import JsonGet


# Value read from the evaluation report at JSON path metrics.accuracy.value.
reported_accuracy = JsonGet(
    step_name=step_eval.name,
    property_file=evaluation_report,
    json_path="metrics.accuracy.value",
)

# Gate: the reported accuracy must be greater than or equal to 0.9.
cond_gte = ConditionGreaterThanOrEqualTo(left=reported_accuracy, right=0.9)

# Run the deploy step only when the gate passes; nothing runs otherwise.
step_cond = ConditionStep(
    name="accuracy-check",
    conditions=[cond_gte],
    if_steps=[step_deploy],
    else_steps=[],
)

## Pipeline

### Define

In [None]:
from sagemaker.workflow.pipeline import Pipeline


pipeline_name = "WorkshopPipelineThomasDehaene"

# Parameters exposed on the pipeline (both are consumed by the training step).
pipeline_parameters = [processing_estimator_count, processing_max_depth]

# Top-level steps. The deploy step is not listed here because it is attached
# to the condition step through its if_steps.
pipeline_steps = [step_preprocess, step_train, step_eval, step_cond]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=pipeline_parameters,
    steps=pipeline_steps,
)

### Run

In [None]:
import json

# Parse the pipeline's JSON definition so the notebook displays it as a Python object.
pipeline_definition = json.loads(pipeline.definition())
pipeline_definition

In [None]:
# Register the pipeline definition with SageMaker using the execution role
# (upsert: presumably creates the pipeline or updates an existing one of the same name).
pipeline.upsert(role_arn=role)

In [None]:
# Start a pipeline execution; no parameter overrides are passed here.
execution = pipeline.start()

In [None]:
# Display the description of the execution started above.
execution.describe()