In [None]:
import logging
import os

import boto3
import sagemaker
from sagemaker import image_uris
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.conditions import ConditionEquals
from sagemaker.workflow.fail_step import FailStep
from sagemaker.workflow.functions import Join, JsonGet
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat,
)
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline import PipelineDefinitionConfig
from sagemaker.workflow.steps import ProcessingStep

from steps.processor import get_processor_step
from steps.evaluator import get_evaluator_step
from steps.trainer import get_trainer_step

In [None]:
def get_parameters() -> dict:
    """Create every pipeline parameter and return them keyed by a short name.

    Returns:
        dict: Maps a snake_case key to a ``ParameterString``,
        ``ParameterInteger`` or ``ParameterFloat`` instance. Insertion order
        is: common, processing, trainer, evaluator.
    """
    return {
        # - Common --------------------------------------
        # The seed is declared as a string parameter, not an integer.
        "random_seed": ParameterString(name="RandomSeed", default_value="1"),
        # - Processing ----------------------------------
        "process_instance_count": ParameterInteger(
            name="ProcessingInstanceCount", default_value=1
        ),
        "process_instance_type": ParameterString(
            name="ProcessingInstanceType", default_value="ml.m5.large"
        ),
        # - Trainer -------------------------------------
        "tracking_server_arn": ParameterString(
            name="TrackingServerArn",
            default_value=(
                "arn:aws:sagemaker:eu-central-1:567821811420:"
                "mlflow-tracking-server/wildfire-mj"
            ),
        ),
        "train_instance_count": ParameterInteger(
            name="TrainInstanceCount", default_value=1
        ),
        "train_instance_type": ParameterString(
            name="TrainInstanceType", default_value="ml.p3.2xlarge"
        ),
        "train_epochs_num": ParameterInteger(
            name="NumberOfEpochs", default_value=10
        ),
        "train_batch_size": ParameterInteger(name="BatchSize", default_value=32),
        "train_learning_rate": ParameterFloat(
            name="LearningRate", default_value=0.1
        ),
        # - Evaluator ----------------------------------
        "evaluator_instance_count": ParameterInteger(
            name="EvaluatingInstanceCount", default_value=1
        ),
        "evaluator_instance_type": ParameterString(
            name="EvaluatingInstanceType", default_value="ml.m5.large"
        ),
    }

In [None]:
def get_conditional_step(
    project: str,
    bucket_name: str,
    process_instance_count_param: int,
    process_instance_type_param: str,
    evaluation_image_uri: str,
    region: str,

    model_path: str,
    wait_step: ProcessingStep,
    deployment_step: ProcessingStep,
    model_package_arn: str,
    condition_step_suffix: str
):
    """Build the approval gate: deploy when the model status is "Approved", fail otherwise.

    The condition reads ``$.ApprovalStatus`` from the JSON object at
    ``s3://wildfires/model_status/<model_package_arn>.json`` (joined with "/")
    and compares it with the literal ``"Approved"``.

    Args:
        project: Prefix for the fail step name (``"<project>-fail"``).
        bucket_name: Accepted but not read by this function; the bucket in the
            status URI is hard-coded to ``wildfires``.
            NOTE(review): presumably this should drive the bucket - confirm.
        process_instance_count_param: Accepted but not read by this function.
        process_instance_type_param: Accepted but not read by this function.
        evaluation_image_uri: Accepted but not read by this function.
        region: Accepted but not read by this function.
        model_path: Accepted but not read by this function.
        wait_step: Step handed to ``JsonGet`` as the ``step`` the S3 URI is
            associated with.
        deployment_step: Step placed in the ``if_steps`` branch.
        model_package_arn: Used verbatim as the file stem of the status JSON.
        condition_step_suffix: Appended to ``"CheckIfApproved"`` to form the
            condition step name.

    Returns:
        ConditionStep: Runs ``deployment_step`` when the condition holds and a
        ``FailStep`` otherwise.
    """
    # No module-level `logger` is defined in this notebook, so obtain one here.
    logging.getLogger(__name__).info("Starting conditional_step")

    step_fail = FailStep(
        name=f"{project}-fail",
        error_message="Execution failed due to Threshold"
    )

    # Location of the JSON document that carries the approval status.
    status_uri = Join(
        on="/",
        values=[
            "s3://wildfires",
            "model_status",
            f"{model_package_arn}.json",
        ],
    )

    approval_status = JsonGet(
        step=wait_step,
        s3_uri=status_uri,
        json_path="$.ApprovalStatus",
    )

    # Condition step: check the model approval status.
    return ConditionStep(
        name=f"CheckIfApproved{condition_step_suffix}",
        conditions=[ConditionEquals(left=approval_status, right="Approved")],
        if_steps=[deployment_step],
        else_steps=[step_fail],
    )


In [None]:
def get_pipeline(
    session: sagemaker.Session,
    parameters: dict,
    constants: dict,
    sklearn_image_uri: str,
):
    """Assemble the processing -> training pipeline.

    Args:
        session: Accepted but not read by this function.
        parameters: Pipeline parameters keyed by short name. Every value is
            registered on the pipeline, in the dict's iteration order.
        constants: Must provide ``"project"``, ``"bucket_name"`` and
            ``"region"``.
        sklearn_image_uri: Image URI forwarded to the processing step.

    Returns:
        Pipeline: Named ``"<project>-pipeline"`` and containing only the
        processor and trainer steps. The evaluator and registry steps are
        built below but are not added to the pipeline.
    """
    project = constants["project"]
    bucket = constants["bucket_name"]
    region = constants["region"]

    # Literals shared by the evaluator and registry step factories.
    inference_image_uri = '763104351884.dkr.ecr.eu-central-1.amazonaws.com/pytorch-inference:2.3.0-gpu-py311-cu121-ubuntu20.04-ec2'
    package_arn = 'arn:aws:sagemaker:eu-central-1:567821811420:model-package/first-fire-mlflow-ee0049/1'
    results_prefix = 'evaluation/result'

    definition_config = PipelineDefinitionConfig(use_custom_job_prefix=True)

    # - Processing ----------------------------------
    processor_step = get_processor_step(
        project=project,
        bucket_name=bucket,
        process_instance_count=parameters["process_instance_count"],
        process_instance_type=parameters["process_instance_type"],
        sklearn_image_uri=sklearn_image_uri,
        region=region,
        seed=parameters["random_seed"],
    )

    # - Trainer -------------------------------------
    trainer_step = get_trainer_step(
        project=project,
        bucket_name=bucket,
        tracking_server_arn=parameters["tracking_server_arn"],
        train_instance_count=parameters["train_instance_count"],
        train_instance_type=parameters["train_instance_type"],
        region=region,
        epochs_num=parameters["train_epochs_num"],
        batch_size=parameters["train_batch_size"],
        learning_rate=parameters["train_learning_rate"],
        seed=parameters["random_seed"],
    )

    # - Evaluator ----------------------------------
    # Built but currently left out of the pipeline's step list.
    evaluator_step = get_evaluator_step(
        project=project,
        bucket_name=bucket,
        evaluator_instance_count=parameters["evaluator_instance_count"],
        evaluator_instance_type=parameters["evaluator_instance_type"],
        evaluation_image_uri=inference_image_uri,
        training_step=trainer_step,
        result_prefix=results_prefix,
        model_package_arn=package_arn,
        region=region,
    )

    # - Model Registry ----------------------------------
    # NOTE(review): get_conditional_registry_step is neither defined nor
    # imported in the visible notebook - confirm it exists before running.
    # It receives the same arguments as the evaluator step and its result is
    # also left out of the pipeline's step list.
    registry_step = get_conditional_registry_step(
        project=project,
        bucket_name=bucket,
        evaluator_instance_count=parameters["evaluator_instance_count"],
        evaluator_instance_type=parameters["evaluator_instance_type"],
        evaluation_image_uri=inference_image_uri,
        training_step=trainer_step,
        result_prefix=results_prefix,
        model_package_arn=package_arn,
        region=region,
    )

    # Training must wait for processing to finish.
    trainer_step.add_depends_on([processor_step])

    return Pipeline(
        name=f"{project}-pipeline",
        parameters=list(parameters.values()),
        pipeline_definition_config=definition_config,
        steps=[processor_step, trainer_step],
    )