In [1]:
import sagemaker

# A single SageMaker session and execution role, reused by the cells that follow.
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()

# S3 location pieces: the notebook builds its paths as s3://<bucket>/<prefix>/...
bucket, prefix = "pipeline-cross-validation", "data"

# Number of target classes; forwarded to training as the `num_class` hyperparameter.
num_class = 8

In [2]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
)

# Pipeline parameters: named values with defaults that are registered on the
# pipeline definition.

# Instance type used by the training step.
training_instance_type = ParameterString(
    name="TrainingInstanceType", default_value="ml.p3.2xlarge"
)

# S3 prefix holding the features that the batch transform step reads.
batch_data = ParameterString(
    name="BatchData",
    default_value="s3://{}/{}/test/features/".format(bucket, prefix),
)

In [3]:
# XGBoost hyperparameters handed to the Estimator. Every value except
# `num_class` (an int defined in the first cell) is given as a string.
# NOTE(review): tree_method="gpu_hist" presumably requires a GPU training
# instance -- confirm before overriding TrainingInstanceType with a CPU type.
hyperparameters = dict(
    num_class=num_class,
    max_depth="5",
    eta="0.2",
    min_child_weight="1",
    objective="multi:softmax",
    eval_metric="mlogloss",
    num_round="500",
    tree_method="gpu_hist",
    early_stopping_rounds="10",
    # csv_weights="1",  # currently disabled
)

In [4]:
from sagemaker.estimator import Estimator

# Iteration index for this run; it is embedded in the S3 paths and step names
# that are built from it.
iteration = 0
model_path = f"s3://{bucket}/{prefix}/model_train/iter{iteration}"

# image_uris.retrieve() runs right now, while the notebook executes, so it
# needs a concrete instance type string. `training_instance_type` is a pipeline
# parameter that is only resolved when the pipeline executes, and recent
# SageMaker SDK releases raise a ValueError when a pipeline variable is passed
# here. Use the parameter's default value to resolve the image instead.
image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=sagemaker_session.boto_region_name,
    version="1.2-2",
    py_version="py3",
    instance_type=training_instance_type.default_value,
)

# The estimator keeps the pipeline parameter itself, so the training instance
# type can still be overridden per pipeline execution.
xgb_train = Estimator(
    image_uri=image_uri,
    hyperparameters=hyperparameters,
    instance_type=training_instance_type,
    instance_count=1,
    output_path=model_path,
    role=role,
)

In [5]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

# Step name carries the iteration index so each iteration gets its own step.
train_name = f"train_iter{iteration}"

# Training step: both channels are CSV data stored under the same
# per-iteration prefix, differing only in the final path segment.
step_train = TrainingStep(
    name=train_name,
    estimator=xgb_train,
    inputs={
        channel: TrainingInput(
            s3_data=f"s3://{bucket}/{prefix}/iter{iteration}/{channel}/",
            content_type="text/csv",
        )
        for channel in ("train", "validation")
    },
)

In [6]:
from sagemaker.model import Model

# Model definition: same container image as training, with the artifact
# location taken from the training step's properties rather than a fixed path.
model = Model(
    image_uri=image_uri,
    role=role,
    sagemaker_session=sagemaker_session,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
)

In [7]:
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.steps import CreateModelStep

# Derive the create-model step name from the training step name, swapping
# every underscore for a hyphen.
model_name = "-".join(train_name.split("_"))

inputs = CreateModelInput(
    instance_type="ml.m5.large",
    # accelerator_type="ml.eia1.medium",  # currently disabled
)

# Step that creates the SageMaker model from the `model` definition.
step_create_model = CreateModelStep(name=model_name, model=model, inputs=inputs)

In [8]:
from sagemaker.transformer import Transformer

# Batch transform results are written under a per-iteration output prefix.
transform_path = f"s3://{bucket}/{prefix}/output/iter{iteration}"

# The transformer refers to the model by the name exposed in the
# create-model step's properties.
transformer = Transformer(
    model_name=step_create_model.properties.ModelName,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=transform_path,
)


In [9]:
from sagemaker.inputs import TransformInput
from sagemaker.workflow.steps import TransformStep

# Step name carries the iteration index, like the training step.
transform_name = f"transform_iter{iteration}"

# Batch transform step; its input location is the `batch_data` pipeline parameter.
step_transform = TransformStep(
    name=transform_name,
    transformer=transformer,
    inputs=TransformInput(data=batch_data),
)

In [10]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "220709Pipeline"

# Assemble the pipeline from the two parameters and the three steps:
# train, create model, batch transform.
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[training_instance_type, batch_data],
    steps=[step_train, step_create_model, step_transform],
)


In [11]:
# Register the pipeline definition under the execution role
# (upsert: create it, or update it if it already exists).
pipeline.upsert(role_arn=role)

# Start an execution; no parameter overrides are passed here.
execution = pipeline.start()