In [1]:
import sagemaker

# S3 location shared by every cell below: s3://<bucket>/<prefix>/...
bucket, prefix = "pipeline-cross-validation", "data"

# Number of target classes handed to XGBoost via the `num_class` hyperparameter.
num_class = 8

# SageMaker session plus the IAM role the pipeline is registered with.
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()

In [2]:
from sagemaker.workflow.parameters import ParameterString

# Pipeline parameter: instance type passed to the train and create-model step
# builders in create_steps.
training_instance_type = ParameterString(
    default_value="ml.p3.2xlarge",
    name="TrainingInstanceType",
)

# Pipeline parameter: S3 prefix passed to every transform step as its input
# (presumably the test features shared by all iterations -- confirm against
# get_step_transform).
src_test_path = ParameterString(
    default_value=f"s3://{bucket}/{prefix}/test/features/",
    name="src_test_path",
)

# Instance type passed to the create-model and transform step builders.
# Must be a plain string: a trailing comma here would silently turn it into
# the 1-tuple ("ml.m5.large",).
model_instance_type = "ml.m5.large"

In [4]:
# Hyperparameters handed unchanged to every training step built by
# create_steps. All values except num_class are given as strings.
hyperparameters = dict(
    num_class=num_class,
    max_depth="5",
    eta="0.2",
    min_child_weight="1",
    objective="multi:softmax",
    eval_metric="mlogloss",
    num_round="500",
    tree_method="gpu_hist",
    early_stopping_rounds="10",
    # csv_weights="1" is intentionally left disabled.
)

In [5]:
def create_steps(iteration):
    """Build the three pipeline steps for a single iteration.

    The iteration number is embedded in every S3 path and step name, so each
    iteration reads from and writes to its own locations under
    ``s3://{bucket}/{prefix}``.

    Args:
        iteration: Value interpolated into the ``iter{iteration}`` suffix of
            the paths and names (the caller passes integers from ``range``).

    Returns:
        A list ``[step_train, step_create_model, step_transform]`` in that
        order, as returned by the ``pipeline_cv.xgboost`` step builders.
    """
    from pipeline_cv.xgboost import get_step_model, get_step_train, get_step_transform

    data_root = f"s3://{bucket}/{prefix}"
    train_name = f"train_iter{iteration}"

    # Training step: reads s3://.../iter{n}, writes the model under model_train/.
    step_train = get_step_train(
        train_name,
        training_instance_type,
        hyperparameters,
        f"{data_root}/model_train/iter{iteration}",
        f"{data_root}/iter{iteration}",
    )

    # The model name is the training name with underscores swapped for hyphens
    # (presumably to satisfy SageMaker model-name rules -- confirm).
    step_create_model = get_step_model(
        training_instance_type,
        step_train,
        train_name.replace("_", "-"),
        model_instance_type,
    )

    # Transform step: takes the shared src_test_path input, writes to output/iter{n}.
    step_transform = get_step_transform(
        step_create_model,
        model_instance_type,
        f"{data_root}/output/iter{iteration}",
        f"transform_iter{iteration}",
        src_test_path,
    )

    return [step_train, step_create_model, step_transform]

In [6]:
# Number of iterations; each one contributes its own
# train / create-model / transform step triple.
iterations = 4

# Flat list of every step from every iteration, in iteration order.
whole_steps = []
for fold_index in range(iterations):
    whole_steps += create_steps(fold_index)

In [7]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "220709Pipeline2"

# One pipeline holding the steps of all iterations; the two ParameterString
# objects are declared so they can be overridden per execution.
pipeline = Pipeline(
    name=pipeline_name,
    steps=whole_steps,
    parameters=[training_instance_type, src_test_path],
)

In [8]:
# Create the pipeline definition, or update it if one with this name already
# exists, using the notebook's execution role.
pipeline.upsert(role_arn=role)
# Start a run with the parameters' default values; `execution` is the handle
# used by the status cells below.
execution = pipeline.start()

In [19]:
# Per-step status of the running execution (step name, status, job/model ARN).
execution.list_steps()

[{'StepName': 'transform_iter0',
  'StartTime': datetime.datetime(2022, 7, 9, 9, 19, 43, 922000, tzinfo=tzlocal()),
  'StepStatus': 'Executing',
  'AttemptCount': 0,
  'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-east-2:173863741403:transform-job/pipelines-rfvats4iv8wu-transform-iter0-gtbzsfdeso'}}},
 {'StepName': 'train-iter0',
  'StartTime': datetime.datetime(2022, 7, 9, 9, 19, 41, 694000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2022, 7, 9, 9, 19, 43, 135000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'AttemptCount': 0,
  'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-2:173863741403:model/pipelines-rfvats4iv8wu-train-iter0-ovzktsordp'}}},
 {'StepName': 'transform_iter2',
  'StartTime': datetime.datetime(2022, 7, 9, 9, 19, 35, 338000, tzinfo=tzlocal()),
  'StepStatus': 'Executing',
  'AttemptCount': 0,
  'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-east-2:173863741403:transform-job/pipelines-rfvats4iv8wu-transform-iter2-w3smhrb4x2

In [18]:
# Overall status and metadata of the pipeline execution.
# NOTE(review): the recorded execution count (18) is lower than the
# list_steps cell above (19), so the saved outputs were not produced in
# top-to-bottom order.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:us-east-2:173863741403:pipeline/220709pipeline2',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-2:173863741403:pipeline/220709pipeline2/execution/rfvats4iv8wu',
 'PipelineExecutionDisplayName': 'execution-1657358129776',
 'PipelineExecutionStatus': 'Executing',
 'PipelineExperimentConfig': {'ExperimentName': '220709pipeline2',
  'TrialName': 'rfvats4iv8wu'},
 'CreationTime': datetime.datetime(2022, 7, 9, 9, 15, 29, 692000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 7, 9, 9, 15, 29, 692000, tzinfo=tzlocal()),
 'CreatedBy': {},
 'LastModifiedBy': {},
 'ResponseMetadata': {'RequestId': 'df595c39-9f4b-4d90-a0d8-631a6ca2cf0f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'df595c39-9f4b-4d90-a0d8-631a6ca2cf0f',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '486',
   'date': 'Sat, 09 Jul 2022 09:22:22 GMT'},
  'RetryAttempts': 0}}