In [None]:
# Fetch the preprocessing script and save it as processing.py, the file
# name later passed as `code` to the ProcessingStep.
!wget -O processing.py https://bit.ly/3QiGDQO

In [None]:
# Local scratch directory for downloaded files (-p: no error if it exists).
!mkdir -p tmp

In [None]:
# Download the bookings CSV into the scratch directory.
!wget -O tmp/bookings.all.csv https://bit.ly/3BUcMK4

In [None]:
# Replace the placeholder with the name of the S3 bucket to use. The S3
# locations defined in this notebook (source data, processing output,
# model output, inference source) are all built from these two values.
s3_bucket = '<INSERT S3 BUCKET NAME HERE>'
# Key prefix inside the bucket under which those locations live.
prefix = 'pipeline'

In [None]:
# Create the S3 bucket named in s3_bucket.
!aws s3 mb s3://{s3_bucket}

In [None]:
# S3 location of the uploaded dataset; also used as the default value of
# the pipeline's "RawData" parameter.
source_path = f's3://{s3_bucket}/{prefix}/source/dataset.all.csv'

In [None]:
# Upload the downloaded CSV to source_path (stored there as dataset.all.csv).
!aws s3 cp tmp/bookings.all.csv {source_path}

In [None]:
import boto3
import sagemaker

from sagemaker import get_execution_role
from sagemaker.sklearn.processing import (
    SKLearnProcessor
)
from sagemaker.workflow.steps import (
    ProcessingStep, 
    TrainingStep
)
from sagemaker.workflow.step_collections import (
    RegisterModel
)
from sagemaker.processing import (
    ProcessingInput, 
    ProcessingOutput
)
from sagemaker.workflow.parameters import (
    ParameterString
)
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator
from sagemaker.workflow.pipeline import Pipeline

In [None]:
# Execution role shared by the processor, the estimator, the model, and
# the pipeline upsert call (where it is passed as role_arn).
role = get_execution_role()

In [None]:
# SageMaker session object; handed to the Estimator defined further down.
session = sagemaker.Session()

In [None]:
# Pipeline parameter "RawData": the S3 URI of the input dataset, with
# source_path as its default value.
input_data = ParameterString(name="RawData", default_value=source_path)

In [None]:
# Input of the processing job: the dataset referenced by the RawData
# parameter is made available under /opt/ml/processing/input/.
input_raw = ProcessingInput(
    destination='/opt/ml/processing/input/',
    source=input_data,
)

# Output of the processing job, named "split": the contents of
# /opt/ml/processing/output/ go to the bucket's output/ prefix. The
# training step looks this output up by its name.
output_split = ProcessingOutput(
    destination=f's3://{s3_bucket}/{prefix}/output/',
    source='/opt/ml/processing/output/',
    output_name="split",
)

In [None]:
# Scikit-learn processor that runs processing.py on one ml.m5.large instance.
processor = SKLearnProcessor(
    role=role,
    framework_version='0.20.0',
    instance_type='ml.m5.large',
    instance_count=1,
)

# First pipeline step: run processing.py with the raw dataset as input
# and the "split" output as result.
step_process = ProcessingStep(
    name="PrepareData",
    code="processing.py",
    processor=processor,
    inputs=[input_raw],
    outputs=[output_split],
)

In [None]:
# S3 prefix used as the estimator's output_path.
model_path = f"s3://{s3_bucket}/{prefix}/model/"

In [None]:
# Model identifier used for every image, script, model and hyperparameter
# lookup in this notebook.
model_id = "autogluon-classification-ensemble"

In [None]:
# Take the region from the active SageMaker session instead of hard-coding
# one. The script and model URI lookups below pass no region and therefore
# use the session default; deriving region_name the same way keeps the
# container images in the same region as everything else in the notebook.
region_name = session.boto_region_name

In [None]:
from sagemaker import image_uris

# Container image for training model_id (any version) on ml.m5.xlarge
# in region_name.
train_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version="*",
    image_scope="training",
    instance_type="ml.m5.xlarge",
    region=region_name,
    framework=None,
)

In [None]:
from sagemaker import script_uris

# Location of the training script bundle for model_id (any version).
train_source_uri = script_uris.retrieve(
    script_scope="training",
    model_id=model_id,
    model_version="*",
)

In [None]:
# Keep a local copy of the training script bundle in the scratch directory.
!aws s3 cp {train_source_uri} tmp/sourcedir.tar.gz

In [None]:
from sagemaker import model_uris

# Location of the model artifact used as the starting point for training
# model_id (any version).
train_model_uri = model_uris.retrieve(
    model_scope="training",
    model_id=model_id,
    model_version="*",
)

In [None]:
# Keep a local copy of the training model artifact in the scratch directory.
!aws s3 cp {train_model_uri} tmp/ensemble.tar.gz

In [None]:
# Estimator describing the training job: the retrieved training image,
# script bundle and model artifact, run on a single ml.m5.xlarge instance.
# (Estimator itself is imported in the imports cell at the top.)
estimator = Estimator(
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    max_run=900,  # upper bound on training time, in seconds
    output_path=model_path,
    # The Estimator keyword is `sagemaker_session`; the previous spelling
    # `session=` is not a constructor parameter, so the session object was
    # never actually attached to the estimator.
    sagemaker_session=session,
    role=role,
)

In [None]:
from sagemaker.hyperparameters import retrieve_default

# Start from the default hyperparameters for model_id, set "verbosity"
# to "3", and apply the result to the estimator.
hyperparameters = retrieve_default(model_version="*", model_id=model_id)
hyperparameters.update({"verbosity": "3"})
estimator.set_hyperparameters(**hyperparameters)

In [None]:
# S3 URI of the processing step's "split" output, read from the step's
# properties.
s3_data = (
    step_process.properties
    .ProcessingOutputConfig
    .Outputs["split"]
    .S3Output
    .S3Uri
)

# Second pipeline step: train the estimator with that URI as the
# "training" channel.
step_train = TrainingStep(
    name="TrainModel",
    estimator=estimator,
    inputs={"training": TrainingInput(s3_data=s3_data)},
)

In [None]:
# NOTE: this cell repeats the definitions of the cell directly before it
# (same s3_data, same "TrainModel" step); running it simply rebinds both
# names to equivalent objects. It is kept so the notebook's cell sequence
# is unchanged, but one of the two cells can be dropped.
#
# The attribute chain is wrapped in parentheses: the earlier form ended
# with a trailing backslash after `.S3Uri`, which only parsed because a
# blank line happened to follow it.
s3_data = (
    step_process
    .properties
    .ProcessingOutputConfig
    .Outputs["split"]
    .S3Output.S3Uri
)

step_train = TrainingStep(
    name="TrainModel",
    estimator=estimator,
    inputs={
        "training": TrainingInput(
            s3_data=s3_data,
        )
    },
)

In [None]:
# Container image for serving model_id (any version) on ml.m5.xlarge
# in region_name.
deploy_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version="*",
    image_scope="inference",
    instance_type="ml.m5.xlarge",
    region=region_name,
    framework=None,
)

# Location of the inference script bundle for the same model.
deploy_source_uri = script_uris.retrieve(
    script_scope="inference",
    model_id=model_id,
    model_version="*",
)

In [None]:
# Download the inference script bundle. This overwrites the local
# tmp/sourcedir.tar.gz that was fetched earlier for the training scripts.
!aws s3 cp {deploy_source_uri} tmp/sourcedir.tar.gz

In [None]:
updated_source_uri = f's3://{s3_bucket}/{prefix}' + \
                      '/sourcedir/sourcedir.tar.gz'

!aws s3 cp tmp/sourcedir.tar.gz {updated_source_uri}

In [None]:
import uuid

def random_string():
    """Return a 6-character uppercase hexadecimal token from a fresh UUID4."""
    token = uuid.uuid4().hex[:6]
    return token.upper()

In [None]:
from sagemaker.model import Model
from sagemaker.workflow.pipeline_context import PipelineSession

# Session passed to the Model as its sagemaker_session.
pipeline_session = PipelineSession()

# S3 location of the trained model artifacts, read from the training
# step's properties. Written on one line: the earlier form ended with a
# trailing backslash after `.S3ModelArtifacts`, which only parsed because
# a blank line separated it from the next statement.
model_data = step_train.properties.ModelArtifacts.S3ModelArtifacts

# Model built from the inference image, the re-uploaded inference bundle
# and the training step's artifacts, with a random 6-character name.
model = Model(
    image_uri=deploy_image_uri,
    source_dir=updated_source_uri,
    model_data=model_data,
    role=role,
    entry_point="inference.py",
    sagemaker_session=pipeline_session,
    name=random_string(),
)

In [None]:
from sagemaker.workflow.model_step import ModelStep

model_package_group_name = "AutoGluonModelGroup"

# Arguments for registering the model in the package group above, with
# CSV input, JSON output, ml.m5.xlarge for both inference and transform,
# and an approval status of "Approved".
register_args = model.register(
    model_package_group_name=model_package_group_name,
    approval_status="Approved",
    content_types=["text/csv"],
    response_types=["application/json"],
    inference_instances=["ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
)

# Third pipeline step, built from the registration arguments.
step_model_create = ModelStep(name="CreateModel", step_args=register_args)

In [None]:
# Plain string literal: the name contains no placeholders, so the earlier
# f-string prefix had no effect.
pipeline_name = "PARTIAL-PIPELINE"

# Pipeline made of the processing, training and model steps, with the
# RawData parameter as its only parameter.
partial_pipeline = Pipeline(
    name=pipeline_name,
    parameters=[input_data],
    steps=[
        step_process,
        step_train,
        step_model_create,
    ],
)

In [None]:
# Upsert the pipeline definition, using the execution role as role_arn.
partial_pipeline.upsert(role_arn=role)

In [None]:
# Start a pipeline execution; its description is the cell's displayed output.
execution = partial_pipeline.start()
execution.describe()

In [None]:
# Wait on the pipeline execution started in the previous cell.
execution.wait()