In [None]:
%%sh
pip -q install sagemaker stepfunctions --upgrade

In [None]:
# Fill in the ARN of the role used to run the workflow; it is passed as
# `role=` when the Workflow object is constructed later in this notebook.
workflow_execution_role = ""

In [None]:
import boto3
import sagemaker
import stepfunctions

from stepfunctions import steps
from stepfunctions.steps import TrainingStep, ModelStep, EndpointConfigStep, EndpointStep, TransformStep, Chain
from stepfunctions.steps.states import Parallel
from stepfunctions.inputs import ExecutionInput
from stepfunctions.workflow import Workflow

In [None]:
# SageMaker session, its default bucket, and the notebook's execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Key prefix shared by everything this notebook uploads or writes.
prefix = "sklearn-boston-housing-stepfunc"

# Upload the local training CSV under "<prefix>/training".
training_data = sess.upload_data(path="housing.csv", key_prefix=f"{prefix}/training")
# Location later given to the estimator as its output_path.
output = f"s3://{bucket}/{prefix}/output/"

# Show both locations, one per line.
print(training_data, output, sep="\n")

In [None]:
import pandas as pd

# Build the batch-transform input: the same CSV without the 'medv' column,
# written with neither an index column nor a header row.
data = pd.read_csv("housing.csv").drop(columns=["medv"])
data.to_csv("test.csv", header=False, index=False)

# Upload the stripped file under "<prefix>/batch".
batch_data = sess.upload_data(path="test.csv", key_prefix=f"{prefix}/batch")

In [None]:
from sagemaker.sklearn import SKLearn

# Scikit-Learn estimator that runs the training script on a single ml.m5.large
# instance and writes its artifacts under `output`.
# `instance_count` / `instance_type` are the SageMaker SDK v2 parameter names;
# the `train_`-prefixed spellings are the deprecated v1 names, and the rest of
# this notebook already relies on v2 APIs (e.g. `sagemaker.inputs.TrainingInput`).
sk = SKLearn(
    entry_point='sklearn-boston-housing.py',
    role=role,
    framework_version='0.23-1',
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=output,
    # Hyperparameters forwarded to the entry-point script.
    hyperparameters={
        'normalize': True,
        'test-size': 0.1,
    },
)

In [None]:
# Placeholders filled in per execution; the concrete values are supplied
# through workflow.execute(inputs=...) further down.
execution_input = ExecutionInput(
    schema=dict(JobName=str, ModelName=str, EndpointName=str)
)

In [None]:
# Training step: runs the `sk` estimator on the uploaded CSV (channel
# "training"), with the job name taken from the execution input.
training_step = TrainingStep(
    "Train a Scikit-Learn script on the Boston Housing dataset",
    estimator=sk,
    data=dict(
        training=sagemaker.inputs.TrainingInput(training_data, content_type="text/csv"),
    ),
    job_name=execution_input["JobName"],
)

In [None]:
# Model step: creates a model from the training step's expected model,
# named by the "ModelName" execution input.
model_step = ModelStep(
    "Create the model in SageMaker",
    model=training_step.get_expected_model(),
    model_name=execution_input["ModelName"],
)

In [None]:
# Batch transform step: runs a transformer derived from `sk` (one
# ml.m5.large instance) over the uploaded CSV, using the same "JobName" and
# "ModelName" execution inputs as the earlier steps.
transform_step = TransformStep(
    "Transform the dataset in batch mode",
    transformer=sk.transformer(instance_type="ml.m5.large", instance_count=1),
    job_name=execution_input["JobName"],
    model_name=execution_input["ModelName"],
    data=batch_data,
    content_type="text/csv",
)

In [None]:
# Branch 1 of the parallel state: only the batch transform step.
batch_branch = Chain([transform_step])

In [None]:
# Endpoint configuration step: one ml.m5.large instance serving the model.
# The configuration is named after the "ModelName" execution input as well.
endpoint_config_step = EndpointConfigStep(
    "Create an endpoint configuration for the model",
    endpoint_config_name=execution_input["ModelName"],
    model_name=execution_input["ModelName"],
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

In [None]:
# Endpoint step: references the endpoint configuration by the same
# "ModelName" value it was created with; the endpoint itself is named by
# the "EndpointName" execution input.
endpoint_step = EndpointStep(
    "Create an endpoint hosting the model",
    endpoint_name=execution_input["EndpointName"],
    endpoint_config_name=execution_input["ModelName"],
)

In [None]:
# Branch 2 of the parallel state: endpoint configuration, then the endpoint.
endpoint_branch = Chain([endpoint_config_step, endpoint_step])

In [None]:
# Parallel state holding the two branches built above: batch transform on
# one side, endpoint deployment on the other.
parallel_step = Parallel("Parallel execution")

parallel_step.add_branch(batch_branch)
parallel_step.add_branch(endpoint_branch)

In [None]:
# Overall order: train, create the model, then run both branches.
workflow_definition = Chain([training_step, model_step, parallel_step])

In [None]:
import time

# Fail fast with a clear message: the notebook ships with an empty role, and
# an empty value would otherwise only surface later, when the workflow is
# created, as a service-side validation error.
if not workflow_execution_role:
    raise ValueError(
        "workflow_execution_role is empty: enter the workflow execution role ARN "
        "in the configuration cell before building the workflow."
    )

# UTC timestamp used to make the workflow name (and, later, the job, model and
# endpoint names) unique per notebook run.
timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

workflow = Workflow(
    name='sklearn-boston-housing-workflow2-{}'.format(timestamp),
    definition=workflow_definition,
    role=workflow_execution_role,
    execution_input=execution_input
)

In [None]:
# Optional, left disabled: uncomment to render the workflow graph in the notebook.
#workflow.render_graph(portrait=True)

In [None]:
# Create the workflow built above. The cell displays whatever create() returns
# (presumably the state machine ARN — confirm against the stepfunctions SDK docs).
workflow.create()

In [None]:
# Start one execution, filling every ExecutionInput placeholder. The job,
# model and endpoint all get the same name with the run's timestamp suffix.
execution = workflow.execute(
    inputs=dict(
        JobName=f"sklearn-boston-housing-{timestamp}",
        ModelName=f"sklearn-boston-housing-{timestamp}",
        EndpointName=f"sklearn-boston-housing-{timestamp}",
    )
)

In [None]:
# Optional, left disabled: uncomment to render the execution's progress in the notebook.
#execution.render_progress()

In [None]:
# List the events of the execution started above; the result is shown as the cell output.
execution.list_events()

In [None]:
# List the executions of this workflow, requesting HTML output (html=True).
workflow.list_executions(html=True)

In [None]:
# Called on the Workflow class, not on the `workflow` instance: lists workflows
# with HTML output (presumably every workflow visible to the current
# credentials/region — confirm against the stepfunctions SDK docs).
Workflow.list_workflows(html=True)

---