In [1]:
# Create the local scratch directory used by the download and CSV cells below.
# `-p` makes this a no-op when the directory already exists, so the cell can be re-run.
!mkdir -p tmp

In [2]:
# Pieces of the raw-GitHub URL that hosts the sample data files.
g = "raw.githubusercontent.com"   # host
p = "PacktPublishing"             # organisation
a = "Amazon-SageMaker-Cookbook"   # repository
mc = "master/Chapter01"           # branch / chapter folder

# Base URL of the chapter's `files` directory.
path = "https://" + "/".join([g, p, a, mc, "files"])

In [3]:
fname = "management_experience_and_salary.csv"

!wget -P tmp {path}/{fname}

--2021-06-08 09:56:32--  https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Chapter01/files/management_experience_and_salary.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 331 [text/plain]
Saving to: ‘tmp/management_experience_and_salary.csv.10’


2021-06-08 09:56:32 (10.9 MB/s) - ‘tmp/management_experience_and_salary.csv.10’ saved [331/331]



In [4]:
import pandas as pd
# Read the downloaded CSV from the local scratch directory into a DataFrame.
filename = "tmp/{}".format(fname)
df_all_data = pd.read_csv(filepath_or_buffer=filename)

In [5]:
# Display the full dataset loaded from the CSV.
df_all_data

Unnamed: 0,last_name,management_experience_months,monthly_salary
0,Taylor,65,1630
1,Wang,61,1330
2,Brown,38,1290
3,Harris,71,1480
4,Jones,94,1590
5,Garcia,93,1750
6,Williams,15,1020
7,Lee,56,1290
8,White,59,1430
9,Tan,7,960


In [6]:
from sklearn.model_selection import train_test_split

dad = df_all_data

# Feature (months of management experience) and target (monthly salary),
# both pulled out of the DataFrame as plain arrays.
X, y = (
    dad[column].values
    for column in ('management_experience_months', 'monthly_salary')
)

# Hold out 30% of the rows for testing; random_state=0 keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.3)

In [7]:
import pandas as pd

# Assemble the training split with the target column first and the feature second.
df_training_data = pd.DataFrame(
    data=dict(
        monthly_salary=y_train,
        management_experience_months=X_train,
    )
)

df_training_data

Unnamed: 0,monthly_salary,management_experience_months
0,1020,15
1,1390,49
2,1590,94
3,1290,38
4,1750,93
5,1240,46
6,960,7
7,1290,56
8,960,19
9,1340,67


In [8]:
# Write the training rows as a bare CSV: no header row and no index column.
df_training_data.to_csv('tmp/training_data.csv', index=False, header=False)

In [9]:
# S3 bucket and key prefix under which this notebook's input and output objects are stored.
s3_bucket = "sagemaker-cookbook-bucket"
prefix = "chapter09"

In [10]:
tn = "training_data.csv"
source = f"tmp/{tn}"
dest = f"s3://{s3_bucket}/{prefix}/input/{tn}"

!aws s3 cp {source} {dest}

upload: tmp/training_data.csv to s3://sagemaker-cookbook-bucket/chapter09/input/training_data.csv


In [11]:
import sagemaker 
import boto3
from sagemaker import get_execution_role 

# Region of the default boto3 session; passed to the image-URI lookup further down.
region_name = boto3.Session().region_name
# SageMaker session and execution role that are handed to the estimator.
session = sagemaker.Session()
role = get_execution_role()

In [12]:
# Common S3 root for this notebook: s3://<bucket>/<prefix>
s3_base = "s3://{}/{}".format(s3_bucket, prefix)

# Where the training job reads its CSV from, and where it writes its output.
training_s3_input_location = s3_base + "/input/training_data.csv"
training_s3_output_location = s3_base + "/output/"

In [13]:
from sagemaker.inputs import TrainingInput

# Training channel definition: the uploaded CSV, declared as text/csv.
train = TrainingInput(s3_data=training_s3_input_location, content_type="text/csv")

In [14]:
from sagemaker.image_uris import retrieve 

# Look up the Linear Learner image URI (version "1") for the current region.
container = retrieve(framework="linear-learner", region=region_name, version="1")

container

'382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1'

In [15]:
# Generic estimator around the Linear Learner image: one ml.m5.xlarge instance,
# with output written under the notebook's S3 output location.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    sagemaker_session=session,
    output_path=training_s3_output_location,
    instance_type='ml.m5.xlarge',
    instance_count=1,
)

In [16]:
# Configure the algorithm as a regressor that trains in mini-batches of 4 records.
estimator.set_hyperparameters(mini_batch_size=4, predictor_type='regressor')

In [17]:
!pip -q install --upgrade stepfunctions

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.[0m


In [18]:
# IAM role ARN passed to the Step Functions workflow below. This is a separate
# value from `role`, which is the SageMaker execution role given to the estimator.
# NOTE(review): account-specific ARN is hardcoded; replace it when running elsewhere.
execution_role = "arn:aws:iam::581320662326:role/test-002"

In [19]:
from stepfunctions.inputs import ExecutionInput

In [20]:
# Placeholder schema for values supplied at execution time; all three are strings.
execution_input = ExecutionInput(
    schema=dict.fromkeys(('ModelName', 'EndpointName', 'JobName'), str)
)

# Short alias used when wiring the placeholders into the steps.
ei = execution_input

In [21]:
from stepfunctions.steps import TrainingStep

In [22]:
# Workflow step that runs the estimator on the `train` channel; the job name
# is taken from the execution input.
training_step = TrainingStep(
    'Training Step',
    job_name=ei['JobName'],
    estimator=estimator,
    data=dict(train=train),
)

In [23]:
from stepfunctions.steps import ModelStep

In [24]:
# Workflow step built from the model the training step is expected to produce;
# the model name is taken from the execution input.
expected_model = training_step.get_expected_model()
model_step = ModelStep('Model Step', model=expected_model, model_name=ei['ModelName'])

In [25]:
from stepfunctions.steps import EndpointConfigStep

In [26]:
# Endpoint configuration for the model: one ml.m5.xlarge instance.
# The configuration deliberately reuses the ModelName placeholder as its own name.
endpoint_config_step = EndpointConfigStep(
    "Create Endpoint Configuration",
    model_name=ei['ModelName'],
    endpoint_config_name=ei['ModelName'],
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

In [27]:
from stepfunctions.steps import EndpointStep

In [28]:
# Endpoint step: the endpoint name comes from the execution input, and the
# configuration name is the ModelName placeholder used by the config step.
endpoint_step = EndpointStep(
    "Deploy Endpoint",
    endpoint_config_name=ei['ModelName'],
    endpoint_name=ei['EndpointName'],
)

In [29]:
from stepfunctions.steps import Chain

In [30]:
# Run the four steps strictly in sequence: train -> model -> endpoint config -> endpoint.
workflow_definition = Chain(
    steps=[training_step, model_step, endpoint_config_step, endpoint_step]
)

In [31]:
import uuid

# Example of the random hex string that is used below to build unique resource names.
uuid.uuid4().hex

'ff44abb88f9d4a0b9c173c48dbe0d8d7'

In [32]:
def generate_random_string():
    """Return a fresh random identifier: the hex form of a version-4 UUID."""
    token = uuid.uuid4()
    return token.hex

# Short alias for use inside name templates.
grs = generate_random_string

In [33]:
import uuid
from stepfunctions.workflow import Workflow

# Define the workflow under a unique "Workflow-<random hex>" name.
workflow_name = f"Workflow-{grs()}"
workflow = Workflow(
    name=workflow_name,
    role=execution_role,
    definition=workflow_definition,
    execution_input=execution_input,
)

In [34]:
# Create the workflow in AWS Step Functions; the cell output shows the state machine ARN.
workflow.create()

'arn:aws:states:us-east-1:581320662326:stateMachine:Workflow-6eced24724454517b105b3dd048499f5'

In [35]:
# Start an execution, giving each placeholder its own unique "ll-<random hex>" value.
execution = workflow.execute(
    inputs={
        key: 'll-{}'.format(grs())
        for key in ('JobName', 'ModelName', 'EndpointName')
    }
)

In [36]:
# Raw list of event dicts recorded so far for this execution (output can be long).
execution.list_events()

[{'timestamp': datetime.datetime(2021, 6, 8, 9, 56, 39, 114000, tzinfo=tzlocal()),
  'type': 'ExecutionStarted',
  'id': 1,
  'previousEventId': 0,
  'executionStartedEventDetails': {'input': '{\n    "JobName": "ll-3e17f51b69d74dad8cb0b76c6a42832c",\n    "ModelName": "ll-df05949385b0430785f36b15703ff336",\n    "EndpointName": "ll-ae59156c050542f3bf9801c5dd3a9145"\n}',
   'inputDetails': {'truncated': False},
   'roleArn': 'arn:aws:iam::581320662326:role/test-002'}},
 {'timestamp': datetime.datetime(2021, 6, 8, 9, 56, 39, 151000, tzinfo=tzlocal()),
  'type': 'TaskStateEntered',
  'id': 2,
  'previousEventId': 0,
  'stateEnteredEventDetails': {'name': 'Training Step',
   'input': '{\n    "JobName": "ll-3e17f51b69d74dad8cb0b76c6a42832c",\n    "ModelName": "ll-df05949385b0430785f36b15703ff336",\n    "EndpointName": "ll-ae59156c050542f3bf9801c5dd3a9145"\n}',
   'inputDetails': {'truncated': False}}},
 {'timestamp': datetime.datetime(2021, 6, 8, 9, 56, 39, 151000, tzinfo=tzlocal()),
  'type'

In [37]:
import pandas as pd

In [38]:
# Fetch the execution events again and flatten the nested dicts into a table,
# one row per event.
events = execution.list_events()
pd.json_normalize(data=events)

Unnamed: 0,timestamp,type,id,previousEventId,executionStartedEventDetails.input,executionStartedEventDetails.inputDetails.truncated,executionStartedEventDetails.roleArn,stateEnteredEventDetails.name,stateEnteredEventDetails.input,stateEnteredEventDetails.inputDetails.truncated,taskScheduledEventDetails.resourceType,taskScheduledEventDetails.resource,taskScheduledEventDetails.region,taskScheduledEventDetails.parameters,taskStartedEventDetails.resourceType,taskStartedEventDetails.resource
0,2021-06-08 09:56:39.114000+00:00,ExecutionStarted,1,0,"{\n ""JobName"": ""ll-3e17f51b69d74dad8cb0b76c...",False,arn:aws:iam::581320662326:role/test-002,,,,,,,,,
1,2021-06-08 09:56:39.151000+00:00,TaskStateEntered,2,0,,,,Training Step,"{\n ""JobName"": ""ll-3e17f51b69d74dad8cb0b76c...",False,,,,,,
2,2021-06-08 09:56:39.151000+00:00,TaskScheduled,3,2,,,,,,,sagemaker,createTrainingJob.sync,us-east-1,"{""AlgorithmSpecification"":{""TrainingImage"":""38...",,
3,2021-06-08 09:56:39.297000+00:00,TaskStarted,4,3,,,,,,,,,,,sagemaker,createTrainingJob.sync


In [39]:
# Inspect the workflow object's instance attributes (name, role, state machine ARN, ...).
vars(workflow)

{'timeout_seconds': None,
 'comment': None,
 'version': None,
 'definition': Graph(timeout_seconds=None, comment=None, version=None),
 'name': 'Workflow-6eced24724454517b105b3dd048499f5',
 'role': 'arn:aws:iam::581320662326:role/test-002',
 'tags': [],
 'workflow_input': <stepfunctions.inputs.placeholders.ExecutionInput at 0x7f96233860d0>,
 'client': <botocore.client.SFN at 0x7f96224d1690>,
 'format_json': True,
 'state_machine_arn': 'arn:aws:states:us-east-1:581320662326:stateMachine:Workflow-6eced24724454517b105b3dd048499f5'}

In [40]:
# Render the generated state machine definition as pretty-printed JSON.
definition_json = workflow.definition.to_json(pretty=True)
print(definition_json)

{
    "StartAt": "Training Step",
    "States": {
        "Training Step": {
            "Resource": "arn:aws:states:::sagemaker:createTrainingJob.sync",
            "Parameters": {
                "AlgorithmSpecification": {
                    "TrainingImage": "382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1",
                    "TrainingInputMode": "File"
                },
                "OutputDataConfig": {
                    "S3OutputPath": "s3://sagemaker-cookbook-bucket/chapter09/output/"
                },
                "StoppingCondition": {
                    "MaxRuntimeInSeconds": 86400
                },
                "ResourceConfig": {
                    "InstanceCount": 1,
                    "InstanceType": "ml.m5.xlarge",
                    "VolumeSizeInGB": 30
                },
                "RoleArn": "arn:aws:iam::581320662326:role/SuperAdminRole",
                "InputDataConfig": [
                    {
                        "DataSo