In [3]:
import sys

# Install the SageMaker SDK (version 2.99.0 or newer) with pip, invoked through
# sys.executable so the package lands in the interpreter that runs this kernel.
!{sys.executable} -m pip install "sagemaker>=2.99.0"

import boto3
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession

# Regular SDK session: supplies the region and the default bucket used below.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name

# IAM role this notebook runs under; reused for the training job and the pipeline.
role = sagemaker.get_execution_role()

# Separate session object dedicated to building pipeline steps.
pipeline_session = PipelineSession()

default_bucket = sagemaker_session.default_bucket()
model_package_group_name = "AbaloneModelPackageGroupName"

Keyring is skipped due to an exception: 'keyring.backends'
Collecting importlib-metadata<5.0,>=1.4.0
  Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Installing collected packages: importlib-metadata
  Attempting uninstall: importlib-metadata
    Found existing installation: importlib-metadata 5.1.0
    Uninstalling importlib-metadata-5.1.0:
      Successfully uninstalled importlib-metadata-5.1.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.
pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.
docker-compose 1.29.2 requires PyYAML<6,>=3.10, but you have pyyaml 6.0 which is incompatible.[0m[31m
[0mSuccessfully installed importlib-metadata-4.13.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0

In [4]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

In [6]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

# S3 prefix under the default bucket where the training output is written.
model_path = f"s3://{default_bucket}/AbaloneTrain"

# One definition of the instance type, shared by the image lookup and the
# estimator so the two can never drift apart.
training_instance_type = "ml.m5.xlarge"

# Resolve the container image URI for the built-in XGBoost algorithm
# (framework version 1.0-1, Python 3) in the current region.
image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.0-1",
    py_version="py3",
    instance_type=training_instance_type,
)

# The estimator is bound to the PipelineSession, so the later `.fit()` call
# produces step arguments for the pipeline rather than being used directly here.
xgb_train = Estimator(
    image_uri=image_uri,
    instance_type=training_instance_type,
    instance_count=1,
    output_path=model_path,
    role=role,
    sagemaker_session=pipeline_session,
)

# "reg:squarederror" is the current name of the squared-error regression
# objective; the previous value "reg:linear" is a deprecated alias for it.
xgb_train.set_hyperparameters(
    objective="reg:squarederror",
    num_round=50,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
)


In [14]:
from sagemaker.workflow.functions import Join
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat,
)
# The full S3 URI of the training data is exposed as a single pipeline
# parameter, defaulting to the "training" prefix of the default bucket.
# An alternative (previously sketched here as commented-out code) is to
# parameterize only the bucket name via ParameterString "MyInputData" and
# assemble "s3://<bucket>/training" with Join(on="", values=[...]).
input_data_uri = f"s3://{default_bucket}/training"
input_data = ParameterString(name="InputData", default_value=input_data_uri)

In [17]:
# The "train" channel reads CSV data from the location held by the InputData
# pipeline parameter, so the location can be overridden per execution.
train_channel = TrainingInput(s3_data=input_data, content_type="text/csv")

# The result of fit() is kept so it can be handed to the TrainingStep as its
# step arguments.
train_args = xgb_train.fit(inputs={"train": train_channel})

In [18]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep


# Wrap the prepared training arguments in a pipeline step named "AbaloneTrain".
step_train = TrainingStep(name="AbaloneTrain", step_args=train_args)

In [20]:
from sagemaker.workflow.pipeline import Pipeline


# Assemble the pipeline: one overridable parameter (InputData) and a single
# training step.
pipeline_name = "AbalonePipeline"
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[input_data],
    steps=[step_train],
)

In [21]:
import json


# Render the pipeline definition as JSON text, then parse it into Python
# objects so the notebook can display it for inspection.
definition_json = pipeline.definition()
definition = json.loads(definition_json)
definition

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'InputData',
   'Type': 'String',
   'DefaultValue': 's3://sagemaker-us-east-1-419639163435/training'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'AbaloneTrain',
   'Type': 'Training',
   'Arguments': {'AlgorithmSpecification': {'TrainingInputMode': 'File',
     'TrainingImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3'},
    'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-east-1-419639163435/AbaloneTrain'},
    'StoppingCondition': {'MaxRuntimeInSeconds': 86400},
    'ResourceConfig': {'VolumeSizeInGB': 30,
     'InstanceCount': 1,
     'InstanceType': 'ml.m5.xlarge'},
    'RoleArn': 'arn:aws:iam::419639163435:role/service-role/AmazonSageMaker-ExecutionRole-20191112T173070',
    'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',

In [22]:
# Submit the pipeline definition to SageMaker under the notebook's execution
# role, and display the service response (it includes the PipelineArn).
upsert_response = pipeline.upsert(role_arn=role)
upsert_response

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:419639163435:pipeline/abalonepipeline',
 'ResponseMetadata': {'RequestId': 'f182cbb0-c870-48a2-b9cc-16f6350b99b6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f182cbb0-c870-48a2-b9cc-16f6350b99b6',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '83',
   'date': 'Sun, 19 Feb 2023 01:49:56 GMT'},
  'RetryAttempts': 0}}

In [45]:
import os

# Local staging area: the sample CSV is downloaded to local_path, and the
# whole local_path_2 directory is what gets uploaded afterwards.
local_path = "data/abalone-dataset.csv"
local_path_2 = "data/"

# download_file does not create missing parent directories, so make sure the
# target directory exists first (a no-op when it is already there). Without
# this the cell fails with FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(local_path), exist_ok=True)

# Fetch the Abalone dataset from the "sagemaker-sample-files" bucket.
s3 = boto3.resource("s3")
s3.Bucket("sagemaker-sample-files").download_file(
    "datasets/tabular/uci_abalone/abalone.csv", local_path
)

# NOTE(review): base_uri is not used in this cell. Its value matches the
# InputData override passed when the pipeline execution is started, whereas
# the upload below targets base_uri_2 — confirm which prefix is intended.
base_uri = "s3://my-bucket-for-ml-usecases/input_data_written"
base_uri_2 = "s3://my-bucket-for-ml-usecases/foldern8"

# Upload the contents of local_path_2 to base_uri_2.
# NOTE: this rebinds input_data_uri, which earlier held the default value of
# the InputData pipeline parameter.
input_data_uri = sagemaker.s3.S3Uploader.upload(
    local_path=local_path_2,
    desired_s3_uri=base_uri_2,
)

In [41]:
# Create an empty data/sample_config.yaml via the shell (touch only refreshes
# the timestamp if the file already exists). The data/ directory must exist.
# NOTE(review): presumably added so the directory upload contains more than
# one file — confirm the intent.
!touch data/sample_config.yaml

In [32]:

import boto3

# Low-level SageMaker client, used here to start a pipeline execution directly.
client = boto3.client("sagemaker")

# Override the InputData pipeline parameter for this execution.
pipeline_parameters = [
    {
        "Name": "InputData",
        "Value": "s3://my-bucket-for-ml-usecases/input_data_written",
    },
]

# Optional arguments deliberately left unset: PipelineExecutionDisplayName,
# PipelineExecutionDescription, ClientRequestToken and
# ParallelismConfiguration (e.g. MaxParallelExecutionSteps).
response = client.start_pipeline_execution(
    PipelineName="AbalonePipeline",
    PipelineParameters=pipeline_parameters,
)