In [1]:
import sagemaker
import boto3
from sagemaker.inputs import TrainingInput
from sagemaker.model import Model
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CreateModelStep
from sagemaker.workflow.model_step import ModelStep
from sagemaker.workflow.parameters import ParameterString

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Shared handles for the rest of the notebook: a regular SageMaker session,
# a low-level boto3 SageMaker client, and a PipelineSession.
sagemaker_session = sagemaker.Session()
sm_client = boto3.client("sagemaker")
# The PipelineSession is the session passed to the processor and the model
# defined in later cells.
pipeline_session = PipelineSession()
# IAM role looked up from the current environment; substitute an explicit
# role ARN here when running somewhere no execution role can be resolved.
role = sagemaker.get_execution_role()

In [3]:
# Fixed settings (not overridable per execution).
custom_image_uri = "654654222480.dkr.ecr.us-east-1.amazonaws.com/demo_prophet:latest"
model_package_group_name = "DemoProphetModelGroup"

# Pipeline parameters: each one can be overridden when an execution is started;
# the default_value is used otherwise.
input_data_uri = ParameterString(
    name="InputDataUri",
    default_value="s3://hemz-bucket/input-data/",
)
output_data_uri = ParameterString(
    name="OutputDataUri",
    default_value="s3://hemz-bucket/output-data/",
)
# Approval status handed to model.register(); defaults to "Approved".
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    default_value="Approved",
)

In [4]:
# Processor that runs a Python script inside the custom image (the image is
# expected to have Prophet installed). It is bound to the PipelineSession.
script_processor = ScriptProcessor(
    role=role,
    image_uri=custom_image_uri,
    command=["python3"],
    instance_type="ml.t3.medium",
    instance_count=1,
    sagemaker_session=pipeline_session,
)

In [5]:
# Preprocessing step: runs code/preprocess.py with script_processor.
#   input  : InputDataUri  -> /opt/ml/processing/input
#   output : /opt/ml/processing/output/preprocessed -> OutputDataUri,
#            exposed to downstream steps under the output name "processed_data".
preprocessing_step = ProcessingStep(
    name="PreprocessData",
    processor=script_processor,
    code="code/preprocess.py",
    inputs=[
        sagemaker.processing.ProcessingInput(
            source=input_data_uri,
            destination="/opt/ml/processing/input",
        )
    ],
    outputs=[
        sagemaker.processing.ProcessingOutput(
            output_name="processed_data",
            source="/opt/ml/processing/output/preprocessed",
            destination=output_data_uri,
        )
    ],
)

In [6]:
# preprocessing_step = script_processor.run(
#     inputs=[sagemaker.processing.ProcessingInput(source=input_data_uri, destination="/opt/ml/processing/input")],
#     outputs=[sagemaker.processing.ProcessingOutput(output_name="processed_data", destination=output_data_uri, source="/opt/ml/processing/output")],
#     code="code/preprocess.py"  # Replace with your preprocessing script
# )

In [7]:
# preprocessing_step = ProcessingStep(name="PreprocessData", step_args=preprocessing_step)

In [8]:
# Estimator that trains the Prophet model: runs code/train.py in the custom
# image and writes its artifacts under the given S3 output path.
prophet_estimator = sagemaker.estimator.Estimator(
    role=role,
    image_uri=custom_image_uri,
    entry_point="code/train.py",
    script_mode=True,
    instance_type="ml.t3.medium",
    instance_count=1,
    output_path="s3://hemz-bucket/output-data/model-artifacts/",
)

In [10]:
# Training step: feeds the S3 location written by the preprocessing step into
# the "train" channel of the Prophet estimator as CSV.
#
# The key used with Outputs[...] must be the output_name declared on the
# ProcessingOutput of PreprocessData ("processed_data"). It is NOT the last
# component of the container path (".../output/preprocessed"); using that
# path component as the key leaves the property reference unresolvable.
training_step = TrainingStep(
    name="TrainProphetModel",
    estimator=prophet_estimator,
    inputs={
        "train": TrainingInput(
            s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                "processed_data"
            ].S3Output.S3Uri,
            content_type="text/csv",
        )
    },
)

In [11]:
# Model built from the same custom image, pointing at the artifacts produced
# by the training step. Bound to the PipelineSession, like the processor.
model = Model(
    role=role,
    image_uri=custom_image_uri,
    model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
    sagemaker_session=pipeline_session,
)

In [12]:
# Registration arguments for the model: CSV in / CSV out, registered into the
# model package group with the approval status taken from the pipeline
# parameter. The result is wrapped in a ModelStep in a later cell.
register_step = model.register(
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
    content_types=["text/csv"],
    response_types=["text/csv"],
    # inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    # transform_instances=["ml.m5.xlarge"],
)



In [13]:
# Wrap the registration arguments in a pipeline step named "RegisterModel".
# NOTE: this rebinds `register_step` from the value returned by
# model.register(...) to the ModelStep itself, so re-running this cell without
# re-running the model.register(...) cell would pass a ModelStep as step_args.
register_step = ModelStep(
    name="RegisterModel",
    step_args=register_step,
)

In [14]:
# Training pipeline: preprocess -> train -> register.
#
# register_step is part of the step list so the trained model is actually
# registered in the model package group; without it the RegisterModel step is
# built but never executed, and the ModelApprovalStatus parameter (consumed
# only by model.register) would have no effect.
training_pipeline = Pipeline(
    name="TrainingPipeline",
    parameters=[input_data_uri, output_data_uri, model_approval_status],
    steps=[preprocessing_step, training_step, register_step],
)

In [15]:
# Create or update ("upsert") the pipeline definition in SageMaker using the
# execution role. This does not run the pipeline; the response shown below
# carries the pipeline ARN.
training_pipeline.upsert(role_arn=role)



{'PipelineArn': 'arn:aws:sagemaker:us-east-1:654654222480:pipeline/TrainingPipeline',
 'ResponseMetadata': {'RequestId': '61f01436-1c0c-40d9-8f34-81022c01b550',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '61f01436-1c0c-40d9-8f34-81022c01b550',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '84',
   'date': 'Fri, 16 Aug 2024 13:26:09 GMT'},
  'RetryAttempts': 0}}

In [16]:
# Start an execution of the training pipeline with the default parameter
# values (no overrides are passed). The displayed value is the execution
# handle, whose ARN identifies this run.
training_pipeline.start()

_PipelineExecution(arn='arn:aws:sagemaker:us-east-1:654654222480:pipeline/TrainingPipeline/execution/otzemv7eqpyi', sagemaker_session=<sagemaker.session.Session object at 0x7f74c085d960>)