In [24]:
import boto3
import sagemaker
from sagemaker.inputs import TrainingInput
from sagemaker.model import Model
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.functions import Join
from sagemaker.workflow.model_step import ModelStep
from sagemaker.workflow.parameters import ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.steps import ProcessingStep, TrainingStep

In [2]:
# Execution role resolved from the current environment; it is passed to the
# processor, estimator, model and pipeline upsert further down.
role = sagemaker.get_execution_role()

# Low-level boto3 client for direct SageMaker API calls.
sm_client = boto3.client('sagemaker')

# A regular SDK session, plus the pipeline session that the processor,
# estimator and model are constructed with.
sagemaker_session = sagemaker.Session()
pipeline_session = PipelineSession()

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


In [51]:
# --- Pipeline parameters (these three are registered on the pipeline) ---
input_data_uri = ParameterString(name="InputDataUri", default_value="s3://hemz-sagemaker-bucket/input-data/")
output_data_uri = ParameterString(name="OutputDataUri", default_value="s3://hemz-sagemaker-bucket/output-data/")
model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="Approved")

# --- Fixed settings ---
# Container image shared by the processing job, the training job and the model.
custom_image_uri = "654654222480.dkr.ecr.ap-south-1.amazonaws.com/demo_prophet:latest"

# Model package group that the trained model is registered into.
model_package_group_name = "ProphetModelGroup"

# Endpoint names; in this notebook they are only referenced by the
# commented-out endpoint cells.
endpoint_name = "prophet-endpoint"
endpoint_config_name = "prophet-endpoint-config"

In [4]:
# Processor for the preprocessing job: runs a script with `python3` inside the
# custom image (the image is expected to have Prophet installed).
script_processor = ScriptProcessor(
    role=role,
    image_uri=custom_image_uri,
    command=["python3"],
    instance_type="ml.t3.medium",
    instance_count=1,
    sagemaker_session=pipeline_session,
)

In [5]:
# Preprocessing step: mounts the data referenced by the InputDataUri parameter
# at /opt/ml/processing/input and uploads whatever the script writes to
# /opt/ml/processing/output/train as the "train" output.
preprocessing_step = ProcessingStep(
    name="PreprocessData",
    processor=script_processor,
    inputs=[
        sagemaker.processing.ProcessingInput(
            source=input_data_uri,
            destination="/opt/ml/processing/input",
        )
    ],
    outputs=[
        sagemaker.processing.ProcessingOutput(
            output_name="train",
            source="/opt/ml/processing/output/train",
            # Build the destination from the parameter itself rather than from
            # `output_data_uri.default_value`: the f-string froze the default
            # into the pipeline definition, so an OutputDataUri override at
            # execution time was silently ignored. `on=""` is a plain
            # concatenation, so the default still resolves to
            # "s3://hemz-sagemaker-bucket/output-data/train".
            destination=Join(on="", values=[output_data_uri, "train"]),
        )
    ],
    code="code/preprocess.py",  # Replace with your preprocessing script
)

In [6]:
# Estimator for the Prophet training job: runs code/train.py inside the custom
# image and writes model artifacts to the fixed S3 prefix below.
prophet_estimator = sagemaker.estimator.Estimator(
    role=role,
    image_uri=custom_image_uri,  # custom image with Prophet installed
    entry_point="code/train.py",
    script_mode=True,
    instance_type="ml.m5.large",
    instance_count=1,
    output_path="s3://hemz-sagemaker-bucket/output-data/model-artifacts/",
    sagemaker_session=pipeline_session,
)

In [7]:
# Training step: feeds the "train" output of the preprocessing step to the
# estimator as a CSV channel named "train".
train_s3_uri = preprocessing_step.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri
train_channel = TrainingInput(s3_data=train_s3_uri, content_type="text/csv")

training_step = TrainingStep(
    name="TrainModel",
    estimator=prophet_estimator,
    inputs={"train": train_channel},
)

In [30]:
# Model that pairs the custom image with the artifacts produced by the
# training step.
trained_model_artifacts = training_step.properties.ModelArtifacts.S3ModelArtifacts

model = Model(
    role=role,
    image_uri=custom_image_uri,
    model_data=trained_model_artifacts,
    sagemaker_session=pipeline_session,
)

In [9]:
register_model_step = model.register(
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
)



In [10]:
# Define model registration step
register_model_step = ModelStep(name="RegisterModel", step_args=register_model_step)

In [35]:
# # (Disabled) Create-model step; see https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-steps.html#step-type-model
# model_create_step = ModelStep(
#    name="CreateModel",
#    step_args=model.create(instance_type="ml.m5.xlarge"),
# )

In [14]:
# Assemble the training pipeline from its parameters and its three steps
# (preprocess, train, register).
pipeline_parameters = [input_data_uri, output_data_uri, model_approval_status]
pipeline_steps = [preprocessing_step, training_step, register_model_step]

training_pipeline = Pipeline(
    name="TrainingPipeline",
    steps=pipeline_steps,
    parameters=pipeline_parameters,
)

In [16]:
# Create the pipeline in SageMaker, or update it if it already exists.
# This does not run anything; the response carries the pipeline ARN.
training_pipeline.upsert(role_arn=role)



{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:654654222480:pipeline/TrainingPipeline',
 'ResponseMetadata': {'RequestId': '5de0e397-eee8-4fd3-b141-f6940e3dc588',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '5de0e397-eee8-4fd3-b141-f6940e3dc588',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '85',
   'date': 'Sat, 17 Aug 2024 16:19:36 GMT'},
  'RetryAttempts': 0}}

In [17]:
# Start one execution of the pipeline (no parameter overrides are passed here)
# and keep the execution handle for the describe/wait calls that follow.
execution = training_pipeline.start()

_PipelineExecution(arn='arn:aws:sagemaker:ap-south-1:654654222480:pipeline/TrainingPipeline/execution/a267zynpnsct', sagemaker_session=<sagemaker.session.Session object at 0x7fa40c510970>)

In [56]:
# Show the execution's details as returned by the service
# (pipeline ARN, name, definition, ...).
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:654654222480:pipeline/TrainingPipeline',
 'PipelineName': 'TrainingPipeline',
 'PipelineDisplayName': 'TrainingPipeline',
 'PipelineDefinition': '{"Version": "2020-12-01", "Metadata": {}, "Parameters": [{"Name": "InputDataUri", "Type": "String", "DefaultValue": "s3://hemz-sagemaker-bucket/input-data/"}, {"Name": "OutputDataUri", "Type": "String", "DefaultValue": "s3://hemz-sagemaker-bucket/output-data/"}, {"Name": "ModelApprovalStatus", "Type": "String", "DefaultValue": "Approved"}], "PipelineExperimentConfig": {"ExperimentName": {"Get": "Execution.PipelineName"}, "TrialName": {"Get": "Execution.PipelineExecutionId"}}, "Steps": [{"Name": "PreprocessData", "Type": "Processing", "Arguments": {"ProcessingResources": {"ClusterConfig": {"InstanceType": "ml.t3.medium", "InstanceCount": 1, "VolumeSizeInGB": 30}}, "AppSpecification": {"ImageUri": "654654222480.dkr.ecr.ap-south-1.amazonaws.com/demo_prophet:latest", "ContainerEntrypoint": ["python3",

In [None]:
# Wait for the pipeline execution to finish before continuing.
execution.wait()

In [62]:
from sagemaker.workflow.functions import Join

In [65]:
# Pipeline-time string join of the OutputDataUri parameter and the training
# step's model-artifact location, separated by "/".
# NOTE(review): S3ModelArtifacts is passed directly as `model_data` to Model,
# so it already looks like a full S3 URI; prefixing it with OutputDataUri is
# probably unintended. `destination` is not used in the visible cells; confirm
# the intent or remove.
destination = Join(on = '/', values = [output_data_uri, training_step.properties.ModelArtifacts.S3ModelArtifacts])

In [66]:
# Display the Join expression; its repr shows the unresolved parameter and
# step property rather than a concrete string.
destination

Join(on='/', values=[ParameterString(name='OutputDataUri', parameter_type=<ParameterTypeEnum.STRING: 'String'>, default_value='s3://hemz-sagemaker-bucket/output-data/'), {'_step': <sagemaker.workflow.steps.TrainingStep object at 0x7fa40c6e9030>, 'step_name': 'TrainModel', 'path': 'ModelArtifacts.S3ModelArtifacts', '_shape_names': ['S3Uri'], '__str__': 'S3Uri'}])

In [67]:
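# # (Disabled) Create a model for the endpoint; `model_name` is not defined in the visible cells (TODO: define it before re-enabling)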
# sm_client.create_model(
#     ModelName = model_name,
#     ExecutionRoleArn = role,
#     Containers = [{
#         "Image": custom_image_uri,
#         "Mode": "SingleModel",
#         "ModelDataUrl": training_step.properties.ModelArtifacts.S3ModelArtifacts.to_string(),
#     }]
# )

In [54]:
# # Create endpoint configuration
# endpoint_config = sm_client.create_endpoint_config(
#    EndpointConfigName="prophet-endpoint-config",
#    ProductionVariants=[
#         {
#             "ModelName": "pipelines-a267zynpnsct-CreateModel-CreateMo-RnIsVnjWsV",
#             "VariantName": "AllTraffic",
#             "ServerlessConfig": {
#                 "MemorySizeInMB": 2048,
#                 "MaxConcurrency": 1,
#                 "ProvisionedConcurrency": 1,
#             }
#         } 
#     ]
# )

In [55]:
# # Create the endpoint
# sm_client.create_endpoint(
#     EndpointName=endpoint_name,
#     EndpointConfigName=endpoint_config_name
# )

{'EndpointArn': 'arn:aws:sagemaker:ap-south-1:654654222480:endpoint/prophet-endpoint',
 'ResponseMetadata': {'RequestId': '2a5e1678-1fd0-4342-9de7-1e53dad0f86c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '2a5e1678-1fd0-4342-9de7-1e53dad0f86c',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '85',
   'date': 'Sat, 17 Aug 2024 18:16:14 GMT'},
  'RetryAttempts': 0}}