In [1]:
import sagemaker
import boto3
from sagemaker.workflow.parameters import ParameterString, ParameterInteger
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep, TrainingStep, TuningStep



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Parameters for the pipeline
pipeline_name = "MLOpsPipeline"

instance_type = ParameterString(name="InstanceType", default_value="ml.t3.medium")
model_name = ParameterString(name="ModelName", default_value="model")
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
role = sagemaker.get_execution_role()

s3_input_data = ParameterString(
    name="InputData",
    default_value="s3://ml-ops-zenon/Input/diabetes-dev-1.csv"
)

In [3]:
# retrieve sklearn image
session = boto3.Session()  # Initialize boto3 session
region = session.region_name

image_uri = sagemaker.image_uris.retrieve(
    framework="sklearn",
    region=region,
    version="1.2-1",
    py_version="py3",
    instance_type="ml.t3.medium",
)

In [4]:
script_processor = ScriptProcessor(
    image_uri=image_uri,
    role=role,
    instance_count=instance_count,
    instance_type=instance_type
)

In [5]:
# import os
# current_dir = os.getcwd()

processing_step = ProcessingStep(
    name="DataProcessing",
    processor=script_processor,
    inputs=[
        ProcessingInput(source=s3_input_data, destination='/opt/ml/processing/input')
    ],
    outputs=[
        ProcessingOutput(output_name="train_data", source='/opt/ml/processing/train'),
        ProcessingOutput(output_name="test_data", source='/opt/ml/processing/test')
    ],
    code="preprocessing.py"
)

In [None]:
# import os
# print(f"Current directory: {os.getcwd()}")

# # Change to the correct directory
# os.chdir('/home/sagemaker-user/Random-Forest/Pipeline')  # Replace with your actual folder name

# # Verify the new directory
# print(f"New directory: {os.getcwd()}")
# print(f"Files in directory: {os.listdir()}")

In [6]:
label_engineering_step = ProcessingStep(
    name="LabelEngineering",
    processor=script_processor,
    inputs=[
        ProcessingInput(source=s3_input_data, destination='/opt/ml/processing/input')
    ],
    outputs=[
        ProcessingOutput(output_name="train_labels", source='/opt/ml/processing/train_labels'),
        ProcessingOutput(output_name="test_labels", source='/opt/ml/processing/test_labels')
    ],
    code="label_engineering.py"
)

In [7]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

image_uri1 = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.2-1",
    py_version="py3",
    instance_type="ml.t3.medium",
)

xgboost_estimator = Estimator(
    image_uri=image_uri1,
    role=role,
    instance_count=1,
    instance_type='ml.t3.medium',
    output_path="s3://ml-ops-zenon/model/"
)

xgboost_estimator.set_hyperparameters(
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
    objective='binary:logistic',
    num_round=100
)

training_step = TrainingStep(
    name="ModelTraining",
    estimator=xgboost_estimator,
    inputs={
        'train': TrainingInput(s3_data=processing_step.properties.ProcessingOutputConfig.Outputs['train_data'].S3Output.S3Uri),
        'validation': TrainingInput(s3_data=processing_step.properties.ProcessingOutputConfig.Outputs['test_data'].S3Output.S3Uri)
    }
)

In [50]:
from sagemaker.tuner import HyperparameterTuner, ContinuousParameter

hyperparameter_ranges = {
    'eta': ContinuousParameter(0.1, 0.5),
    'max_depth': ContinuousParameter(3, 7)
}

tuner = HyperparameterTuner(
    estimator=xgboost_estimator,
    objective_metric_name='validation:auc',
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=10,
    max_parallel_jobs=2
)

tuning_step = TuningStep(
    name="HyperparameterTuning",
    tuner=tuner,
    inputs={
        'train': TrainingInput(s3_data=processing_step.properties.ProcessingOutputConfig.Outputs['train_data'].S3Output.S3Uri),
        'validation': TrainingInput(s3_data=processing_step.properties.ProcessingOutputConfig.Outputs['test_data'].S3Output.S3Uri)
    }
)

In [51]:
evaluation_step = ProcessingStep(
    name="ModelEvaluation",
    processor=script_processor,
    inputs=[
        ProcessingInput(source=training_step.properties.ModelArtifacts.S3ModelArtifacts, destination='/opt/ml/processing/model')
    ],
    outputs=[
        ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation")
    ],
    code="evaluate.py"
)

In [53]:
from sagemaker.workflow.model_step import ModelStep
from sagemaker.model import Model
from sagemaker.workflow.pipeline_context import PipelineSession

model = Model(
    image_uri=image_uri1,
    model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
    sagemaker_session=PipelineSession(),
    role=role,
)

step_model_create = ModelStep(
   name="MyModelCreationStep",
   step_args=model.create(instance_type="ml.m5.large"),
)



In [55]:
import time

from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.sklearn import SKLearnModel
from sagemaker.xgboost import XGBoostModel

pipeline_session = PipelineSession()

code_location = 's3://{0}/{1}/code'.format(bucket_name, prefix)

sklearn_model = SKLearnModel(
   model_data=processing_step.properties.ProcessingOutputConfig.Outputs['model'].S3Output.S3Uri,
   entry_point='inference.py',
   source_dir='sklearn_source_dir/',
   code_location=code_location,
   framework_version='1.0-1',
   role=role,
   sagemaker_session=pipeline_session,
   py_version='py3'
)

xgboost_model = XGBoostModel(
   model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
   entry_point='inference.py',
   source_dir='xgboost_source_dir/',
   code_location=code_location,
   framework_version='0.90-2',
   py_version='py3',
   sagemaker_session=pipeline_session,
   role=role
)

from sagemaker.workflow.model_step import ModelStep
from sagemaker import PipelineModel

pipeline_model = PipelineModel(
   models=[sklearn_model, xgboost_model],
   role=role,sagemaker_session=pipeline_session,
)

register_model_step_args = pipeline_model.register(
    content_types=["application/json"],
   response_types=["application/json"],
   inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
   transform_instances=["ml.m5.xlarge"],
   model_package_group_name='sipgroup',
)

step_model_registration = ModelStep(
   name="AbaloneRegisterModel",
   step_args=register_model_step_args,
)

NameError: name 'pipeline_model' is not defined

In [None]:
from sagemaker.workflow.steps import TransformStep
from sagemaker.transformer import Transformer

transformer = Transformer(
    model_name=model_registration_step.properties.ModelName,
    instance_count=1,
    instance_type='ml.t3.medium',
    output_path='s3://ml-ops-zenon/churn-data/output'
)

transform_step = TransformStep(
    name="BatchInference",
    transformer=transformer,
    inputs=TrainingInput(s3_data=processing_step.properties.Outputs['test_data'])
)

In [None]:
from sagemaker.model_monitor import DefaultModelMonitor

monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.t3.medium',
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800
)

monitor.create_monitoring_schedule(
    endpoint_name='my-endpoint',
    output_s3_uri='s3://ml-ops-zenon/monitoring',
    schedule_cron_expression='cron(0 * ? * * *)'
)

In [39]:
from sagemaker.workflow.pipeline import Pipeline
role = sagemaker.get_execution_role()

pipeline = Pipeline(
    name="CustomerChurnPipeline",
    steps=[
        processing_step,
        label_engineering_step,
        training_step,
        evaluation_step,
        # model_registration_step,
        # transform_step
    ],
    sagemaker_session=sagemaker.Session()
)


pipeline.create(role_arn=role)  # This creates the pipeline in SageMaker

TypeError: unsupported operand type(s) for +: 'NoneType' and 'list'

In [None]:
execution = pipeline.start()
execution.wait()

In [None]:
pipeline.list_executions()