# Machine Learning Pipelines

## Machine Learning Orchestration with Amazon SageMaker Pipelines

In [5]:
import os
import sagemaker
import logging
import boto3
import time
import pandas as pd
import json
import botocore
from botocore.exceptions import ClientError


# ========================== low-level service client of the boto3 session ==========================
# Shared botocore configuration: passes an extra user-agent token to both
# clients created below.
config = botocore.config.Config(user_agent_extra='bedissj-1699438736259')

# One client for the 'sagemaker' service and one for 'sagemaker-runtime',
# both built with the same config.
sm = boto3.client(service_name='sagemaker', config=config)
sm_runtime = boto3.client('sagemaker-runtime', config=config)

# SageMaker SDK session wired to the two clients above.
sess = sagemaker.Session(
    sagemaker_client=sm,
    sagemaker_runtime_client=sm_runtime,
)

# Values read from the session / execution environment for use in later cells.
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = sess.boto_region_name


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


![model_registry](./img/model_registry.png)

### 1.1 Configure Data Processing Step

In [None]:
# Key prefix inside the session's default bucket; kept separate so the
# bucket and the path can be read (and changed) independently.
raw_data_prefix = 'data/transformed_querying/month1'
raw_data_s3_uri = f's3://{bucket}/{raw_data_prefix}'

In [8]:
from sagemaker.sklearn.processing import SKLearnProcessor

# Processor handed to the 'data-processing' pipeline step.
# NOTE(review): FRAMEWORK_VERSION, processing_instance_type and
# processing_instance_count are not defined in the visible cells — confirm
# they are set before this cell runs.
sklearn_processor = SKLearnProcessor(
    framework_version=FRAMEWORK_VERSION,
    role=role,
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    # Expose the session's region to the processing script through its environment.
    env={'AWS_DEFAULT_REGION': region},
)


Init signature:
SKLearnProcessor(
    framework_version: str,
    role: Union[str, sagemaker.workflow.entities.PipelineVariable, NoneType] = None,
    instance_count: Union[int, sagemaker.workflow.entities.PipelineVariable] = None,
    instance_type: Union[str, sagemaker.workflow.entities.PipelineVariable] = None,


In [6]:
from sagemaker.workflow.steps import ProcessingStep


# Pipeline step named 'data-processing': hands the script at
# ./src/processing.py to the SKLearn processor together with the step's
# inputs, outputs and command-line arguments.
# NOTE(review): processing_inputs, processing_outputs and processing_arguments
# are not defined in the visible cells — confirm they are set before this
# cell runs.
processing_step = ProcessingStep(
    name='data-processing',
    code='./src/processing.py',
    processor=sklearn_processor,
    inputs=processing_inputs,
    outputs=processing_outputs,
    job_arguments=processing_arguments,
)

Init signature:
ProcessingStep(
    name: str,
    step_args: sagemaker.workflow.pipeline_context._JobStepArguments = None,
    processor: sagemaker.processing.Processor = None,
    display_name: str = None,
    description: str = None,
    inputs: List[sagemaker.processing.ProcessingInput] = None,
[0;34m[0m    [0moutputs