## CREATE MODEL CONFIG

### Define project name, username

![workflowimage](Images/pic2.PNG)

In [1]:
ProjectName = 'sampleproject-testing' # ex: 'cicd-hbbchurn'
DepartmentName = 'sample-department-name' 
UsecaseName = 'sample-usecase' # ex: 'mlops-cicd'
UserName = 'mlops-lakshani'
BUName = 'mlops' # ex: 'mlops'
AD = 'sample-ad-name' 
ResourceName = 'sample-resource-name'
OwnerName = 'saranga_gunasekara'
env = 'dev'
job_name = ''

project_prefix = f'{ProjectName}/{BUName}'

# RUN PIPELINE

![workflowimage](Images/pic3.PNG)

In [2]:
import os
import pytz
from datetime import datetime

#from model from mlops_experiment_and_trial_training import mlops_model_experiments

import boto3
import sagemaker
import sagemaker.session
from sagemaker.workflow.steps import CacheConfig
from sagemaker.transformer import Transformer
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput,TransformInput,CreateModelInput
from sagemaker.processing import (
    ProcessingInput,
    ProcessingOutput,
    ScriptProcessor,
)
from sagemaker.network import NetworkConfig
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.workflow.conditions import (
    ConditionGreaterThanOrEqualTo,
)
from sagemaker.workflow.condition_step import (
    ConditionStep,
    JsonGet,
)
from sagemaker.model_metrics import (
    MetricsSource,
    ModelMetrics,
)
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
)
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.steps import (
    ProcessingStep,
    TrainingStep,
    TransformStep,
    CreateModelStep
)
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.model import Model

from sagemaker.workflow.pipeline_context import LocalPipelineSession
local_pipeline_session = LocalPipelineSession()


sess = sagemaker.Session()

![workflowimage](Images/pic4.PNG)

In [3]:
mode = 'sm'

In [4]:
def get_session(region, default_bucket):
    """Gets the sagemaker session based on the region.
    Args:
        region: the aws region to start the session
        default_bucket: the bucket to use for storing the artifacts
    Returns:
        `sagemaker.session.Session instance
    """

    boto_session = boto3.Session(region_name=region)

    sagemaker_client = boto_session.client("sagemaker")
    #cloudwatch_client = boto_session.client("cloudwatch")
    runtime_client = boto_session.client("sagemaker-runtime")
    return sagemaker.session.Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_runtime_client=runtime_client,
        default_bucket=default_bucket,
    )

In [5]:
import pytz
from datetime import datetime

srilanka_tz = pytz.timezone('Asia/Colombo')
date_folder = datetime.now(srilanka_tz).strftime("%Y-%m-%d")

In [6]:
import json

def getJsonData(bucket_name,key_name):
    '''
    this will pick the json config file from s3 bucket
    '''
    
    print("[LOG]", bucket_name,'---------')
    print("[LOG]", key_name,'--------------')
      
    s3_client = boto3.client('s3')
    csv_obj = s3_client.get_object(Bucket=bucket_name, Key=key_name)
    
    body = csv_obj['Body']
    
    json_string = body.read().decode('utf-8')
    json_content = json.loads(json_string)
    
    return json_content

print("calling the config file")
config_bucket_name = f'dlk-cloud-tier-8-code-ml-{env}'
config_key_name = f'config_files/aif_config/{project_prefix}/config.json' 
config = getJsonData(config_bucket_name,config_key_name)


calling the config file
[LOG] dlk-cloud-tier-8-code-ml-dev ---------
[LOG] config_files/aif_config/sampleproject-testing/mlops/config.json --------------




![workflowimage](Images/pic7.PNG)

In [7]:
def get_pipeline(
    region,
    subnets,
    security_group_ids,
    role=None,
    default_bucket=None,
    model_package_group_name=config['ModelSpecific']['ModelPackageGroupName'],  # Choose any name
    pipeline_name=config['PipelineSpecific']['PipelineNamePrefix'],  # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
    base_job_prefix=config['UserDetails']['BUName'],  # Choose any name
    env=config['UserDetails']['EnvironmentName'],
    project_name = config['UserDetails']['ProjectName']
):
    
    project_name = config["UserDetails"]["ProjectName"]
    user_name=config["UserDetails"]["UserName"]
    BU_name = config["UserDetails"]["BUName"]
    
    """Gets a SageMaker ML Pipeline instance working with on CustomerChurn data.
    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline.
        default_bucket: the bucket to use for storing the artifacts
    Returns:
        an instance of a pipeline
    """
    #data versioning control using date
    srilanka_tz = pytz.timezone('Asia/Colombo')
    s3 = boto3.client('s3')
    cw = boto3.client('cloudwatch')
    date_folder = datetime.now(srilanka_tz).strftime("%Y-%m-%d")
    
    #working with input data path
    input_data = config['S3LocationPaths']['PreprocessPath']
    
    #working with output data path   
    preprocessed_output1 = f"s3://dlk-cloud-tier-10-preprocessed-ml-{env}/{project_name}/train/output1/"
    preprocessed_output2 = f"s3://dlk-cloud-tier-10-preprocessed-ml-{env}/{project_name}/train/output2/"
    #preprocessed_output3 = f"s3://dlk-cloud-tier-10-preprocessed-ml-{env}/{project_name}/train/{date_folder}/output3/"
    #preprocessed_output4 = f"s3://dlk-cloud-tier-10-preprocessed-ml-{env}/{project_name}/train/{date_folder}/output4/"

    
    generic_tags=[{'Key': 'dialog:mlops:environmentname', 'Value': config['TagSpecific']['dialog:mlops:environmentname']}, 
                  {'Key': 'dialog:mlops:projectname', 'Value': config['TagSpecific']['dialog:mlops:projectname']},
                  {'Key': 'dialog:mlops:departmentname', 'Value': config['TagSpecific']['dialog:mlops:departmentname']},
                  {'Key': 'dialog:mlops:resourcename', 'Value': config['TagSpecific']['dialog:mlops:resourcename']}, 
                  {'Key': 'dialog:mlops:ownername', 'Value': config['TagSpecific']['dialog:mlops:ownername']},
                  {'Key': 'dialog:mlops:buname', 'Value': config['TagSpecific']['dialog:mlops:buname']} ]
    
    
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)
    account_id = boto3.client("sts").get_caller_identity().get("Account")
    region = boto3.session.Session().region_name

    # Parameters for pipeline execution
    model_path = ParameterString(
        name="ModelPath",
        default_value=f"s3://dlk-cloud-tier-9-training-ml-{env}/{project_name}/{date_folder}", 
    )
    
    model_approval_status = ParameterString(
        name="ModelApprovalStatus",
        default_value="Approved",  # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval.
    )

    # -------------------------- PREPROCESSING --------------------------------------------------------------------

        
    script_processor = ScriptProcessor(
         command = ["python3"],
         image_uri = config['DockerSpecific']['PreprocessURI'],
         role = role,
         instance_count = 1, #config['PipelineSpecific']['Preprocessing']['InstanceCount'],
         instance_type = config['PipelineSpecific']['Preprocessing']['InstanceType'],
         tags = generic_tags + [{'Key': 'JobType', 'Value': 'Preprocessing'}],
         network_config = NetworkConfig(subnets=subnets.split(':'), security_group_ids=security_group_ids.split(':'))
    )
    
    step_preprocess = ProcessingStep(
        name= f'{BUName}-{UsecaseName}-preprocessing',
        processor= script_processor, 
        code= 'mlops_sampleusecase_preprocessing/preprocessing.py',
        inputs= [ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
                ProcessingInput(source='mlops_sampleusecase_preprocessing/my_package/', destination="/opt/ml/processing/input/code/my_package/"),
                #ProcessingInput(source=input_data3, destination="/opt/ml/processing/input3"),
                #ProcessingInput(source=input_data4, destination="/opt/ml/processing/input4"),

               ],
        outputs= [
            ProcessingOutput(output_name="output1", destination=preprocessed_output1, source="/opt/ml/processing/output1"),
            ProcessingOutput(output_name="output2", destination=preprocessed_output2,  source="/opt/ml/processing/output2"),
            #ProcessingOutput(output_name="output3", destination=preprocessed_output3,  source="/opt/ml/processing/output3"),
            #ProcessingOutput(output_name="output4", destination=preprocessed_output2,  source="/opt/ml/processing/output4"),
           
        ],
        job_arguments=["--project_name", project_name,          # add more args as you want
                       "--user_name",user_name,
                       "--BU_name",BU_name,
                      ]  


    )
    
    #------------------------------------- TRAINING --------------------------------------------------------------------
    
    recommender_image_uri = config['DockerSpecific']['TrainingURI']
    
    estimator = Estimator(image_uri=recommender_image_uri,
                      role=role,
                      sagemaker_session=local_pipeline_session if mode=='local' else sagemaker_session,                                # Technical object
                      #sagemaker_session = sagemaker_session,
                      output_path=model_path,
                      base_job_name=f'{project_name}-training-job',
                      input_mode='File',
                          entry_point = "train.py",
                          source_dir = "mlops_sampleusecase_training/model/",# Copy the dataset and then train    
                      train_instance_count=int(config['PipelineSpecific']['Training']['InstanceCount']),
                      train_instance_type=config['PipelineSpecific']['Training']['InstanceType'],
                      #train_instance_type = 'local' if mode=='local' else "ml.m5.large",
                      debugger_hook_config=False,
                      disable_profiler = True,
                      metric_definitions=[
                          # Only 40 Metrics can be accomodated
                            {'Name': 'roc_auc_score' , 'Regex': 'roc_auc_score:([-+]?[0-9]*\.?[0-9]+)'},
                            {'Name': 'precision', 'Regex': 'precision:([-+]?[0-9]*\.?[0-9]+)'},
                            {'Name': 'recall_score', 'Regex': 'recall_score:([-+]?[0-9]*\.?[0-9]+)'},
                            {'Name': 'f1_score', 'Regex': 'f1_score:([-+]?[0-9]*\.?[0-9]+)'},
                            {'Name': 'accuracy_score', 'Regex': 'accuracy_score:([-+]?[0-9]*\.?[0-9]+)'},
                          
                       ],
                      tags = generic_tags + [{'Key': 'JobType', 'Value': 'Training'}],
                      subnets = subnets.split(':'),
                      security_group_ids = security_group_ids.split(':')
                         )
    print (project_name,user_name,BU_name)
    
    estimator.set_hyperparameters(
    
        project_name = project_name,
        user_name=user_name,
        BU_name = BU_name
     )
   
    # start training
    step_train = TrainingStep(
        name= f"{BUName}-{UsecaseName}-training",
        estimator= estimator,
        inputs = {
            "input1": TrainingInput(
                #s3_data= "s3://dlk-cloud-tier-10-preprocessed-ml-dev/awsworkshop/train/2022-08-31/output1/X.csv",
                s3_data = step_preprocess.properties.ProcessingOutputConfig.Outputs["output1"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "input2": TrainingInput(
               # s3_data= "s3://dlk-cloud-tier-10-preprocessed-ml-dev/awsworkshop/train/2022-08-31/output2/y.csv",
                s3_data = step_preprocess.properties.ProcessingOutputConfig.Outputs["output2"].S3Output.S3Uri,
               content_type="text/csv",
            ),

        },
       # depends_on = [step_preprocess]
    )
    
    
    ###### --------------------- Model Registry ----------------------------------------------------------------
    
    #registering the model

    step_register = RegisterModel(
        name= f"{BUName}-{UsecaseName}",
        estimator= estimator,
        model_data= step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types= ["text/csv"],
        response_types= ["text/csv"],
        inference_instances= ["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances= ["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        #model_metrics=model_metrics,
    )

    # ========================================= PIPELINE ORCHESTRATION ================================================
    
    # Pipeline instance
    pipeline = Pipeline(
        name=pipeline_name+env,
        parameters=[
            model_path,
            model_approval_status
        ],
        steps=[
            step_preprocess,
            step_train,
            step_register,
              ],
        #sagemaker_session=sagemaker_session,
        sagemaker_session= local_pipeline_session if mode=='local' else sagemaker_session,
    )
    return pipeline

![workflowimage](Images/pic8.PNG)

In [8]:
config["UserDetails"]["ProjectName"]

'sampleproject-testing'

In [9]:
dev_subnets = 'subnet-036d6b39301b4a41a'
dev_sg = 'sg-0970930479bbef573'

In [10]:
region ='ap-southeast-1'
role='arn:aws:iam::120582440665:role/Sagemaker'
#role=None
default_bucket='pipeline-sagemaker-test'
pipeline_def = get_pipeline(region, 
                            dev_subnets, 
                            dev_sg, 
                            role,
                            default_bucket)

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


sampleproject-testing mlops-user1 mlops


In [11]:
pipeline_def.upsert(role_arn=role)


No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


{'PipelineArn': 'arn:aws:sagemaker:ap-southeast-1:120582440665:pipeline/mlops-sample-usecase-pipeline-dev',
 'ResponseMetadata': {'RequestId': '60fc0903-7ea8-4bcd-a8f7-59b11e36c287',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '60fc0903-7ea8-4bcd-a8f7-59b11e36c287',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '106',
   'date': 'Mon, 24 Jul 2023 10:18:18 GMT'},
  'RetryAttempts': 0}}

In [13]:
execution = pipeline_def.start()
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:ap-southeast-1:120582440665:pipeline/mlops-sample-usecase-pipeline-dev',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-southeast-1:120582440665:pipeline/mlops-sample-usecase-pipeline-dev/execution/cpslerko4o3r',
 'PipelineExecutionDisplayName': 'execution-1690193908825',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2023, 7, 24, 10, 18, 28, 743000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2023, 7, 24, 10, 18, 28, 743000, tzinfo=tzlocal()),
 'CreatedBy': {},
 'LastModifiedBy': {},
 'ResponseMetadata': {'RequestId': 'b6623de2-6e77-490e-8bd1-edef4751be11',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'b6623de2-6e77-490e-8bd1-edef4751be11',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '441',
   'date': 'Mon, 24 Jul 2023 10:18:28 GMT'},
  'RetryAttempts': 0}}

In [13]:

# import boto3
# s3 = boto3.client("s3")
# #s3://dlk-cloud-tier-9-training-ml-dev/experiment-report/mlops-experiment-trial-excel/
# s3.download_file(
#     Bucket="dlk-cloud-tier-9-training-ml-dev", Key="experiment-report/mlops-experiment-trial-excel/mlops-model-performance-experiment.xlsx", Filename="file5.xlsx"
# )

In [14]:
# #------------------delete pipeline ------------------------
# import boto3
# sm_client = boto3.client("sagemaker")
# sm_client.delete_pipeline(PipelineName="mlops-sample-usecase-pipeline-dev")
