In [2]:
!pip install -qU sagemaker

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
distributed 2022.7.0 requires tornado<6.2,>=6.0.3, but you have tornado 6.4 which is incompatible.[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
# Create the local data directory; -p makes this a no-op if it already exists.
!mkdir -p data

In [4]:
import sys

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import dump_svmlight_file

import boto3
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession

# Regular session: used for immediate calls such as the default-bucket lookup.
sagemaker_session = sagemaker.session.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
# Pipeline session: passed to the processors, estimator and model defined
# later, whose .run()/.fit()/.create() return values are used as step_args.
pipeline_session = PipelineSession()
default_bucket = sagemaker_session.default_bucket()
model_package_group_name = "FinalProjectCICDPackageGroupName"
s3 = boto3.resource("s3")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [5]:
# Load the raw churn dataset from the local data directory.
df = pd.read_csv("data/Churn_Modelling.csv")

# Bare last expression: render the frame as the cell output.
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


We split the data into two parts: one used for training, and a held-out part used later for batch inference with the trained model.

In [6]:
# Hold out 10% of the rows for batch inference; the remaining 90% is written
# out as the pipeline's input data. A fixed random_state keeps the split
# reproducible across kernel restarts.
df, df_batch = train_test_split(df, test_size=0.1, random_state=1)

df.to_csv("data/ChurnPredictionMinusBatch.csv", index=False)

Convert the batch data to LIBSVM format, as expected by SageMaker.

In [7]:
# LIBSVM data must not contain strings, and the batch features must line up
# with the columns the model is trained on in code/preprocessing.py: drop the
# identifier columns (including CustomerId), one-hot encode the categorical
# columns, and drop the redundant Gender_Male indicator.
df_batch = df_batch.drop(columns=["RowNumber", "Surname", "CustomerId"])
df_batch = pd.get_dummies(df_batch)
df_batch = df_batch.drop(columns="Gender_Male")

dump_svmlight_file(
    X=df_batch.drop(columns=["Exited"]),
    y=df_batch["Exited"],
    f="data/Churn_Modelling_batch.libsvm",
)

Upload data to S3

In [8]:
# Upload the training CSV under the project prefix of the default bucket and
# show the resulting S3 URI.
base_uri = f"s3://{default_bucket}/ChurnPredictionCICD"
local_path = "data/ChurnPredictionMinusBatch.csv"

input_data_uri = sagemaker.s3.S3Uploader.upload(local_path=local_path, desired_s3_uri=base_uri)
print(input_data_uri)

s3://sagemaker-us-east-1-075039479415/ChurnPredictionCICD/ChurnPredictionMinusBatch.csv


Upload batch transform dataset to S3

In [9]:
# Upload the LIBSVM batch file under the same project prefix and show the
# resulting S3 URI.
base_uri = f"s3://{default_bucket}/ChurnPredictionCICD"
local_path = "data/Churn_Modelling_batch.libsvm"

batch_data_uri = sagemaker.s3.S3Uploader.upload(local_path=local_path, desired_s3_uri=base_uri)
print(batch_data_uri)

s3://sagemaker-us-east-1-075039479415/ChurnPredictionCICD/Churn_Modelling_batch.libsvm


## Define Parameters to Parametrize Pipeline Execution

In [10]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat,
)

# Pipeline parameters; each default can be overridden per execution through
# pipeline.start(parameters=...).
processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1,
)
instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.m5.xlarge",
)
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    default_value="PendingManualApproval",
)

# S3 locations of the uploaded training and batch-inference datasets.
input_data = ParameterString(name="InputData", default_value=input_data_uri)
batch_data = ParameterString(name="BatchData", default_value=batch_data_uri)

# Minimum F1 score checked by the condition step.
f1_threshold = ParameterFloat(
    name="F1Threshold",
    default_value=0.5,
)

## Define a Processing Step for Feature Engineering
Develop a preprocessing script that cleans the data and splits it into train, validation, and test sets.

In [11]:
# Create the directory that holds the pipeline scripts; -p makes this a no-op if it already exists.
!mkdir -p code

In [12]:
%%writefile code/preprocessing.py
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    base_dir = "/opt/ml/processing"

    raw = pd.read_csv(f"{base_dir}/input/ChurnPredictionMinusBatch.csv")

    # Drop the row/customer identifiers and the surname, then one-hot encode
    # the remaining string columns.
    features = raw.drop(columns=["RowNumber", "Surname", "CustomerId"])
    # dtype=int forces 0/1 indicator columns. Newer pandas versions default to
    # bool indicators, which to_csv would write as True/False instead of numbers.
    features = pd.get_dummies(features, dtype=int)
    features = features.drop(columns="Gender_Male")  # redundant column

    # Split features and target
    y = features.pop("Exited")
    X = features

    # Split data into train (80%), test (10%), and validation (10%) sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=1)

    # Save data to CSV files: label in the first column, no header, no index.
    train_path = f"{base_dir}/train/train.csv"
    pd.concat([y_train, X_train], axis=1).to_csv(train_path, header=False, index=False)

    validation_path = f"{base_dir}/validation/validation.csv"
    pd.concat([y_val, X_val], axis=1).to_csv(validation_path, header=False, index=False)

    test_path = f"{base_dir}/test/test.csv"
    pd.concat([y_test, X_test], axis=1).to_csv(test_path, header=False, index=False)

Overwriting code/preprocessing.py


Create an instance of a `SKLearnProcessor` processor and use that in our `ProcessingStep`.

In [13]:
from sagemaker.sklearn.processing import SKLearnProcessor


framework_version = "1.2-1"

# scikit-learn processor that runs code/preprocessing.py; the instance count
# comes from the ProcessingInstanceCount pipeline parameter.
sklearn_processor = SKLearnProcessor(
    role=role,
    framework_version=framework_version,
    instance_count=processing_instance_count,
    instance_type="ml.m5.xlarge",
    base_job_name="ChurnPrediction-process",
    sagemaker_session=pipeline_session,
)

Pass the output from the sklearn processor to ProcessingStep.

In [14]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

# One output channel per dataset split written by the preprocessing script,
# each sourced from the matching directory under /opt/ml/processing.
processor_args = sklearn_processor.run(
    code="code/preprocessing.py",
    inputs=[ProcessingInput(source=input_data, destination="/opt/ml/processing/input")],
    outputs=[
        ProcessingOutput(output_name=split, source=f"/opt/ml/processing/{split}")
        for split in ("train", "validation", "test")
    ],
)

step_process = ProcessingStep(
    name="BankChurnProcess",
    step_args=processor_args,
)



## Define a Training Step to Train a Model

In [15]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

model_path = f"s3://{default_bucket}/ChurnPredictionCICD"

# XGBoost container image; reused later for evaluation and model creation.
image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.0-1",
    py_version="py3",
    instance_type="ml.m5.xlarge",
)

# The training instance type comes from the TrainingInstanceType parameter.
xgb_train = Estimator(
    role=role,
    image_uri=image_uri,
    instance_count=1,
    instance_type=instance_type,
    output_path=model_path,
    sagemaker_session=pipeline_session,
)
xgb_train.set_hyperparameters(
    objective="binary:logistic",
    colsample_bytree=0.7,
    gamma=10.0,
    eta=0.03,
    max_depth=3,
    num_round=215,
    subsample=0.7,
    alpha=2,
    reg_lambda=2.0,
)

# Feed the "train" and "validation" outputs of the processing step to the
# estimator as CSV channels of the same names.
train_args = xgb_train.fit(
    inputs={
        channel: TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[channel].S3Output.S3Uri,
            content_type="text/csv",
        )
        for channel in ("train", "validation")
    }
)

Define the training step

In [16]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep


# Wrap the arguments captured from xgb_train.fit(...) in a pipeline training step.
step_train = TrainingStep(name="BankChurnTrain", step_args=train_args)

## Define a Model Evaluation Step to Evaluate the Trained Model
Develop an evaluation script

In [17]:
%%writefile code/evaluation.py
import json
import pathlib
import pickle
import tarfile

import joblib
import numpy as np
import pandas as pd
import xgboost

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

if __name__ == "__main__":
    # Unpack the model archive into the working directory.
    model_path = "/opt/ml/processing/model/model.tar.gz"
    with tarfile.open(model_path) as tar:
        tar.extractall(path=".")

    # Open via a context manager so the file handle is closed deterministically.
    # NOTE: pickle runs arbitrary code on load; only use it on trusted
    # artifacts such as the model produced by this pipeline's training step.
    with open("xgboost-model", "rb") as model_file:
        model = pickle.load(model_file)

    # test.csv has no header; column 0 is the label, the rest are features.
    test_path = "/opt/ml/processing/test/test.csv"
    df = pd.read_csv(test_path, header=None)

    y_test = df.iloc[:, 0].to_numpy()
    features = df.iloc[:, 1:]

    X_test = xgboost.DMatrix(features.values)

    predictions = model.predict(X_test)
    predictions = (predictions >= 0.5).astype(int)  # change to binary

    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    print("p", precision, "r", recall, "f1", f1)

    # The pipeline's condition step reads classification_metrics.f1_score
    # from this report.
    report_dict = {
        "classification_metrics": {
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
        },
    }

    output_dir = "/opt/ml/processing/evaluation"
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    evaluation_path = f"{output_dir}/evaluation.json"
    with open(evaluation_path, "w") as f:
        json.dump(report_dict, f)

Overwriting code/evaluation.py


Create an instance of a `ScriptProcessor` processor and use it in the `ProcessingStep`.

In [18]:
from sagemaker.processing import ScriptProcessor


# Run the evaluation script in the same XGBoost image used for training.
script_eval = ScriptProcessor(
    role=role,
    image_uri=image_uri,
    command=["python3"],
    instance_count=1,
    instance_type="ml.m5.xlarge",
    base_job_name="script-BankChurn-eval",
    sagemaker_session=pipeline_session,
)

# Inputs: the trained model artifact and the test split from preprocessing.
# Output: the directory in which the script writes evaluation.json.
eval_args = script_eval.run(
    code="code/evaluation.py",
    inputs=[
        ProcessingInput(
            source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
            destination="/opt/ml/processing/model",
        ),
        ProcessingInput(
            source=step_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
            destination="/opt/ml/processing/test",
        ),
    ],
    outputs=[
        ProcessingOutput(
            output_name="evaluation",
            source="/opt/ml/processing/evaluation",
        ),
    ],
)

Use the arguments returned by the processor's `.run()` call, which carry the input and output channels and the script to execute, to construct a `ProcessingStep`. The `PropertyFile` exposes `evaluation.json` so that later steps can read values from it.

In [19]:
from sagemaker.workflow.properties import PropertyFile


# Expose evaluation.json from the "evaluation" output as a property file.
evaluation_report = PropertyFile(
    name="EvaluationReport",
    output_name="evaluation",
    path="evaluation.json",
)

step_eval = ProcessingStep(name="BankChurnModelEval", step_args=eval_args, property_files=[evaluation_report])

## Define a Create Model Step to Create a Model

In [20]:
from sagemaker.model import Model

# Model backed by the training step's artifact and the XGBoost image.
model = Model(
    role=role,
    image_uri=image_uri,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    sagemaker_session=pipeline_session,
)

Define the `ModelStep` by providing the return values from `model.create()` as the step arguments.

In [21]:
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep

# Wrap model.create(...) in a pipeline step; the transformer defined later
# refers to this step's ModelName property.
# NOTE(review): accelerator_type requests an Elastic Inference accelerator —
# confirm this is still wanted and available for the target account.
step_create_model = ModelStep(
    name="BankChurnCreateModel",
    step_args=model.create(instance_type="ml.m5.large", accelerator_type="ml.eia1.medium"),
)

## Define a Transform Step to Perform Batch Transformation

In [22]:
from sagemaker.transformer import Transformer


# Batch transformer for the model created by the create-model step; results
# are written under the BankChurnTransform prefix of the default bucket.
transformer = Transformer(
    model_name=step_create_model.properties.ModelName,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=f"s3://{default_bucket}/BankChurnTransform",
)

Pass in the transformer instance and the `TransformInput` with the `batch_data` pipeline parameter defined earlier.

In [23]:
from sagemaker.inputs import TransformInput
from sagemaker.workflow.steps import TransformStep


# Batch-transform step; its input location is the BatchData pipeline parameter.
step_transform = TransformStep(
    name="BankChurnTransform",
    transformer=transformer,
    inputs=TransformInput(data=batch_data),
)

## Define a Register Model Step to Create a Model Package

In [25]:
from sagemaker.model_metrics import MetricsSource, ModelMetrics

# Location of evaluation.json: the first (and only) output of the eval step.
evaluation_s3_uri = "{}/evaluation.json".format(
    step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
)

# Attach the evaluation report to the model package as its model statistics.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(s3_uri=evaluation_s3_uri, content_type="application/json")
)

# Register the model in the package group; the approval status comes from the
# ModelApprovalStatus pipeline parameter.
register_args = model.register(
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
    model_metrics=model_metrics,
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
)
step_register = ModelStep(
    name="BankChurnCICDRegisterModel",
    step_args=register_args,
)



## Define a Fail Step to Terminate the Pipeline Execution and Mark it as Failed

In [26]:
from sagemaker.workflow.fail_step import FailStep
from sagemaker.workflow.functions import Join

# Fail step used as the else-branch of the condition step. The error message
# is joined with the F1Threshold parameter value, giving e.g.
# "Execution failed due to f1 < 0.9".
step_fail = FailStep(
    name="ChurnPredictionF1Fail",
    error_message=Join(on=" ", values=["Execution failed due to f1 <", f1_threshold]),
)

## Define a Condition Step to Check Model Performance and Conditionally Create a Model and Run a Batch Transformation and Register a Model in the Model Registry, Or Terminate the Execution in Failed State

In [27]:
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.functions import JsonGet


# True when classification_metrics.f1_score from the evaluation report is
# greater than or equal to the F1Threshold parameter.
# NOTE: despite the name `cond_lte`, this is a greater-than-or-equal condition.
cond_lte = ConditionGreaterThanOrEqualTo(
    left=JsonGet(
        step_name=step_eval.name,
        property_file=evaluation_report,
        json_path="classification_metrics.f1_score",
    ),
    right=f1_threshold,
)

# If the condition holds: register the model, create it, and run the batch
# transform. Otherwise: run the fail step.
step_cond = ConditionStep(
    name="BankChurnF1Cond",
    conditions=[cond_lte],
    if_steps=[step_register, step_create_model, step_transform],
    else_steps=[step_fail],
)

## Define a Pipeline of Parameters, Steps, and Conditions

In [28]:
from sagemaker.workflow.pipeline import Pipeline


pipeline_name = "BankChurnPipeline"

# The steps reached through the condition step's if/else branches are not
# listed here; only the top-level steps are.
pipeline = Pipeline(
    name=pipeline_name,
    steps=[step_process, step_train, step_eval, step_cond],
    parameters=[
        processing_instance_count,
        instance_type,
        model_approval_status,
        input_data,
        batch_data,
        f1_threshold,
    ],
)

### Examine the Pipeline

In [29]:
import json


# Parse the pipeline definition (a JSON string) into a dict and display it.
definition = json.loads(pipeline.definition())
definition



{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ProcessingInstanceCount',
   'Type': 'Integer',
   'DefaultValue': 1},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ModelApprovalStatus',
   'Type': 'String',
   'DefaultValue': 'PendingManualApproval'},
  {'Name': 'InputData',
   'Type': 'String',
   'DefaultValue': 's3://sagemaker-us-east-1-075039479415/ChurnPredictionCICD/ChurnPredictionMinusBatch.csv'},
  {'Name': 'BatchData',
   'Type': 'String',
   'DefaultValue': 's3://sagemaker-us-east-1-075039479415/ChurnPredictionCICD/Churn_Modelling_batch.libsvm'},
  {'Name': 'F1Threshold', 'Type': 'Float', 'DefaultValue': 0.5}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'BankChurnProcess',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xla

## Submit the pipeline to SageMaker and start execution

In [30]:
# Submit the pipeline definition to SageMaker using the notebook's execution role.
pipeline.upsert(role_arn=role)



{'PipelineArn': 'arn:aws:sagemaker:us-east-1:075039479415:pipeline/BankChurnPipeline',
 'ResponseMetadata': {'RequestId': 'b2b35c26-9129-47b5-86a4-b16adc7127b9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'b2b35c26-9129-47b5-86a4-b16adc7127b9',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '85',
   'date': 'Wed, 21 Feb 2024 05:26:13 GMT'},
  'RetryAttempts': 0}}

Start the pipeline and accept all the default parameters.

In [30]:
# Start an execution using the default value of every pipeline parameter.
execution = pipeline.start()

In [31]:
# Show the execution's current status and metadata.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:075039479415:pipeline/BankChurnPipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:075039479415:pipeline/BankChurnPipeline/execution/0984dq4ydu65',
 'PipelineExecutionDisplayName': 'execution-1708423170324',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2024, 2, 20, 9, 59, 30, 210000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 2, 20, 9, 59, 30, 210000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:075039479415:user-profile/d-xwawdmhn3mkv/joseph-sagemaker-studio',
  'UserProfileName': 'joseph-sagemaker-studio',
  'DomainId': 'd-xwawdmhn3mkv'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:075039479415:user-profile/d-xwawdmhn3mkv/joseph-sagemaker-studio',
  'UserProfileName': 'joseph-sagemaker-studio',
  'DomainId': 'd-xwawdmhn3mkv'},
 'ResponseMetadata': {'RequestId': '2a6c0f90-6a28-498b-af7b-ffe8cdfdde5a',
  'HTTPStatusCode

Wait for the execution to complete.

In [32]:
# Block until the pipeline execution completes.
execution.wait()

In [33]:
# List the steps of this execution with their status and metadata.
execution.list_steps()

[{'StepName': 'BankChurnTransform',
  'StartTime': datetime.datetime(2024, 2, 20, 10, 13, 53, 4000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 20, 10, 19, 5, 235000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:transform-job/pipelines-0984dq4ydu65-BankChurnTransform-Wutkc5MPw7'}},
  'AttemptCount': 1},
 {'StepName': 'BankChurnCICDRegisterModel-RegisterModel',
  'StartTime': datetime.datetime(2024, 2, 20, 10, 13, 51, 39000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 20, 10, 13, 52, 405000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:model-package/FinalProjectCICDPackageGroupName/4'}},
  'AttemptCount': 1},
 {'StepName': 'BankChurnCreateModel-CreateModel',
  'StartTime': datetime.datetime(2024, 2, 20, 10, 13, 51, 39000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 20, 10, 13,

### Examining the Evaluation

In [34]:
from pprint import pprint


# Download evaluation.json from the eval step's output location and
# pretty-print its contents.
evaluation_uri = "{}/evaluation.json".format(
    step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
)
evaluation_json = sagemaker.s3.S3Downloader.read_file(evaluation_uri)
pprint(json.loads(evaluation_json))



{'classification_metrics': {'f1_score': 0.5196850393700787,
                            'precision': 0.7586206896551724,
                            'recall': 0.39520958083832336}}


### Lineage

In [35]:
import time
from sagemaker.lineage.visualizer import LineageTableVisualizer


# Show the lineage table for each step of the execution. list_steps() is
# reversed so the steps are walked from the first (processing) to the last.
viz = LineageTableVisualizer(sagemaker.session.Session())
for execution_step in reversed(execution.list_steps()):
    print(execution_step)
    display(viz.show(pipeline_execution_step=execution_step))
    # Pause between lookups (presumably to avoid API throttling — confirm).
    time.sleep(5)

{'StepName': 'BankChurnProcess', 'StartTime': datetime.datetime(2024, 2, 20, 9, 59, 31, 385000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 2, 20, 10, 5, 46, 805000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:processing-job/pipelines-0984dq4ydu65-BankChurnProcess-aMkCmcGMax'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...8adaeaf0cd68202d698d233/preprocessing.py,Input,DataSet,ContributedTo,artifact
1,s3://...ictionCICD/ChurnPredictionMinusBatch.csv,Input,DataSet,ContributedTo,artifact
2,68331...com/sagemaker-scikit-learn:1.2-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...984dq4ydu65/BankChurnProcess/output/test,Output,DataSet,Produced,artifact
4,s3://...ydu65/BankChurnProcess/output/validation,Output,DataSet,Produced,artifact
5,s3://...84dq4ydu65/BankChurnProcess/output/train,Output,DataSet,Produced,artifact


{'StepName': 'BankChurnTrain', 'StartTime': datetime.datetime(2024, 2, 20, 10, 5, 47, 614000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 2, 20, 10, 8, 42, 683000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:training-job/pipelines-0984dq4ydu65-BankChurnTrain-CHHxUAJq20'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...ydu65/BankChurnProcess/output/validation,Input,DataSet,ContributedTo,artifact
1,s3://...84dq4ydu65/BankChurnProcess/output/train,Input,DataSet,ContributedTo,artifact
2,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...hurnTrain-CHHxUAJq20/output/model.tar.gz,Output,Model,Produced,artifact


{'StepName': 'BankChurnModelEval', 'StartTime': datetime.datetime(2024, 2, 20, 10, 8, 43, 595000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 2, 20, 10, 13, 49, 185000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:processing-job/pipelines-0984dq4ydu65-BankChurnModelEval-ZLvCFHt1KN'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...ce409aa4b5d80635c3a0478a6a/evaluation.py,Input,DataSet,ContributedTo,artifact
1,s3://...984dq4ydu65/BankChurnProcess/output/test,Input,DataSet,ContributedTo,artifact
2,s3://...hurnTrain-CHHxUAJq20/output/model.tar.gz,Input,Model,ContributedTo,artifact
3,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
4,s3://...024-02-20-09-59-28-465/output/evaluation,Output,DataSet,Produced,artifact


{'StepName': 'BankChurnF1Cond', 'StartTime': datetime.datetime(2024, 2, 20, 10, 13, 50, 65000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 2, 20, 10, 13, 50, 326000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Condition': {'Outcome': 'True'}}, 'AttemptCount': 1}


None

{'StepName': 'BankChurnCreateModel-CreateModel', 'StartTime': datetime.datetime(2024, 2, 20, 10, 13, 51, 39000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 2, 20, 10, 13, 52, 444000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:model/pipelines-0984dq4ydu65-bankchurncreatemodel-596fqszcff'}}, 'AttemptCount': 1}


None

{'StepName': 'BankChurnCICDRegisterModel-RegisterModel', 'StartTime': datetime.datetime(2024, 2, 20, 10, 13, 51, 39000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 2, 20, 10, 13, 52, 405000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:model-package/FinalProjectCICDPackageGroupName/4'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...hurnTrain-CHHxUAJq20/output/model.tar.gz,Input,Model,ContributedTo,artifact
1,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
2,FinalProjectCICDPackageGroupName-4-PendingManu...,Input,Approval,ContributedTo,action
3,FinalProjectCICDPackageGroupName-1708418443-aw...,Output,ModelGroup,AssociatedWith,context


{'StepName': 'BankChurnTransform', 'StartTime': datetime.datetime(2024, 2, 20, 10, 13, 53, 4000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 2, 20, 10, 19, 5, 235000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:transform-job/pipelines-0984dq4ydu65-BankChurnTransform-Wutkc5MPw7'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...hurnTrain-CHHxUAJq20/output/model.tar.gz,Input,Model,ContributedTo,artifact
1,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
2,s3://...dictionCICD/Churn_Modelling_batch.libsvm,Input,DataSet,ContributedTo,artifact
3,s3://...s-east-1-075039479415/BankChurnTransform,Output,DataSet,Produced,artifact


## Parameterized Executions

In [36]:
# Start another execution, overriding only the model approval status.
execution = pipeline.start(parameters={"ModelApprovalStatus": "Approved"})

In [None]:
# Block until the parameterized execution completes.
execution.wait()

In [None]:
# List the steps of the parameterized execution with their status and metadata.
execution.list_steps()

[{'StepName': 'BankChurnTransform',
  'StartTime': datetime.datetime(2024, 2, 20, 10, 32, 13, 72000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 20, 10, 37, 12, 36000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:transform-job/pipelines-97lpnl8wr66h-BankChurnTransform-xIIobCQHEy'}},
  'AttemptCount': 1},
 {'StepName': 'BankChurnCICDRegisterModel-RegisterModel',
  'StartTime': datetime.datetime(2024, 2, 20, 10, 32, 11, 274000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 20, 10, 32, 12, 278000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:model-package/FinalProjectCICDPackageGroupName/5'}},
  'AttemptCount': 1},
 {'StepName': 'BankChurnCreateModel-CreateModel',
  'StartTime': datetime.datetime(2024, 2, 20, 10, 32, 11, 274000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 20, 10, 

Change the f1 score threshold

In [31]:
# Raise the F1 threshold to 0.9 so the condition step takes its else-branch.
execution = pipeline.start(
    parameters={"F1Threshold": 0.9},
)

The FailStep is activated and the execution will be marked as failed

In [32]:
# wait() raises when the execution ends in the Failed state; the failure is
# the point of this demonstration, so catch it and print the message instead
# of stopping the notebook.
try:
    execution.wait()
except Exception as error:
    print(error)

Waiter PipelineExecutionComplete failed: Waiter encountered a terminal failure state: For expression "PipelineExecutionStatus" we matched expected path: "Failed"


In [33]:
# List the steps of the failed execution; the fail step carries the failure reason.
execution.list_steps()

[{'StepName': 'ChurnPredictionF1Fail',
  'StartTime': datetime.datetime(2024, 2, 21, 5, 39, 14, 773000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 21, 5, 39, 15, 150000, tzinfo=tzlocal()),
  'StepStatus': 'Failed',
  'FailureReason': 'Execution failed due to f1 < 0.9',
  'Metadata': {'Fail': {'ErrorMessage': 'Execution failed due to f1 < 0.9'}},
  'AttemptCount': 1},
 {'StepName': 'BankChurnF1Cond',
  'StartTime': datetime.datetime(2024, 2, 21, 5, 39, 14, 56000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 21, 5, 39, 14, 329000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'Condition': {'Outcome': 'False'}},
  'AttemptCount': 1},
 {'StepName': 'BankChurnModelEval',
  'StartTime': datetime.datetime(2024, 2, 21, 5, 34, 7, 976000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 2, 21, 5, 39, 13, 310000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:075039479415:pr