In [27]:
import sagemaker
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep, TrainingStep
from sagemaker.processing import FrameworkProcessor, ProcessingInput, ProcessingOutput
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.steps import CacheConfig

In [28]:
# Shared SageMaker context used by the cells below.
sagemaker_session = sagemaker.Session()
# Resolve the execution role through the session created above, instead of
# letting get_execution_role() construct a second, implicit Session.
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)
# NOTE(review): `bucket` and `region` are not read by any cell visible here;
# kept because later cells may rely on them.
bucket = sagemaker_session.default_bucket()
region = sagemaker_session.boto_region_name
# S3 prefix handed to the data-prep step as its ProcessingInput source.
raw_data_s3_uri = "s3://sagemakerantdata/ChurnPrediction/raw/"

In [29]:
# Scikit-learn processor that runs the data-prep script on a single
# ml.t3.medium instance.
sklearn_processor = SKLearnProcessor(
    framework_version="1.2-1",
    role=role,
    instance_type="ml.t3.medium",
    instance_count=1,
    # Reuse the notebook's session so the processor and the Pipeline share one
    # session (same region / default bucket) instead of creating another.
    sagemaker_session=sagemaker_session,
)

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [43]:
# Step-level caching: a matching earlier result may be reused until the
# "30d" expiry elapses.
cache_config = CacheConfig(enable_caching=True, expire_after="30d")

# Both processed splits are written beneath this single S3 prefix.
processed_s3_prefix = "s3://sagemakerantdata/ChurnPrediction/processed"

# Processing step that runs scripts/data_ingestion.py against the raw data
# and publishes two named outputs ("train" and "test").
data_prep = ProcessingStep(
    name="ChurnDataPrep",
    processor=sklearn_processor,
    code="scripts/data_ingestion.py",
    inputs=[
        # Raw data is mounted inside the container at /opt/ml/processing/input.
        ProcessingInput(source=raw_data_s3_uri, destination="/opt/ml/processing/input")
    ],
    outputs=[
        ProcessingOutput(
            output_name="train",
            source="/opt/ml/processing/output/train",
            destination=f"{processed_s3_prefix}/train",
        ),
        ProcessingOutput(
            output_name="test",
            source="/opt/ml/processing/output/test",
            destination=f"{processed_s3_prefix}/test",
        ),
    ],
    # Attach the cache settings defined above; without this argument the
    # CacheConfig object is created but never takes effect.
    # NOTE(review): with caching on, an unchanged step may be skipped on
    # re-execution - confirm that is desired when the raw data changes in place.
    cache_config=cache_config,
)

In [44]:
# Assemble the pipeline; for now it contains only the data-prep step.
pipeline = Pipeline(
    name="ChurnPredictionPipeline",
    sagemaker_session=sagemaker_session,
    steps=[data_prep],
    parameters=[],  # pipeline-level parameters would be declared here
)

# Create the pipeline definition in SageMaker, or update it if it exists.
pipeline.upsert(role_arn=role)

# Kick off a run and show its description as the cell output.
execution = pipeline.start()
execution.describe()



{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:891377324517:pipeline/ChurnPredictionPipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-south-1:891377324517:pipeline/ChurnPredictionPipeline/execution/jienh8af2htr',
 'PipelineExecutionDisplayName': 'execution-1758199815263',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2025, 9, 18, 12, 50, 15, 211000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2025, 9, 18, 12, 50, 15, 211000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:ap-south-1:891377324517:user-profile/d-51dx373wnss0/default-20250903T125584',
  'UserProfileName': 'default-20250903T125584',
  'DomainId': 'd-51dx373wnss0',
  'IamIdentity': {'Arn': 'arn:aws:sts::891377324517:assumed-role/AmazonSageMaker-ExecutionRole-20250903T125584/SageMaker',
   'PrincipalId': 'AROA47CR2SHSWJ6BV5G2F:SageMaker'}},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:ap-south-1:891377324517:user-profile/d-51dx373wnss0/defa

In [45]:
# Print one status line per pipeline step, followed by the failure reason
# when the step record carries one.
# NOTE(review): statuses are a snapshot taken at call time; run this after the
# execution has finished to see final results.
steps = execution.list_steps()
for step in steps:
    status_line = f"{step['StepName']} → {step['StepStatus']}"
    print(status_line)
    if 'FailureReason' not in step:
        continue
    print(f"   Reason: {step['FailureReason']}")


ChurnDataPrep → Succeeded
