# AWS SageMaker Pipeline Runner

This notebook orchestrates the creation and execution of the ASD detection pipeline on AWS SageMaker.

In [None]:
import sagemaker
import boto3
import aws_config
import data_ingestion
from pipeline import get_pipeline

## 1. Setup Data
Download a small subset of data and upload it to S3.

In [None]:
# Prepare the test dataset and upload it to S3, keeping whatever the helper
# returns in `s3_uris`.
# Original note: this downloads 5 subjects for testing the pipeline. The count
# is not passed here, so it is presumably fixed inside data_ingestion — confirm.
# NOTE(review): `s3_uris` is not referenced by the later cells shown in this
# notebook; the pipeline input URL is built from aws_config instead.
s3_uris = data_ingestion.prepare_data_and_upload()

## 2. Define and Upsert Pipeline
Build the pipeline definition (Preprocessing -> Training) and create or update it in SageMaker.

In [None]:
# Resolve the IAM role: use the one configured in aws_config, and only ask
# SageMaker for the current execution role when none is configured.
role = aws_config.ROLE
role = sagemaker.get_execution_role() if role is None else role

# Collect the pipeline settings in one place, then build the pipeline object.
pipeline_kwargs = {
    "region": aws_config.REGION,
    "role": role,
    "default_bucket": aws_config.BUCKET_NAME,
}
pipeline = get_pipeline(**pipeline_kwargs)

# Upsert under the same role that was handed to the pipeline builder.
pipeline.upsert(role_arn=role)
print(f"Pipeline {pipeline.name} upserted.")

## 3. Start Pipeline Execution

In [None]:
# Location of the raw input data: the "raw" folder under the configured
# bucket and prefix.
raw_data_url = f"s3://{aws_config.BUCKET_NAME}/{aws_config.PREFIX}/raw"

# Start a run, overriding the "InputDataUrl" pipeline parameter.
# NOTE(review): presumably this parameter is declared by get_pipeline —
# confirm the name matches the pipeline definition.
execution = pipeline.start(parameters={"InputDataUrl": raw_data_url})

print(f"Pipeline execution started: {execution.arn}")

In [None]:
# Wait on the execution started above before inspecting its steps.
# NOTE(review): polling interval and timeout are whatever the SageMaker SDK
# defaults to — confirm they are long enough for a full training run.
execution.wait()

In [None]:
# Last expression in the cell, so the notebook displays the returned value.
# NOTE(review): presumably one entry per pipeline step with its status —
# confirm the exact shape against the SageMaker SDK documentation.
execution.list_steps()