In [3]:
import boto3
import sagemaker
from datetime import datetime
import json

# --- Configuration -----------------------------------------------------------
# AWS region, the S3 bucket that holds the script, and the script's object key.
region = 'eu-west-2'
bucket = 'cmg-sagemaker-compliance-case-shared-notebooks'
script_key = 'phase_3_training.py'

# Directory inside the processing container that the "code" input is mounted
# at. Defined once so the input's LocalPath and the container entrypoint
# cannot drift apart.
code_dir = '/opt/ml/processing/input/code'

# File name the entrypoint executes. Assumes the object is placed directly
# under `code_dir` under its own file name (which is what the previously
# hard-coded entrypoint path relied on) -- confirm if `script_key` ever gains
# an S3 "folder" prefix.
script_name = script_key.rsplit('/', 1)[-1]

# Execution role of the current SageMaker session; used both for the
# processing job and for the pipeline itself.
role = sagemaker.get_execution_role()

# Low-level SageMaker client bound to the configured region.
sagemaker_client = boto3.client('sagemaker', region_name=region)

# Container image for the scikit-learn framework, version 0.23-1, in `region`.
image_uri = sagemaker.image_uris.retrieve('sklearn', region, version='0.23-1')

# Full S3 URI of the script to run.
s3_script_uri = f's3://{bucket}/{script_key}'

# --- Pipeline definition -----------------------------------------------------
# A single Processing step that makes the script available under `code_dir`
# and runs it with python3.
pipeline_definition = {
    "Version": "2020-12-01",
    "Metadata": {},
    "Parameters": [],
    "Steps": [
        {
            "Name": "MyProcessingStep",
            "Type": "Processing",
            "Arguments": {
                "ProcessingResources": {
                    "ClusterConfig": {
                        "InstanceCount": 1,
                        "InstanceType": "ml.m5.xlarge",
                        "VolumeSizeInGB": 30,
                    },
                },
                "AppSpecification": {
                    "ImageUri": image_uri,
                    # Built from code_dir/script_name instead of a literal
                    # path, so renaming the script only requires changing
                    # `script_key`.
                    "ContainerEntrypoint": [
                        "python3",
                        f"{code_dir}/{script_name}",
                    ],
                },
                "RoleArn": role,
                "ProcessingInputs": [
                    {
                        "InputName": "code",
                        "S3Input": {
                            "S3Uri": s3_script_uri,
                            "LocalPath": code_dir,
                            "S3DataType": "S3Prefix",
                            "S3InputMode": "File",
                            "S3DataDistributionType": "FullyReplicated",
                            "S3CompressionType": "None",
                        },
                    },
                ],
                "StoppingCondition": {
                    # Upper bound on the job's runtime: one hour.
                    "MaxRuntimeInSeconds": 3600,
                },
            },
        },
    ],
}

# --- Pipeline creation -------------------------------------------------------
# A timestamp suffix gives every run of this cell a distinct pipeline name.
timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

# Register the pipeline. This call only creates the pipeline; nothing in this
# cell starts an execution.
response = sagemaker_client.create_pipeline(
    PipelineName=f"MyPipeline-{timestamp}",
    PipelineDefinition=json.dumps(pipeline_definition),
    RoleArn=role,
)

# ARN of the newly created pipeline, as returned by the service.
pipeline_arn = response['PipelineArn']

print(f'Created pipeline: {pipeline_arn}')


Created pipeline: arn:aws:sagemaker:eu-west-2:798861612849:pipeline/MyPipeline-2023-07-03-21-07-05
