# Set up libraries, parameters and roles

In [1]:

import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.processing import ScriptProcessor, Processor, ProcessingInput, ProcessingOutput


In [2]:

# Where the job's pieces live: the S3 bucket the script is uploaded to,
# the local script file name, and the container image the job runs in.
s3_bucket = 'test-container-sagemaker-processing'
script_name = 'test_container.py'
image_uri = '684530550045.dkr.ecr.us-east-2.amazonaws.com/train_automl'

# Value forwarded to the script through its --parameter1 argument.
parameter1 = 'hello world'


In [3]:

# IAM role handed to the processing job (fixed ARN, not looked up at runtime).
role = 'arn:aws:iam::684530550045:role/sagemaker-role'

# SageMaker session layered on a boto3 session that uses the 'default'
# credentials profile.
session = boto3.Session(profile_name='default')
sagemaker_session = sagemaker.Session(boto_session=session)


# Write test_container.py

In [4]:
%%writefile test_container.py

import h2o
import logging
import argparse

from opt.program.shared_core_data_processing import * 

if __name__ == '__main__':
    
    # Parse parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--parameter1')
    
    args, _ = parser.parse_known_args()
    
    parameter1 = args.parameter1
    
    print(parameter1)
    
    h2o.init()
    

Overwriting test_container.py


# Upload the script from the local machine to S3

In [5]:

# Copy the local script under the 'scripts' prefix of the bucket; the S3 URI
# returned by upload_data is shown as the cell output.
sagemaker_session.upload_data(
    path=script_name,
    bucket=s3_bucket,
    key_prefix='scripts',
)


's3://test-container-sagemaker-processing/scripts/test_container.py'

# Run SageMaker Processing Job

In [6]:

# Processor that runs a script with python3 inside the custom image on a
# single ml.m5.large instance.
processor = ScriptProcessor(
    image_uri=image_uri,
    role=role,
    command=['python3'],
    instance_count=1,
    instance_type='ml.m5.large',
    max_runtime_in_seconds=1800,  # 30 minutes
    base_job_name='train-automl',
    sagemaker_session=sagemaker_session,
)

# S3 location of the script the job should execute.
script_s3_uri = f's3://{s3_bucket}/scripts/{script_name}'

# wait=False: submit the job and return without blocking until it finishes.
processor.run(
    code=script_s3_uri,
    arguments=['--parameter1', parameter1],
    wait=False,
)



Job Name:  train-automl-2022-04-27-13-28-32-545
Inputs:  [{'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://test-container-sagemaker-processing/scripts/test_container.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  []
