In [35]:
# Load reticulate and bind the Python `sagemaker` and `boto3` modules to R names.
library("reticulate")
sagemaker <- import("sagemaker")
boto3 <- import("boto3")

# Execution role reported by the SageMaker SDK; handed to the processor later.
role <- sagemaker$get_execution_role()

In [36]:
# URI of the container image the processing job will run.
processing_repository_uri <-
    "581320662326.dkr.ecr.us-east-1.amazonaws.com/sagemaker-processing-r:2"

# Create a boto3 session with no explicit arguments and wrap it in a
# SageMaker session.
session <- boto3$session$Session()
sagemaker_session <- sagemaker$Session(boto_session = session)

In [37]:
# Shorthand for the SDK's ScriptProcessor class.
ScriptProcessor <- sagemaker$processing$ScriptProcessor

# Processor that launches scripts with `Rscript` inside the image configured
# above, on a single ml.m5.xlarge instance. All arguments are passed by name.
script_processor <- ScriptProcessor(
    image_uri = processing_repository_uri,
    command = list("Rscript"),
    role = role,
    instance_type = "ml.m5.xlarge",
    instance_count = 1L,
    sagemaker_session = sagemaker_session
)

In [38]:
# Local aliases for the SDK classes used to describe a processing job's
# inputs and outputs.
ProcessingInput <- sagemaker$processing$ProcessingInput
ProcessingOutput <- sagemaker$processing$ProcessingOutput

In [39]:
# Local dataset handed to the job and the container path it is delivered to.
# NOTE(review): `source` shares its name with base::source(); kept as-is
# because other cells may read this variable.
source <- "tmp/dataset.processing.csv"
pinput1 <- ProcessingInput(
    source = source,
    destination = "/opt/ml/processing/input"
)

# Container directory whose contents form the job's output.
poutput1 <- ProcessingOutput(source = "/opt/ml/processing/output")

In [40]:
# Launch the processing job with `processing.r` as the script, passing it
# `--sample-argument 3`, and wait for it to finish (wait = TRUE).
script_processor$run(
    code = "processing.r",
    inputs = list(pinput1),
    outputs = list(poutput1),
    arguments = list("--sample-argument", "3"),
    wait = TRUE
)

In [41]:
#' Run a shell command, echo its output, and return the last output line.
#'
#' @param bash_command Command line handed to `system()`.
#' @return The last line the command wrote to standard output, or `""` when
#'   the command produced no output.
cmd <- function(bash_command) {
    # Echo the command first so the notebook shows what was executed.
    print(bash_command)

    # intern = TRUE captures stdout as a character vector, one element per line.
    output <- system(bash_command, intern = TRUE)

    # Emit every captured line followed by a newline; prints nothing when the
    # command produced no output.
    writeLines(output)

    if (length(output) > 0L) output[[length(output)]] else ""
}

In [42]:
# Install the awslogs command-line tool used by the following cell.
cmd("pip install awslogs")

[1] "pip install awslogs"


In [43]:
# Fetch processing-job logs from the /aws/sagemaker/ProcessingJobs log group
# in us-east-1, starting one hour back (-s1h).
# Prerequisite: the SageMaker execution role must have the
# CloudWatchLogsReadOnlyAccess policy attached.
cmd("awslogs get /aws/sagemaker/ProcessingJobs -s1h --aws-region=us-east-1")

[1] "awslogs get /aws/sagemaker/ProcessingJobs -s1h --aws-region=us-east-1"
/aws/sagemaker/ProcessingJobs sagemaker-scikit-learn-2021-04-18-19-34-01-181/algo-1-1618774641 Namespace(sample_argument=3)
/aws/sagemaker/ProcessingJobs sagemaker-scikit-learn-2021-04-18-19-34-01-181/algo-1-1618774641 Collecting matplotlib
  Downloading matplotlib-3.4.1-cp37-cp37m-manylinux1_x86_64.whl (10.3 MB)
/aws/sagemaker/ProcessingJobs sagemaker-scikit-learn-2021-04-18-19-34-01-181/algo-1-1618774641 Collecting kiwisolver>=1.0.1
  Downloading kiwisolver-1.3.1-cp37-cp37m-manylinux1_x86_64.whl (1.1 MB)
/aws/sagemaker/ProcessingJobs sagemaker-scikit-learn-2021-04-18-19-34-01-181/algo-1-1618774641 Collecting pillow>=6.2.0
  Downloading Pillow-8.2.0-cp37-cp37m-manylinux1_x86_64.whl (3.0 MB)
/aws/sagemaker/ProcessingJobs sagemaker-scikit-learn-2021-04-18-19-34-01-181/algo-1-1618774641 Collecting cycler>=0.10
  Downloading cycler-0.10.0-py2.py3-none-any.whl (6.5 kB)
/aws/sagemaker/ProcessingJobs sagemaker-scikit

In [44]:
# Take the processor's most recent job and read the destination of its first
# output.
latest_job <- script_processor$latest_job
destination <- latest_job$outputs[[1]]$destination
# Display the destination in the notebook.
destination

In [45]:
# Path of `output.csv` under the job's output destination, and the AWS CLI
# call that copies it to the local tmp directory.
csv_path <- paste(destination, "output.csv", sep = "/")
command <- paste("aws s3 cp", csv_path, "tmp/output.processing.r.csv")

In [46]:
# Run the copy command assembled in the previous cell.
cmd(command)

[1] "aws s3 cp s3://sagemaker-us-east-1-581320662326/sagemaker-processing-r-2021-04-18-20-24-00-413/output/output-1/output.csv tmp/output.processing.r.csv"
Completed 7 Bytes/7 Bytes (104 Bytes/s) with 1 file(s) remainingdownload: s3://sagemaker-us-east-1-581320662326/sagemaker-processing-r-2021-04-18-20-24-00-413/output/output-1/output.csv to tmp/output.processing.r.csv


In [60]:
# Read the downloaded file as a headerless CSV and show its first column.
read.csv("tmp/output.processing.r.csv", header=FALSE)[[1]]