In [None]:
!pip install sagemaker --upgrade

In [None]:
import os

import numpy as np
import sagemaker
from sagemaker import get_execution_role
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.sklearn.processing import SKLearnProcessor
from sklearn.datasets import load_iris

In [None]:
# Load the Iris dataset and split it into the feature matrix (`x`) and the
# target vector (`y`); both names are used by later cells.
iris = load_iris()
x, y = iris.data, iris.target

In [None]:
# Turn the 1-D target vector into a single column so it can be joined to the
# feature matrix, then persist features + target (target is the last column)
# as one array in `data.npy`.
target_column = y[:, np.newaxis]
dataset = np.concatenate((x, target_column), axis=1)
np.save('data.npy', dataset)

In [None]:
# Upload the serialized dataset to S3 so the processing job can consume it.
# `sagemaker.Session` needs the `sagemaker` module itself to be imported;
# importing only `get_execution_role` from it does not bind the module name.
session = sagemaker.Session()
s3_data_path = session.upload_data(
    path='data.npy',
    bucket='gl-ml-training-sagemaker',
    key_prefix='sagemaker/preprocessing/input',
)
# Show the value returned by `upload_data`; it is passed as the processing
# job's input source in a later cell.
s3_data_path

In [None]:
# Delete the local `data.npy` file written earlier in the notebook.
os.unlink('data.npy')

In [None]:
# Build the scikit-learn processor: one ml.m5.large instance using the
# scikit-learn 0.20.0 framework version, running under the notebook's
# execution role.
execution_role = get_execution_role()
sklearn_processor = SKLearnProcessor(
    role=execution_role,
    framework_version='0.20.0',
    instance_type='ml.m5.large',
    instance_count=1,
)

In [None]:
# Run `preprocessing.py` as a processing job.
#
# Input:   the dataset uploaded to S3, with /opt/ml/processing/input as its
#          destination path for the job.
# Outputs: one per split (train / validation / test). Each split has its own
#          directory under /opt/ml/processing/output and its own S3
#          destination under `output_s3_prefix`.
output_s3_prefix = 's3://gl-ml-training-sagemaker/sagemaker/preprocessing/output'
output_splits = ('train', 'validation', 'test')

sklearn_processor.run(
    code='preprocessing.py',
    inputs=[
        ProcessingInput(source=s3_data_path,
                        destination='/opt/ml/processing/input'),
    ],
    # The three outputs differ only in the split name, so generate them
    # instead of repeating the same definition three times.
    outputs=[
        ProcessingOutput(source=f'/opt/ml/processing/output/{split}',
                         destination=f'{output_s3_prefix}/{split}',
                         output_name=split)
        for split in output_splits
    ],
)