In [None]:
%%sh
# Print the entry-point script with syntax highlighting so it can be read inline.
pygmentize sklearn-boston-housing.py

In [None]:
import sagemaker

print(sagemaker.__version__)

# One SageMaker session is reused by every cell that talks to S3.
sess = sagemaker.Session()
bucket = sess.default_bucket()
prefix = 'sklearn-boston-housing'

# Upload the local dataset under "<prefix>/training"; the returned value is
# printed below and later handed to the estimator as its 'training' channel.
training = sess.upload_data(path='housing.csv', key_prefix=f'{prefix}/training')

# S3 location given to the estimator as its output path.
output = f's3://{bucket}/{prefix}/output/'

print(training)
print(output)

In [None]:
from sagemaker.sklearn import SKLearn

role = sagemaker.get_execution_role()

# Configure the scikit-learn estimator around the entry-point script.
# The hyperparameters dict is passed to the estimator as-is; how the script
# interprets 'normalize' and 'test-size' is defined in the script itself.
sk = SKLearn(
    entry_point='sklearn-boston-housing.py',
    role=role,
    framework_version='0.20.0',
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=output,
    hyperparameters={
        'normalize': True,
        'test-size': 0.1,
    },
)

# Launch training with the uploaded dataset bound to the 'training' channel.
sk.fit({'training': training})

In [None]:
import pandas as pd

# Build the batch-transform input: the dataset without the 'medv' column
# (presumably the prediction target -- confirm against the training script),
# written as a bare CSV with no header row and no index column.
data = pd.read_csv('housing.csv').drop(columns=['medv'])
data.to_csv('data.csv', header=False, index=False)

# Upload the prepared file under "<prefix>/batch" for the transform job.
batch_input = sess.upload_data(path='data.csv', key_prefix=f'{prefix}/batch')

In [None]:
# Create a batch transformer from the fitted estimator.
sk_transformer = sk.transformer(
    instance_count=1,
    instance_type='ml.m5.large',
)

# Run the transform over the uploaded CSV; wait/logs are both enabled so the
# cell is expected to block and show job logs until the job ends.
sk_transformer.transform(
    batch_input,
    content_type='text/csv',
    wait=True,
    logs=True,
)

In [None]:
# Show where the transformer writes its results; the next cell lists and downloads from this location.
print(sk_transformer.output_path)

In [None]:
%%bash -s "$sk_transformer.output_path"
# $1 is the transformer's output location, expanded by IPython from the
# Python variable on the magic line above.
#
# Stop at the first failing command (and on an unset $1) so a failed download
# is reported instead of silently showing a stale local data.csv.out.
set -euo pipefail

# Quote $1 so the URI is passed as a single argument, with no word splitting
# or globbing.
aws s3 ls "$1/"
aws s3 cp "$1/data.csv.out" .

# Show the first line of the downloaded results file.
head -n 1 data.csv.out