In [None]:
%%sh
pip -q install pandas scikit-learn joblib

# Vanilla code

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.externals import joblib

data = pd.read_csv('housing.csv')
labels = data[['medv']]
samples = data.drop(['medv'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(samples, labels, test_size=0.1, random_state=123)
regr = LinearRegression(normalize=True)
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))
joblib.dump(regr, 'model.joblib')

Mean squared error: 41.82
Coefficient of determination: 0.63


['model.joblib']

In [None]:
%%sh
export SM_CHANNEL_TRAINING=
export SM_MODEL_DIR=
python sklearn-boston-housing.py --normalize True --test-size 0.1 --training . --model-dir .
ls -l model.joblib

## Run with SageMaker Local Mode

In [None]:
%%sh
pip install -q -t . --upgrade joblib

In [2]:
import sagemaker
from sagemaker.sklearn import SKLearn

training = 'file://.'
output = 'file://.'

role = sagemaker.get_execution_role()
sk = SKLearn(entry_point='sklearn-boston-housing.py',
             role=role,
             train_instance_count=1, 
             train_instance_type='local',
             output_path=output,
             hyperparameters={
                  'normalize': True,
                  'test-size': 0.1
              }
)

sk.fit({'training':training})

This is not the latest supported version. If you would like to use version 0.23-1, please add framework_version=0.23-1 to your constructor.


Creating tmpx_m2705n_algo-1-jai3v_1 ... 
[1BAttaching to tmpx_m2705n_algo-1-jai3v_12mdone[0m
[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:43,655 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:43,658 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:43,667 sagemaker_sklearn_container.training INFO     Invoking user training script.
[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:43,775 sagemaker-containers INFO     Module sklearn-boston-housing does not provide a setup.py. 
[36malgo-1-jai3v_1  |[0m Generating setup.py
[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:43,775 sagemaker-containers INFO     Generating setup.cfg
[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:43,776 sagemaker-containers INFO     Generating MANIFEST.in
[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:43,776 sagemaker-containers INFO     Installing module with the following 

[36malgo-1-jai3v_1  |[0m 2020-07-20 13:39:45,398 sagemaker-containers INFO     Reporting training SUCCESS
[36mtmpx_m2705n_algo-1-jai3v_1 exited with code 0
[0mAborting on container exit...
===== Job Complete =====


In [3]:
sk_predictor = sk.deploy(initial_instance_count=1, instance_type='local')

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


Attaching to tmpfa1qzf98_algo-1-7ptcz_1
[36malgo-1-7ptcz_1  |[0m Processing /opt/ml/code
[36malgo-1-7ptcz_1  |[0m Building wheels for collected packages: sklearn-boston-housing
[36malgo-1-7ptcz_1  |[0m   Building wheel for sklearn-boston-housing (setup.py) ... [?25ldone
[36malgo-1-7ptcz_1  |[0m [?25h  Created wheel for sklearn-boston-housing: filename=sklearn_boston_housing-1.0.0-py2.py3-none-any.whl size=6000 sha256=a6295afd04da1a853ddbfd4f690effca8afdab90c4da8fa67e99884b6bd78f3a
[36malgo-1-7ptcz_1  |[0m   Stored in directory: /tmp/pip-ephem-wheel-cache-oyizfvft/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3
[36malgo-1-7ptcz_1  |[0m Successfully built sklearn-boston-housing
[36malgo-1-7ptcz_1  |[0m Installing collected packages: sklearn-boston-housing
[36malgo-1-7ptcz_1  |[0m Successfully installed sklearn-boston-housing-1.0.0
[36malgo-1-7ptcz_1  |[0m   import imp
[36malgo-1-7ptcz_1  |[0m [2020-07-20 13:39:49 +0000] [72] [INFO] Starting gunico

In [4]:
data = pd.read_csv('housing.csv')
payload = data[:10].drop(['medv'], axis=1) 
payload = payload.to_csv(header=False, index=False)
print(payload)

0.00632,18.0,2.31,0,0.5379999999999999,6.575,65.2,4.09,1,296.0,15.3,4.98
0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,9.14
0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,4.03
0.03237,0.0,2.18,0,0.45799999999999996,6.997999999999999,45.8,6.0622,3,222.0,18.7,2.94
0.06905,0.0,2.18,0,0.45799999999999996,7.147,54.2,6.0622,3,222.0,18.7,5.33
0.02985,0.0,2.18,0,0.45799999999999996,6.43,58.7,6.0622,3,222.0,18.7,5.21
0.08829,12.5,7.87,0,0.524,6.0120000000000005,66.6,5.5605,5,311.0,15.2,12.43
0.14455,12.5,7.87,0,0.524,6.172000000000001,96.1,5.9505,5,311.0,15.2,19.15
0.21124,12.5,7.87,0,0.524,5.631,100.0,6.0821,5,311.0,15.2,29.93
0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311.0,15.2,17.1



In [5]:
from sagemaker.predictor import csv_serializer, csv_deserializer
sk_predictor = sk.deploy(initial_instance_count=1, 
                         instance_type='local')
data = pd.read_csv('housing.csv')
payload = data[:10].drop(['medv'], axis=1) 
payload = payload.to_csv(header=False, index=False)
sk_predictor.content_type = 'text/csv'
sk_predictor.accept = 'text/csv'
sk_predictor.serializer = csv_serializer
sk_predictor.deserializer = csv_deserializer
response = sk_predictor.predict(payload)

print(response)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


[36malgo-1-7ptcz_1  |[0m 2020-07-20 13:39:53,661 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36malgo-1-7ptcz_1  |[0m   import imp
[36malgo-1-7ptcz_1  |[0m 172.18.0.1 - - [20/Jul/2020:13:39:54 +0000] "GET /ping HTTP/1.1" 200 0 "-" "-"
![36malgo-1-7ptcz_1  |[0m 2020-07-20 13:39:54,149 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36malgo-1-7ptcz_1  |[0m   import imp
[['29.801388899699845'], ['24.990809475886074'], ['30.7379654455552'], ['28.786967125316544'], ['28.1421501991961'], ['25.301714533101716'], ['22.717977231840184'], ['19.302415613883348'], ['11.369520911229536'], ['18.785593532977657']][36malgo-1-7ptcz_1  |[0m 172.18.0.1 - - [20/Jul/2020:13:39:54 +0000] "POST /invocations HTTP/1.1" 200 186 "-" "-"



Exception in thread Thread-5:
Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/local/image.py", line 618, in run
    _stream_output(self.process)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/local/image.py", line 677, in _stream_output
    raise RuntimeError("Process exited with code: %s" % exit_code)
RuntimeError: Process exited with code: 1

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/local/image.py", line 623, in run
    raise RuntimeError(msg)
RuntimeError: Failed to run: ['docker-compose', '-f', '/tmp/tmpv66gtfrt/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1



In [None]:
sk_predictor.delete_endpoint()

## Run with SageMaker managed infrastructure

In [None]:
sess   = sagemaker.Session()
bucket = sess.default_bucket()                     
prefix = 'sklearn-boston-housing'

training = sess.upload_data(path='housing.csv', key_prefix=prefix + "/training")
output   = 's3://{}/{}/output/'.format(bucket,prefix)
print(training)
print(output)

In [None]:
sk = SKLearn(entry_point='sklearn-boston-housing.py',
             role=role,
             train_instance_count=1, 
             train_instance_type='ml.m5.large',
             output_path=output,
             hyperparameters={
                  'normalize': True,
                  'test-size': 0.1
              }
)

sk.fit({'training':training})

In [None]:
sk_predictor = sk.deploy(initial_instance_count=1, instance_type='ml.t2.medium')

You can reuse the cells above for prediction.

In [None]:
sk_predictor.delete_endpoint()