In [6]:
import sagemaker
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()

# Get a SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()

In [7]:
print(role)

arn:aws:iam::113188171590:role/service-role/AmazonSageMaker-ExecutionRole-20210613T133153


In [20]:
import pandas as pd
import boto3

dependancy_path = '/home/ec2-user/dependencies.py'
bucket='1834-testbucket'
data_key = 'Assignment2/housing.csv'
training_data = 's3://{}/{}'.format(bucket, data_key)
df = pd.read_csv(training_data)

In [21]:
df.head()

Unnamed: 0,-121.89,37.29,38.0,1568.0,351.0,710.0,339.0,2.7042,<1H OCEAN,286600.0
0,-121.93,37.05,14.0,679.0,108.0,306.0,113.0,6.4214,<1H OCEAN,340600.0
1,-117.2,32.77,31.0,1952.0,471.0,936.0,462.0,2.8621,NEAR OCEAN,196900.0
2,-119.61,36.31,25.0,1847.0,371.0,1460.0,353.0,1.8839,INLAND,46300.0
3,-118.59,34.23,17.0,6592.0,1525.0,4459.0,1463.0,3.0347,<1H OCEAN,254500.0
4,-120.97,37.66,24.0,2930.0,588.0,1448.0,570.0,3.5395,INLAND,127900.0


In [22]:
from sagemaker.sklearn.estimator import SKLearn

FRAMEWORK_VERSION = "0.23-1"
script_path = '/home/ec2-user/sklearn_housing_featurizer.py'

sklearn_preprocessor = SKLearn(
    entry_point=script_path,
    role=role,
    framework_version=FRAMEWORK_VERSION,
    instance_type='ml.m4.xlarge',
    dependencies=[dependancy_path],
    sagemaker_session=sagemaker_session)

In [23]:
sklearn_preprocessor.fit({'train': training_data})

2021-06-14 14:06:01 Starting - Starting the training job...
2021-06-14 14:06:24 Starting - Launching requested ML instancesProfilerReport-1623679561: InProgress
......
2021-06-14 14:07:24 Starting - Preparing the instances for training.........
2021-06-14 14:08:44 Downloading - Downloading input data...
2021-06-14 14:09:30 Training - Training image download completed. Training in progress..[34m2021-06-14 14:09:31,786 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-06-14 14:09:31,790 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-14 14:09:31,802 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-06-14 14:09:32,164 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-14 14:09:35,206 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-14 14:09:35,221 s

In [25]:
# Define a SKLearn Transformer from the trained SKLearn Estimator
transformer = sklearn_preprocessor.transformer(
    instance_count=1, 
    instance_type='ml.m4.xlarge',
    assemble_with = 'Line',
    accept = 'text/csv')

In [26]:
# Preprocess training input
transformer.transform(training_data, content_type="text/csv")
print("Waiting for transform job: " + transformer.latest_transform_job.job_name)
transformer.wait()
preprocessed_train = transformer.output_path

.................................
[34m2021-06-14 14:15:52,915 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-14 14:15:52,917 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-14 14:15:52,918 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;
[0m
[34mworker_rlimit_nofile 4096;
[0m
[34mevents {
  worker_connections 2048;[0m
[34m}
[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;

  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }

  server {
    listen 8080 deferred;
    client_max_body_size 0;

    keepalive_timeout 3;

    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_red

In [27]:
preprocessed_train

's3://sagemaker-us-east-2-113188171590/sagemaker-scikit-learn-2021-06-14-14-10-32-381'

In [28]:
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
import boto3
from time import gmtime, strftime


timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

scikit_learn_inference_model = sklearn_preprocessor.create_model()

model_name = 'inference-pipeline-' + timestamp_prefix
endpoint_name = 'inference-pipeline-ep-' + timestamp_prefix

#sm_model.sagemaker_session=None
scikit_learn_inference_model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge', endpoint_name=endpoint_name)

---------------!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f0c98ed8748>

In [29]:
from sagemaker.predictor import json_serializer, csv_serializer, json_deserializer, Predictor
#from sagemaker.content_types import CONTENT_TYPE_CSV, CONTENT_TYPE_JSON

csv_serializer = sagemaker.serializers.CSVSerializer(content_type="text/csv")
data = """-122.26,37.46,26,5067,750,1996,728,7.0001,NEAR OCEAN"""

endpoint_name = endpoint_name

predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=csv_serializer)


print(predictor.predict(data))

b'{"instances": [{"features": [-1.3408764930120565, 0.8514634482951091, -0.21099315585118414, 1.1430410231864847, 0.5257730939190086, 0.5164773116405509, 0.6146760713962672, 1.6402561279658028, 0.5819441198401629, -0.030616763948385764, -1.0052566158906469, 0.0, 0.0, 0.0, 0.0, 1.0]}]}'
