# Import Libraries

In [1]:
import sagemaker
from sagemaker import get_execution_role

# Extra module passed to the estimator through its `dependencies` argument.
# (The variable keeps its original spelling because a later cell refers to it.)
dependancy_path = "dependencies.py"

# A single SageMaker session object, reused by the later cells.
sagemaker_session = sagemaker.Session()

# Reference only (not used): an alternative copy of the dataset lives at
#   s3://aws-sagemaker-one/datasets/input/housing_data.csv

In [2]:
# Key prefix used for this notebook's uploads to S3.
prefix = "Scikit-LinearLearner-pipeline-housing_sagemaker"

# Bucket comes from the session's default rather than a hand-picked name.
bucket = sagemaker_session.default_bucket()

In [4]:
# IAM role ARN handed to the SKLearn estimator as its `role` argument.
# NOTE(review): the ARN is hard-coded, while `get_execution_role` is imported
# but never called in the visible cells — confirm whether that is intentional.
role = 'arn:aws:iam::600009802643:role/aws_sagemaker'

In [5]:
# Local folder that contains housing_data.csv.
WORK_DIRECTORY = "."

# Upload the CSV under <prefix>/train in the session bucket; the value returned
# by upload_data is what the later training and transform cells consume.
train_input = sagemaker_session.upload_data(
    path=f"{WORK_DIRECTORY}/housing_data.csv",
    bucket=bucket,
    key_prefix=f"{prefix}/train",
)

# Create SageMaker Scikit Estimator

In [12]:
from sagemaker.sklearn.estimator import SKLearn

# Script used as the estimator's entry point.
script_path = "sklearn_entry_point.py"
# Framework version requested for the scikit-learn estimator.
FRAMEWORK_VERSION = "0.23-1"

# Scikit-learn estimator: runs `script_path` on one ml.m4.xlarge instance type,
# shipping the extra dependency file along with it.
sklearn_preprocessor = SKLearn(
    entry_point=script_path,
    framework_version=FRAMEWORK_VERSION,
    dependencies=[dependancy_path],
    instance_type="ml.m4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session,
)

In [13]:
# Launch the training job, feeding the uploaded CSV as the "train" channel.
sklearn_preprocessor.fit(inputs={"train": train_input})

2021-05-22 10:00:24 Starting - Starting the training job...ProfilerReport-1621677620: InProgress
..
2021-05-22 10:00:58 Starting - Launching requested ML instances..
2021-05-22 10:02:34 Starting - Preparing the instances for training...
2021-05-22 10:03:09 Downloading - Downloading input data...
2021-05-22 10:03:35 Training - Downloading the training image...
2021-05-22 10:04:02 Training - Training image download completed. Training in progress.[34m2021-05-22 10:04:03,561 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-05-22 10:04:03,564 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-22 10:04:03,585 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-05-22 10:04:03,942 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-22 10:04:06,974 sagemaker-training-toolkit INFO     No GPUs detected (normal 

# Batch transform the training data

In [14]:
# Derive a batch Transformer from the estimator trained above.
transformer = sklearn_preprocessor.transformer(
    instance_type="ml.m4.xlarge",
    instance_count=1,
    accept="text/csv",
    assemble_with="Line",
)

In [15]:
# Start a batch transform job over the uploaded training CSV.
transformer.transform(data=train_input, content_type="text/csv")
print(f"Waiting for transform job: {transformer.latest_transform_job.job_name}")

# Block until the job is done, then keep the transformer's output location.
transformer.wait()
preprocessed_train = transformer.output_path

.......................[34m2021-05-22 10:11:10,429 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-22 10:11:10,433 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-22 10:11:10,434 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;
[0m
[34mworker_rlimit_nofile 4096;
[0m
[34mevents {
  worker_connections 2048;[0m
[34m}
[0m
[35m2021-05-22 10:11:10,429 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[35m2021-05-22 10:11:10,433 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[35m2021-05-22 10:11:10,434 INFO - sagemaker-containers - nginx config: [0m
[35mworker_processes auto;[0m
[35mdaemon off;[0m
[35mpid /tmp/nginx.pid;[0m
[35merror_log  /dev/stderr;
[0m
[35mworker_rlimit_nofile 4096;
[0m
[35mevents {
  wo

In [16]:
# Show the output path recorded from the transformer after the batch job.
preprocessed_train

's3://sagemaker-ap-south-1-600009802643/sagemaker-scikit-learn-2021-05-22-10-06-10-205'

# Real-Time transformation of the training data

In [17]:
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
import boto3
from time import gmtime, strftime


# UTC timestamp embedded in the resource names below.
timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
# NOTE(review): model_name is not referenced by any visible cell.
model_name = f"inference-pipeline-{timestamp_prefix}"
endpoint_name = f"inference-pipeline-ep-{timestamp_prefix}"

# Turn the trained estimator into a model object and deploy it to an endpoint
# with one ml.m4.xlarge instance; deploy()'s return value is the cell output.
scikit_learn_inference_model = sklearn_preprocessor.create_model()
scikit_learn_inference_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

--------------!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f0e4acb6e48>

# Make a request to the endpoint

In [18]:
from sagemaker.predictor import json_serializer, csv_serializer, json_deserializer, Predictor
#from sagemaker.content_types import CONTENT_TYPE_CSV, CONTENT_TYPE_JSON

# One raw CSV record to send to the endpoint.
data = """-122.26,37.46,26,5067,750,1996,728,7.0001,NEAR OCEAN"""

# Serializer that submits request bodies with the text/csv content type.
csv_serializer = sagemaker.serializers.CSVSerializer(content_type="text/csv")

# Client bound to the endpoint created by the deployment cell.
predictor = Predictor(
    endpoint_name=endpoint_name,
    serializer=csv_serializer,
    sagemaker_session=sagemaker_session,
)

# No deserializer is passed to Predictor, so the response is printed as returned.
print(predictor.predict(data))

b'{"instances": [{"features": [-1.3428091431408828, 0.8559099238258896, -0.20972852048599255, 1.1144475705439372, 0.5082747630741923, 0.5038023490287747, 0.5975623207021221, 1.6472623615670152, 0.6188743067739607, -0.03166794876738965, -1.0081373598970214, 0.0, 0.0, 0.0, 0.0, 1.0]}]}'
