## SageMaker Local Mode Sklearn Example

In [1]:
import boto3
import json
import os
import joblib
import pickle
import tarfile
import sagemaker
from sagemaker.estimator import Estimator
import time
from time import gmtime, strftime
import subprocess

# Setup: a single boto3 session provides both the region name and the S3
# resource; the execution role is looked up through the SageMaker SDK.
boto_session = boto3.Session()
region = boto_session.region_name
s3 = boto_session.resource("s3")
role = sagemaker.get_execution_role()

## Local Mode Files

In [2]:
%%writefile local_model.py
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
import joblib

#Load data
# Column order of the 13 Boston housing features, used when the frame has to
# be rebuilt from the raw file instead of coming from scikit-learn.
BOSTON_FEATURE_NAMES = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
BOSTON_SOURCE_URL = "http://lib.stat.cmu.edu/datasets/boston"


def load_boston_frame():
    """Return the Boston housing data as a DataFrame with a 'MEDV' target column.

    Uses ``datasets.load_boston()`` when scikit-learn still provides it.
    That loader is deprecated and absent from newer releases, so when the
    lookup fails the frame is rebuilt from the original source file using
    the recipe given in scikit-learn's own deprecation notice. Both paths
    produce the same 13 feature columns followed by 'MEDV'.
    """
    try:
        boston = datasets.load_boston()
    except (ImportError, AttributeError):
        # Each record in the raw file spans two physical lines: the even rows
        # hold 11 values, the odd rows hold the last 2 features and the target.
        raw_df = pd.read_csv(BOSTON_SOURCE_URL, sep=r"\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        frame = pd.DataFrame(data, columns = BOSTON_FEATURE_NAMES)
        frame['MEDV'] = target
        return frame

    frame = pd.DataFrame(boston.data, columns = boston.feature_names)
    frame['MEDV'] = boston.target
    return frame


df = load_boston_frame()

#Split data into features/target, then into train/test partitions
X = df.drop(['MEDV'], axis = 1)
y = df['MEDV']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 42)

#Model Creation
lm = LinearRegression()
lm.fit(X_train,y_train)

#Persist the fitted model to model.joblib in the working directory
with open('model.joblib', 'wb') as f:
    joblib.dump(lm,f)

#Reload the saved file so the prediction below exercises the serialized model
with open('model.joblib', 'rb') as f:
    predictor = joblib.load(f)

print("Testing following input: ")
print(X_test[0:1])
# One hand-written row with the 13 feature values, passed as a plain nested list.
sampInput = [[0.09178, 0.0, 4.05, 0.0, 0.51, 6.416, 84.1, 2.6463, 5.0, 296.0, 16.6, 395.5, 9.04]]
print(type(sampInput))
print(predictor.predict(sampInput))

Overwriting local_model.py


### Local Mode Training

Run `local_model.py` to generate `model.joblib` (the model data). This file is then packaged into `model.tar.gz`, which your SageMaker local endpoint loads; the `inference.py` script is supplied separately as the model's entry point.

In [3]:
# Execute the script written above; it fits the model and writes model.joblib to the working directory.
!python local_model.py


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

### Inference Script

In [4]:
%%writefile inference.py
import joblib
import os
import json

"""
Deserialize fitted model
"""
def model_fn(model_dir):
    """Load and return the object stored in ``model.joblib`` under ``model_dir``.

    model_dir: directory that contains the serialized model file.
    """
    artifact_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(artifact_path)

"""
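input_fn (DISABLED: the definition below sits inside this triple-quoted string, which only closes after the function body, so it is never defined at runtime)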
    request_body: The body of the request sent to the model.
    request_content_type: (string) specifies the format/variable type of the request

def input_fn(request_body, request_content_type):
    print("-----------------------")
    print(request_body)
    print(request_content_type)
    print("-----------------------")
    if request_content_type == 'application/json':
        request_body = json.loads(request_body)
        inpVar = request_body['Input']
        return inpVar
    else:
        raise ValueError("This model only supports application/json input")

"""

"""
predict_fn
    input_data: returned array from input_fn above
    model (sklearn model) returned model loaded from model_fn above
"""
def predict_fn(input_data, model):
    """Log the incoming data and its type, then return ``model.predict(input_data)``.

    input_data: the value handed over by the input-handling step
    model: the object returned by model_fn; must expose ``predict``
    """
    divider = "-----------------------"
    print(divider)
    print(input_data)
    print(type(input_data))
    print(divider)
    predictions = model.predict(input_data)
    return predictions

"""
output_fn
    prediction: the returned value from predict_fn above
    content_type: the content type the endpoint expects to be returned. Ex: JSON, string
"""


def output_fn(prediction, content_type):
    """Wrap the first predicted value in the response dictionary.

    prediction: the returned value from predict_fn; only element 0 is used
    content_type: the content type the endpoint expects to be returned.
        It is accepted for interface compatibility but not consulted here.

    Returns a dict of the form {'Output': <float>}.
    """
    # float() rather than int(): int() would silently drop the fractional
    # part of the prediction (e.g. 22.75 -> 22).
    res = float(prediction[0])
    respJSON = {'Output': res}
    print("-----------------------")
    print(respJSON)
    print("-----------------------")
    return respJSON

Overwriting inference.py


### Create model.tar.gz

In [5]:
#Build tar file with the model data (inference.py is supplied separately via entry_point when the model object is created)
bashCommand = "tar -cvpzf model.tar.gz model.joblib"
# Pipe stderr as well, so that `error` actually holds tar's diagnostics instead of always being None.
process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = process.communicate()
# Stop here on failure rather than uploading a missing or partial archive later.
if process.returncode != 0:
    raise RuntimeError(
        f"tar exited with status {process.returncode}: {error.decode(errors='replace')}"
    )

## Push Model Data to S3 Bucket

In [6]:
# Name of the S3 bucket that is created in the next cell and later receives model.tar.gz.
bucket_name = "local-mode-mars"

In [7]:
# Create the bucket; {bucket_name} is interpolated from the Python variable defined above.
!aws s3 mb "s3://{bucket_name}"

make_bucket: local-mode-mars


In [8]:
# Record the S3 URI of the archive, then push the local model.tar.gz to the
# bucket under the same key.
model_artifacts = f"s3://{bucket_name}/model.tar.gz"
response = s3.meta.client.upload_file(
    Filename='model.tar.gz',
    Bucket=bucket_name,
    Key='model.tar.gz',
)

## Enable Local Mode

In [9]:
import sagemaker
from sagemaker.local import LocalSession
from sagemaker.sklearn import SKLearn, SKLearnModel

# Same lookup as in the setup cell at the top of the notebook, repeated here.
role = sagemaker.get_execution_role()

In [10]:
# Local-mode session with 'local_code' switched on in its config.
# NOTE(review): `session` is not handed to SKLearnModel below (there is no
# sagemaker_session=... argument) - confirm whether that is intended.
local_config = {'local': {'local_code': True}}
session = LocalSession()
session.config = local_config

# Model definition: the handlers in inference.py plus the archive uploaded to S3.
model = SKLearnModel(
    model_data=model_artifacts,
    entry_point='inference.py',
    framework_version='0.23-1',
    role=role,
)

In [11]:
print(type(session)) # sanity check: show the class of `session` to confirm it is a LocalSession

<class 'sagemaker.local.local_session.LocalSession'>


### Sample Payload

In [12]:
import numpy as np
# A single sample row (13 feature values), wrapped in an outer list so the
# resulting array is two-dimensional with shape (1, 13).
sample_row = [
    0.09178, 0.0, 4.05, 0.0, 0.51, 6.416, 84.1,
    2.6463, 5.0, 296.0, 16.6, 395.5, 9.04,
]
payload = np.array([sample_row])
payload

array([[9.1780e-02, 0.0000e+00, 4.0500e+00, 0.0000e+00, 5.1000e-01,
        6.4160e+00, 8.4100e+01, 2.6463e+00, 5.0000e+00, 2.9600e+02,
        1.6600e+01, 3.9550e+02, 9.0400e+00]])

## Sample Deployment & Inference

In [13]:
import traceback

# Deploy the model to a local endpoint and send the sample payload to it.
# Failures are still caught so the notebook keeps running, but the full
# traceback is now shown: print(e) alone hides the exception type and the
# location of the failure.
try:
    predictor = model.deploy(initial_instance_count=1, instance_type='local')
    print(predictor)
    preds = predictor.predict(payload)
    print(preds)
except Exception as e:
    print(e)
    traceback.print_exc()

Attaching to 8pidzry78j-algo-1-es9ue
[36m8pidzry78j-algo-1-es9ue |[0m 2022-08-08 17:29:42,165 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36m8pidzry78j-algo-1-es9ue |[0m 2022-08-08 17:29:42,167 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36m8pidzry78j-algo-1-es9ue |[0m 2022-08-08 17:29:42,168 INFO - sagemaker-containers - nginx config: 
[36m8pidzry78j-algo-1-es9ue |[0m worker_processes auto;
[36m8pidzry78j-algo-1-es9ue |[0m daemon off;
[36m8pidzry78j-algo-1-es9ue |[0m pid /tmp/nginx.pid;
[36m8pidzry78j-algo-1-es9ue |[0m error_log  /dev/stderr;
[36m8pidzry78j-algo-1-es9ue |[0m 
[36m8pidzry78j-algo-1-es9ue |[0m worker_rlimit_nofile 4096;
[36m8pidzry78j-algo-1-es9ue |[0m 
[36m8pidzry78j-algo-1-es9ue |[0m events {
[36m8pidzry78j-algo-1-es9ue |[0m   worker_connections 2048;
[36m8pidzry78j-algo-1-es9ue |[0m }
[36m8pidzry78j-algo-1-es9ue |[0m 
[36m8pidzry78j-algo-1-es9ue |[0m http {
[36m8pidzry7