In [1]:
# # please ignore warning messages during the installation
# !pip install --upgrade sagemaker
# !pip install --disable-pip-version-check -q 
# !conda install -q -y pytorch==1.6.0 -c pytorch
# !pip install --disable-pip-version-check -q transformers==3.5.1
# !pip install -q protobuf==3.20.*

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [3]:
import boto3
import sagemaker
import pandas as pd
import botocore

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [4]:
sagemaker.__version__

'2.227.0'

In [5]:
# ========================== low-level service client of the boto3 session ==========================
config = botocore.config.Config(user_agent_extra='bedissj-1699438736259')


sm = boto3.client(service_name='sagemaker', 
                  config=config)

sm_runtime = boto3.client('sagemaker-runtime',
                          config=config)

sess = sagemaker.Session(sagemaker_client=sm,
                         sagemaker_runtime_client=sm_runtime)

bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = sess.boto_region_name

cw = boto3.client(service_name='cloudwatch', 
                  config=config)

autoscale = boto3.client(service_name="application-autoscaling", 
                         config=config)

## 1. Create an endpoint with multiple variants 

In [6]:
model_a_s3_uri = "s3://{}/y8a6c91lumhh-ModelTra-GTH766MIe3-002-1a2991b7/output/model.tar.gz".format(bucket)
model_b_s3_uri = "s3://{}/y8a6c91lumhh-ModelTra-GTH766MIe3-010-b1656de1/output/model.tar.gz".format(bucket)

### 1.1 Construct Docker Image URI

In [7]:
FRAMEWORK_VERSION = '1.0-1'
deploy_instance_type = "ml.m5.large"

In [8]:
churn_inference_image_uri = sagemaker.image_uris.retrieve(
    framework='sklearn',
    version=FRAMEWORK_VERSION,
    instance_type=deploy_instance_type,
    image_scope="inference",
    region=region
)

print(churn_inference_image_uri)

659782779980.dkr.ecr.eu-west-3.amazonaws.com/sagemaker-scikit-learn:1.0-1-cpu-py3


In [9]:
import time

timestamp = int(time.time())

model_name_a = "churn-prediction-mon1-model-a-{}".format(timestamp)
model_name_b = "churn-prediction-mon1-model-b-{}".format(timestamp)


Check if models already exist.

In [10]:
def check_model_existence(model_name):
    for model in sm.list_models()['Models']:
        if model_name == model['ModelName']:
            return True
    return False

### 1.2 Create models

In [11]:
from sagemaker.sklearn.model import SKLearnModel
from pprint import pprint


if not check_model_existence(model_name=model_name_a):
    model_a = SKLearnModel(
        name=model_name_a,
        model_data=model_a_s3_uri,
        image_uri=churn_inference_image_uri,
        entry_point='./src/inference.py',
        sagemaker_session=sess,
        role=role
    )
    model_a.create()
    pprint(model_a)
    
else:
    print("Model {} already exists".format(model_name_a))

<sagemaker.sklearn.model.SKLearnModel object at 0x7f69fd051150>


In [12]:
if not check_model_existence(model_name=model_name_b):
    # model_b = sm.create_model(
    #     ModelName=model_name_b,
    #     ExecutionRoleArn=role,
    #     PrimaryContainer={
    #         "ModelDataUrl": model_b_s3_uri,
    #         "Image":churn_inference_image_uri,
    #     }
    # )
    # pprint(model_b)

    model_b = SKLearnModel(
        name=model_name_b,
        model_data=model_b_s3_uri,
        image_uri=churn_inference_image_uri,
        entry_point='./src/inference.py',
        sagemaker_session=sess,
        role=role
    )
    model_b.create()
    pprint(model_b)

else:
    print("Model {} already exists".format(model_name_b))

<sagemaker.sklearn.model.SKLearnModel object at 0x7f69fca0a290>


### 1.3 Create production variants

In [13]:
from sagemaker.session import production_variant

variantA = production_variant(
    model_name=model_name_a,
    instance_type=deploy_instance_type,
    initial_instance_count=1,
    initial_weight=50,
    variant_name='VariantA'
)
print(variantA)

{'VariantName': 'VariantA', 'ModelName': 'churn-prediction-mon1-model-a-1730389131', 'InitialVariantWeight': 50, 'InitialInstanceCount': 1, 'InstanceType': 'ml.m5.large'}


In [14]:
variantB = production_variant(
    model_name=model_name_b,
    instance_type=deploy_instance_type,
    initial_instance_count=1,
    initial_weight=50,
    variant_name='VariantB'
)
print(variantA)

{'VariantName': 'VariantA', 'ModelName': 'churn-prediction-mon1-model-a-1730389131', 'InitialVariantWeight': 50, 'InitialInstanceCount': 1, 'InstanceType': 'ml.m5.large'}


### 1.4 Configure and create the endpoint

Check endpoint configuration existence.

In [15]:
def check_endpoint_config_existence(endpoint_config_name):
    for endpoint_config in sm.list_endpoint_configs()['EndpointConfigs']:
        if endpoint_config_name == endpoint_config['EndpointConfigName']:
            return True
    return False


def check_endpoint_existence(endpoint_name):
    for endpoint in sm.list_endpoints()['Endpoints']:
        if endpoint_name == endpoint['EndpointName']:
            return True
    return False

Create endpoint configuration for  A/B testing.

In [16]:
endpoint_config_name = "churn-prediction-ab-epc-{}".format(timestamp)

if not check_endpoint_config_existence(endpoint_config_name):
    endpoint_config = sm.create_endpoint_config(
        EndpointConfigName=endpoint_config_name,
        ProductionVariants=[variantA, variantB]
    )
    pprint(endpoint_config)
else: 
     print("Endpoint configuration {} already exists".format(endpoint_config_name))

{'EndpointConfigArn': 'arn:aws:sagemaker:eu-west-3:668303144976:endpoint-config/churn-prediction-ab-epc-1730389131',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '115',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Thu, 31 Oct 2024 15:38:55 GMT',
                                      'x-amzn-requestid': 'dba847b5-8317-4784-8fbd-272cd2cdec12'},
                      'HTTPStatusCode': 200,
                      'RequestId': 'dba847b5-8317-4784-8fbd-272cd2cdec12',
                      'RetryAttempts': 0}}


In [17]:
model_ab_endpoint_name = 'churn-prediction-ab-ep-{}'.format(timestamp)
print('Endpoint name: {}'.format(model_ab_endpoint_name))

Endpoint name: churn-prediction-ab-ep-1730389131


In [18]:
if not check_endpoint_existence(model_ab_endpoint_name):
    endpoint_response = sm.create_endpoint(
        EndpointName=model_ab_endpoint_name,
        EndpointConfigName=endpoint_config_name
    )
    print('Creating endpoint {}'.format(model_ab_endpoint_name))
    pprint(endpoint_response)
else:
    print("Endpoint {} already exists".format(model_ab_endpoint_name))

Creating endpoint churn-prediction-ab-ep-1730389131
{'EndpointArn': 'arn:aws:sagemaker:eu-west-3:668303144976:endpoint/churn-prediction-ab-ep-1730389131',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '101',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Thu, 31 Oct 2024 15:38:55 GMT',
                                      'x-amzn-requestid': '858dfb14-c09a-4899-a7f6-69235c4b710c'},
                      'HTTPStatusCode': 200,
                      'RequestId': '858dfb14-c09a-4899-a7f6-69235c4b710c',
                      'RetryAttempts': 0}}


In [19]:
from IPython.core.display import display, HTML

display(
    HTML('<b>Review <a target="blank" href="https://console.aws.amazon.com/sagemaker/home?region={}#/endpoints/{}">SageMaker REST endpoint</a></b>'.format(
        region, 
        model_ab_endpoint_name)
        )
)

  from IPython.core.display import display, HTML


In [20]:
%%time

waiter = sm.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=model_ab_endpoint_name)

CPU times: user 28.6 ms, sys: 10.9 ms, total: 39.5 ms
Wall time: 3min 30s


In [21]:
n_preds = 3

tst = pd.read_csv("./src/BankChurners_mon1 (4).csv")
tst.drop(columns=[tst.columns[0], tst.columns[1]], inplace=True)

display(tst.head(n_preds))

Unnamed: 0,56,1.1,2,6,1.2,2.1,0.1,49,3,3.1,3.2,4058.0,793,3265.0,0.758,15865,105,0.667,0.195
0,47,1,2,0,1,0,0,35,4,3,2,3636.0,1010,2626.0,0.596,1564,59,0.686,0.278
1,61,1,0,2,1,2,0,56,6,2,3,9918.0,850,9068.0,0.688,2485,40,0.29,0.086
2,40,1,3,3,1,1,0,33,4,3,3,6884.0,1001,5883.0,0.786,975,11,0.833,0.145


In [22]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer, JSONLinesSerializer, JSONSerializer
from sagemaker.deserializers import JSONDeserializer, CSVDeserializer

predictor = Predictor(
    endpoint_name=model_ab_endpoint_name,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
    sagemaker_session=sess
)


Complete prediction script.

In [23]:
import json
from pprint import pprint


payload = tst.head(n_preds).values
pprint(payload)

array([[4.700e+01, 1.000e+00, 2.000e+00, 0.000e+00, 1.000e+00, 0.000e+00,
        0.000e+00, 3.500e+01, 4.000e+00, 3.000e+00, 2.000e+00, 3.636e+03,
        1.010e+03, 2.626e+03, 5.960e-01, 1.564e+03, 5.900e+01, 6.860e-01,
        2.780e-01],
       [6.100e+01, 1.000e+00, 0.000e+00, 2.000e+00, 1.000e+00, 2.000e+00,
        0.000e+00, 5.600e+01, 6.000e+00, 2.000e+00, 3.000e+00, 9.918e+03,
        8.500e+02, 9.068e+03, 6.880e-01, 2.485e+03, 4.000e+01, 2.900e-01,
        8.600e-02],
       [4.000e+01, 1.000e+00, 3.000e+00, 3.000e+00, 1.000e+00, 1.000e+00,
        0.000e+00, 3.300e+01, 4.000e+00, 3.000e+00, 3.000e+00, 6.884e+03,
        1.001e+03, 5.883e+03, 7.860e-01, 9.750e+02, 1.100e+01, 8.330e-01,
        1.450e-01]])


In [24]:
predictor.predict(payload, target_variant='VariantB')

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from primary with message "<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>
". See https://eu-west-3.console.aws.amazon.com/cloudwatch/home?region=eu-west-3#logEventViewer:group=/aws/sagemaker/Endpoints/churn-prediction-ab-ep-1730389131 in account 668303144976 for more information.

Cleanup resources

In [25]:
predictor.delete_model()
predictor.delete_endpoint()