In [2]:
!pip install -q --upgrade pip

[0m

In [3]:
!pip show sagemaker

Name: sagemaker
Version: 2.209.0
Summary: Open source library for training and deploying models on Amazon SageMaker.
Home-page: https://github.com/aws/sagemaker-python-sdk/
Author: Amazon Web Services
Author-email: 
License: Apache License 2.0
Location: /opt/conda/lib/python3.10/site-packages
Requires: attrs, boto3, cloudpickle, docker, google-pasta, importlib-metadata, jsonschema, numpy, packaging, pandas, pathos, platformdirs, protobuf, psutil, PyYAML, requests, schema, smdebug-rulesconfig, tblib, tqdm, urllib3
Required-by: 


In [4]:
!pip install -q --upgrade sagemaker

[0m

In [5]:
%%time

from datetime import datetime, timedelta, timezone
import json
import os
import re
import boto3
from time import sleep
from threading import Thread

import pandas as pd

from sagemaker import get_execution_role, session, Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.processing import ProcessingJob
from sagemaker.serializers import CSVSerializer

from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig

session = Session()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
CPU times: user 1.63 s, sys: 240 ms, total: 1.87 s
Wall time: 1.92 s


In [6]:
# Get Execution role
role = get_execution_role()
print("RoleArn:", role)

region = session.boto_region_name
print("Region:", region)

RoleArn: arn:aws:iam::911199926915:role/LabRole
Region: us-east-1


In [7]:
# Setup S3 bucket
# You can use a different bucket, but make sure the role you chose for this notebook
# has the s3:PutObject permissions. This is the bucket into which the data is captured
bucket = session.default_bucket()
print("Bucket:", bucket)
prefix = "sagemaker/LGBM-ModelQualityMonitor-20240225test"

##S3 prefixes
data_capture_prefix = f"{prefix}/datacapture"
s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"

ground_truth_upload_path = (
    f"s3://{bucket}/{prefix}/ground_truth_data/{datetime.now():%Y-%m-%d-%H-%M-%S}"
)

reports_prefix = f"{prefix}/reports"
s3_report_path = f"s3://{bucket}/{reports_prefix}"

##Get the model monitor image
monitor_image_uri = image_uris.retrieve(framework="model-monitor", region=region)

print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

Bucket: sagemaker-us-east-1-911199926915
Image URI: 156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer
Capture path: s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/datacapture
Ground truth path: s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/ground_truth_data/2024-02-26-04-29-43
Report path: s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/reports


In [8]:
train_model_id, train_model_version, train_scope = "lightgbm-regression-model", "2.1.0", "training"
inference_instance_type = "ml.m5.large"
# Retrieve the inference docker container uri
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=train_model_id,
    model_version=train_model_version,
    instance_type=inference_instance_type,
)

In [9]:
model_name = f"smart-grid-LGBM-pred-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"

model_url = "s3://sagemaker-us-east-1-911199926915/Smart-Grid-prediction-lightGBM/output/LGBM-2024-02-04-19-49-05/SmartGrid-lightgbm-r-240204-1951-003-2a579e3d/output/model.tar.gz"

model = Model(image_uri=deploy_image_uri, model_data=model_url, role=role, sagemaker_session=session)

In [10]:
endpoint_name = f"smart-grid-LGBM-pred-model-quality-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"
print("EndpointName =", endpoint_name)
JsonContentTypes = "application/json"
data_capture_config = DataCaptureConfig(
    enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_upload_path
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    entry_point="inference.py",
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

EndpointName = smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429
------!

In [11]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
predictor = Predictor(
    endpoint_name=endpoint_name, sagemaker_session=session, serializer=CSVSerializer()
)

In [12]:
validate_dataset = "validation_with_predictions.csv"

In [13]:
limit = 200  # Need at least 200 samples to compute standard deviations
i = 0
import json
with open(f"test_data/{validate_dataset}", "w") as baseline_file:
    baseline_file.write("prediction,label\n")  # our header
    with open("test_data/validation_data.csv", "r") as f:
        for row in f:
            (label, input_cols) = row.split(",", 1)
            prediction = predictor.predict(input_cols)
            data = json.loads(prediction)
            prediction_number = data['prediction'][0]
            baseline_file.write(f"{prediction_number},{label}\n")
            i += 1
            if i > limit:
                break
            print(".", end="", flush=True)
            sleep(0.5)
print()
print("Done!")

........................................................................................................................................................................................................
Done!


In [14]:
!head test_data/validation_with_predictions.csv

prediction,label
0.18991390837120498,0.259011
0.1619517934704773,0.22785021
0.14121530713917715,0.1908404
0.1296991125220355,0.1580797
0.1296991125220355,0.14016196
0.13292900111286904,0.1313667
0.1574929179869918,0.13451737
0.18926231079185496,0.14440958
0.22436757101117083,0.16721123


In [15]:
baseline_prefix = prefix + "/baselining"
baseline_data_prefix = baseline_prefix + "/data"
baseline_results_prefix = baseline_prefix + "/results"

baseline_data_uri = f"s3://{bucket}/{baseline_data_prefix}"
baseline_results_uri = f"s3://{bucket}/{baseline_results_prefix}"
print(f"Baseline data uri: {baseline_data_uri}")
print(f"Baseline results uri: {baseline_results_uri}")

Baseline data uri: s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/baselining/data
Baseline results uri: s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/baselining/results


In [16]:
baseline_dataset_uri = S3Uploader.upload(f"test_data/{validate_dataset}", baseline_data_uri)
baseline_dataset_uri

's3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/baselining/data/validation_with_predictions.csv'

In [17]:
from sagemaker.model_monitor import ModelQualityMonitor
from sagemaker.model_monitor import EndpointInput
from sagemaker.model_monitor.dataset_format import DatasetFormat

In [18]:
# Create the model quality monitoring object
lightGBM_model_quality_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
    sagemaker_session=session,
)

In [19]:
# Name of the model quality baseline job
baseline_job_name = f"smart-grid-LGBM-pred-model-baseline-job-{datetime.utcnow():%Y-%m-%d-%H%M}"

In [20]:
# Execute the baseline suggestion job.
# You will specify problem type, in this case Binary Classification, and provide other required attributes.
job = lightGBM_model_quality_monitor.suggest_baseline(
    job_name=baseline_job_name,
    baseline_dataset=baseline_dataset_uri,
    dataset_format=DatasetFormat.csv(header=True),
    #dataset_format=DatasetFormat.json(), 
    output_s3_uri=baseline_results_uri,
    problem_type="Regression",
    inference_attribute="prediction",
    #probability_attribute="probability",
    ground_truth_attribute="label",
)
job.wait(logs=False)

INFO:sagemaker:Creating processing-job with name smart-grid-LGBM-pred-model-baseline-job-2024-02-26-0435


....................................................................!

In [21]:
baseline_job = lightGBM_model_quality_monitor.latest_baselining_job

In [22]:
# Access the baseline statistics and extract regression metrics
regression_metrics = baseline_job.baseline_statistics().body_dict["regression_metrics"]

# Normalize the regression metrics into a pandas DataFrame for easier viewing
pd.json_normalize(regression_metrics).T

Unnamed: 0,0
mae.value,0.032172
mae.standard_deviation,0.000495
mse.value,0.001669
mse.standard_deviation,3.3e-05
rmse.value,0.040855
rmse.standard_deviation,0.000398
r2.value,0.754814
r2.standard_deviation,0.010716


In [23]:
# Access the suggested constraints and extract regression constraints
regression_constraints = baseline_job.suggested_constraints().body_dict["regression_constraints"]

# Convert the regression constraints into a pandas DataFrame and transpose it
pd.DataFrame(regression_constraints).T

Unnamed: 0,threshold,comparison_operator
mae,0.032172,GreaterThanThreshold
mse,0.001669,GreaterThanThreshold
rmse,0.040855,GreaterThanThreshold
r2,0.754814,LessThanThreshold


In [24]:
def invoke_endpoint(ep_name, file_name):
    with open(file_name, "r") as f:
        i = 0
        for row in f:
            payload = row.rstrip("\n")
            response = session.sagemaker_runtime_client.invoke_endpoint(
                EndpointName=endpoint_name,
                ContentType="text/csv",
                Body=payload,
                InferenceId=str(i),  # unique ID per row
            )["Body"].read()
            #data = json.loads(response)
            #response = data['prediction'][0]
            i += 1
            sleep(1)


def invoke_endpoint_forever():
    while True:
        try:
            invoke_endpoint(endpoint_name, "test_data/batch_data.csv")
        except session.sagemaker_runtime_client.exceptions.ValidationError:
            pass


thread = Thread(target=invoke_endpoint_forever)
thread.start()

In [25]:
print("Waiting for captures to show up", end="")
for _ in range(120):
    capture_files = sorted(S3Downloader.list(f"{s3_capture_upload_path}/{endpoint_name}"))
    if capture_files:
        capture_file = S3Downloader.read_file(capture_files[-1]).split("\n")
        capture_record = json.loads(capture_file[0])
        if "inferenceId" in capture_record["eventMetadata"]:
            break
    print(".", end="", flush=True)
    sleep(1)
print()
print("Found Capture Files:")
print("\n ".join(capture_files[-3:]))

Waiting for captures to show up.....................................
Found Capture Files:
s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/datacapture/smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429/AllTraffic/2024/02/26/04/33-16-323-e225f885-de05-4a5c-9603-c0cf60899cca.jsonl
 s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/datacapture/smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429/AllTraffic/2024/02/26/04/34-16-804-b656aa13-2d24-4f6d-8a3f-bb958e81003d.jsonl
 s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/datacapture/smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429/AllTraffic/2024/02/26/04/40-50-070-4443421f-1d46-40af-84a1-01a3603ab4d6.jsonl


In [26]:
print("\n".join(capture_file[-3:-1]))

{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"0,8.0,5.8,-1.0","encoding":"CSV"},"endpointOutput":{"observedContentType":"application/json","mode":"OUTPUT","data":"{\"prediction\": [0.23078267204663072]}","encoding":"JSON"}},"eventMetadata":{"eventId":"a1c514d3-7da1-41ab-9069-6a62d19b7e20","inferenceId":"56","inferenceTime":"2024-02-26T04:41:48Z"},"eventVersion":"0"}
{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"0,9.0,6.41,-1.0","encoding":"CSV"},"endpointOutput":{"observedContentType":"application/json","mode":"OUTPUT","data":"{\"prediction\": [0.23593501650151272]}","encoding":"JSON"}},"eventMetadata":{"eventId":"ae4b166b-4078-45c4-a5c2-9b1ef7a6847c","inferenceId":"57","inferenceTime":"2024-02-26T04:41:49Z"},"eventVersion":"0"}


In [27]:
print(json.dumps(capture_record, indent=2))

{
  "captureData": {
    "endpointInput": {
      "observedContentType": "text/csv",
      "mode": "INPUT",
      "data": "5,0.0,9.24,-1.0",
      "encoding": "CSV"
    },
    "endpointOutput": {
      "observedContentType": "application/json",
      "mode": "OUTPUT",
      "data": "{\"prediction\": [0.1829749492362901]}",
      "encoding": "JSON"
    }
  },
  "eventMetadata": {
    "eventId": "360af48c-b08b-434b-8d3d-06ded5d59a6e",
    "inferenceId": "0",
    "inferenceTime": "2024-02-26T04:40:50Z"
  },
  "eventVersion": "0"
}


In [28]:
test_data = pd.read_csv("test_data/test_data.csv", header=None)
test_data

Unnamed: 0,0,1,2,3,4
0,0.241110,5,0.0,9.24,-1.0
1,0.191735,5,1.0,9.31,-1.0
2,0.155147,5,2.0,9.72,-1.0
3,0.135726,5,3.0,9.18,-1.0
4,0.126615,5,4.0,6.73,-1.0
...,...,...,...,...,...
644,0.347004,3,20.0,5.94,-1.0
645,0.334706,3,21.0,5.03,-1.0
646,0.299760,3,22.0,4.10,-1.0
647,0.252274,3,23.0,3.93,-1.0


In [29]:
def ground_truth_with_id(inference_id):
    return {
        "groundTruthData": {
            "data": f"[{(test_data[0][inference_id]-0.04)}]",  # for testing set as constance 
            "encoding": "CSV",
        },
        "eventMetadata": {
            "eventId": str(inference_id),
        },
        "eventVersion": "0",
    }


def upload_ground_truth(records, upload_time):
    fake_records = [json.dumps(r) for r in records]
    data_to_upload = "\n".join(fake_records)
    target_s3_uri = f"{ground_truth_upload_path}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
    print(f"Uploading {len(fake_records)} records to", target_s3_uri)
    S3Uploader.upload_string_as_file_body(data_to_upload, target_s3_uri)

In [30]:
NUM_GROUND_TRUTH_RECORDS = 649  # 649 are the number of rows in data we're sending for inference


def generate_fake_ground_truth_forever():
    j = 0
    while True:
        fake_records = [ground_truth_with_id(i) for i in range(NUM_GROUND_TRUTH_RECORDS)]
        upload_ground_truth(fake_records, datetime.utcnow())
        j = (j + 1) % 5
        sleep(60 * 60)  # do this once an hour


gt_thread = Thread(target=generate_fake_ground_truth_forever)
gt_thread.start()

Uploading 649 records to s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/ground_truth_data/2024-02-26-04-29-43/2024/02/26/04/4155.jsonl


In [31]:
##Monitoring schedule name
smartgrid_monitor_schedule_name = (
    f"smart-grid-LGBM-pred-model-monitoring-schedule-{datetime.utcnow():%Y-%m-%d-%H%M}"
)

In [32]:
# Create an enpointInput
endpointInput = EndpointInput(
    endpoint_name=predictor.endpoint_name,
    #probability_attribute="0",
    #probability_threshold_attribute=0.5,
    destination="/opt/ml/processing/input_data",
    inference_attribute='prediction0'
)

In [33]:
s3_key = f"s3://{bucket}/{prefix}"
pre_processor_script = S3Uploader.upload("code/preprocessor.py", s3_key)
pre_processor_script

's3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/preprocessor.py'

In [34]:
# Create the monitoring schedule to execute every hour.
from sagemaker.model_monitor import CronExpressionGenerator
#bucket1 = boto3.Session().resource("s3").Bucket(session.default_bucket())
#pre_processor_script = bucket1.Object(os.path.join(prefix, "preprocessor.py")).upload_file("code/preprocessor.py")

response = lightGBM_model_quality_monitor.create_monitoring_schedule(
    record_preprocessor_script=pre_processor_script,
    monitor_schedule_name=smartgrid_monitor_schedule_name,
    endpoint_input=endpointInput,
    output_s3_uri=baseline_results_uri,
    problem_type="Regression",
    ground_truth_input=ground_truth_upload_path,
    constraints=baseline_job.suggested_constraints(),
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)

INFO:sagemaker.model_monitor.model_monitoring:Creating Monitoring Schedule with name: smart-grid-LGBM-pred-model-monitoring-schedule-2024-02-26-0441


In [35]:
# Create the monitoring schedule
# You will see the monitoring schedule in the 'Scheduled' status
lightGBM_model_quality_monitor.describe_schedule()

{'MonitoringScheduleArn': 'arn:aws:sagemaker:us-east-1:911199926915:monitoring-schedule/smart-grid-LGBM-pred-model-monitoring-schedule-2024-02-26-0441',
 'MonitoringScheduleName': 'smart-grid-LGBM-pred-model-monitoring-schedule-2024-02-26-0441',
 'MonitoringScheduleStatus': 'Pending',
 'MonitoringType': 'ModelQuality',
 'CreationTime': datetime.datetime(2024, 2, 26, 4, 41, 57, 171000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 2, 26, 4, 41, 57, 246000, tzinfo=tzlocal()),
 'MonitoringScheduleConfig': {'ScheduleConfig': {'ScheduleExpression': 'cron(0 * ? * * *)'},
  'MonitoringJobDefinitionName': 'model-quality-job-definition-2024-02-26-04-41-56-700',
  'MonitoringType': 'ModelQuality'},
 'EndpointName': 'smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429',
 'ResponseMetadata': {'RequestId': '76d1930f-15e1-49b5-9218-0e0539b05f35',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '76d1930f-15e1-49b5-9218-0e0539b05f35',
   'content-type': 'applica

In [36]:
# Initially there will be no executions since the first execution happens at the top of the hour
# Note that it is common for the execution to luanch upto 20 min after the hour.
executions = lightGBM_model_quality_monitor.list_executions()
executions



[]

In [37]:
# Wait for the first execution of the monitoring_schedule
print("Waiting for first execution ", end="")
while True:
    execution = lightGBM_model_quality_monitor.describe_schedule().get(
        "LastMonitoringExecutionSummary"
    )
    if execution:
        break
    print(".", end="", flush=True)
    sleep(10)
print()
print("Execution found!")

Waiting for first execution .................................................................................................................................
Execution found!


In [38]:
while not executions:
    executions = lightGBM_model_quality_monitor.list_executions()
    print(".", end="", flush=True)
    sleep(10)
latest_execution = executions[-1]
latest_execution.describe()

........................

{'ProcessingInputs': [{'InputName': 'groundtruth_input_1',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/ground_truth_data/2024-02-26-04-29-43/2024/02/26/04',
    'LocalPath': '/opt/ml/processing/groundtruth/2024/02/26/04',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated',
    'S3CompressionType': 'None'}},
  {'InputName': 'endpoint_input_1',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/datacapture/smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429/AllTraffic/2024/02/26/04',
    'LocalPath': '/opt/ml/processing/input_data/smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429/AllTraffic/2024/02/26/04',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated',
    'S3CompressionType': 'None'}}],
 

In [39]:
try:
    status = execution["MonitoringExecutionStatus"]

    while status in ["Pending", "InProgress"]:
        print("Waiting for execution to finish", end="")
        latest_execution.wait(logs=False)
        latest_job = latest_execution.describe()
        print()
        print(f"{latest_job['ProcessingJobName']} job status:", latest_job["ProcessingJobStatus"])
        print(
            f"{latest_job['ProcessingJobName']} job exit message, if any:",
            latest_job.get("ExitMessage"),
        )
        print(
            f"{latest_job['ProcessingJobName']} job failure reason, if any:",
            latest_job.get("FailureReason"),
        )
        sleep(
            30
        )  # model quality executions consist of two Processing jobs, wait for second job to start
        latest_execution = lightGBM_model_quality_monitor.list_executions()[-1]
        execution = lightGBM_model_quality_monitor.describe_schedule()["LastMonitoringExecutionSummary"]
        status = execution["MonitoringExecutionStatus"]

    print("Execution status is:", status)

    if status != "Completed":
        print(execution)
        print(
            "====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures."
        )
except Exception as e:
    print(f"An error occurred: {e}")

Waiting for execution to finish...................................................................!
groundtruth-merge-202402260500-a69f684bc805a077eb99a69a job status: Completed
groundtruth-merge-202402260500-a69f684bc805a077eb99a69a job exit message, if any: None
groundtruth-merge-202402260500-a69f684bc805a077eb99a69a job failure reason, if any: None
Waiting for execution to finish..................................................................!
model-quality-monitoring-202402260500-a69f684bc805a077eb99a69a job status: Completed
model-quality-monitoring-202402260500-a69f684bc805a077eb99a69a job exit message, if any: CompletedWithViolations: Job completed successfully with 2 violations.
model-quality-monitoring-202402260500-a69f684bc805a077eb99a69a job failure reason, if any: None
Execution status is: CompletedWithViolations
{'MonitoringScheduleName': 'smart-grid-LGBM-pred-model-monitoring-schedule-2024-02-26-0441', 'ScheduledTime': datetime.datetime(2024, 2, 26, 5, 0, tzinfo=tzlocal

In [40]:
latest_execution = lightGBM_model_quality_monitor.list_executions()[-1]
report_uri = latest_execution.describe()["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
    "S3Uri"
]
print("Report Uri:", report_uri)

Report Uri: s3://sagemaker-us-east-1-911199926915/sagemaker/LGBM-ModelQualityMonitor-20240225test/baselining/results/smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429/smart-grid-LGBM-pred-model-monitoring-schedule-2024-02-26-0441/2024/02/26/05


In [41]:
pd.options.display.max_colwidth = None
violations = latest_execution.constraint_violations().body_dict["violations"]
violations_df = pd.json_normalize(violations)
violations_df.head(10)

Unnamed: 0,constraint_check_type,description,metric_name
0,GreaterThanThreshold,Metric mae with 0.03563435589812909 +/- 3.3556802666891557E-4 was GreaterThanThreshold '0.03217169328073971',mae
1,LessThanThreshold,Metric r2 with 0.7148569096176027 +/- 0.0048325756134775565 was LessThanThreshold '0.7548139611929279',r2


In [42]:
# Create CloudWatch client
cw_client = boto3.Session().client("cloudwatch")

namespace = "aws/sagemaker/Endpoints/model-metrics"

cw_dimensions = [
    {"Name": "Endpoint", "Value": endpoint_name},
    {"Name": "MonitoringSchedule", "Value": smartgrid_monitor_schedule_name},
]

In [43]:
# List metrics through the pagination interface
paginator = cw_client.get_paginator("list_metrics")

for response in paginator.paginate(Dimensions=cw_dimensions, Namespace=namespace):
    model_quality_metrics = response["Metrics"]
    for metric in model_quality_metrics:
        print(metric["MetricName"])

rmse
r2
total_number_of_violations
mae
mse


In [45]:
alarm_name = "MODEL_QUALITY_mae"
alarm_desc = (
    "Trigger an CloudWatch alarm when the mae drifts away from the baseline constraints"
)
mdoel_quality_mae_drift_threshold = (
    0.0321  ##Setting this threshold purposefully low to see the alarm quickly.
)
metric_name = "mae"
namespace = "aws/sagemaker/Endpoints/model-metrics"

cw_client.put_metric_alarm(
    AlarmName=alarm_name,
    AlarmDescription=alarm_desc,
    ActionsEnabled=True,
    MetricName=metric_name,
    Namespace=namespace,
    Statistic="Average",
    Dimensions=[
        {"Name": "Endpoint", "Value": endpoint_name},
        {"Name": "MonitoringSchedule", "Value": smartgrid_monitor_schedule_name},
    ],
    Period=600,
    EvaluationPeriods=1,
    DatapointsToAlarm=1,
    Threshold=mdoel_quality_mae_drift_threshold ,
    ComparisonOperator="GreaterThanThreshold",
    TreatMissingData="breaching",
)

{'ResponseMetadata': {'RequestId': '50923532-ae2e-4fbb-94e8-3199f1901ead',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '50923532-ae2e-4fbb-94e8-3199f1901ead',
   'content-type': 'text/xml',
   'content-length': '214',
   'date': 'Mon, 26 Feb 2024 05:23:49 GMT'},
  'RetryAttempts': 0}}

In [46]:
lightGBM_model_quality_monitor.delete_monitoring_schedule()
sleep(60)  # actually wait for the deletion

INFO:sagemaker:Deleting Monitoring Schedule with name: smart-grid-LGBM-pred-model-monitoring-schedule-2024-02-26-0441
INFO:sagemaker.model_monitor.model_monitoring:Deleting Model Quality Job Definition with name: model-quality-job-definition-2024-02-26-04-41-56-700


In [47]:
predictor.delete_model()
predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: pytorch-inference-2024-02-26-04-29-43-714
INFO:sagemaker:Deleting endpoint configuration with name: smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429
INFO:sagemaker:Deleting endpoint with name: smart-grid-LGBM-pred-model-quality-monitor-2024-02-26-0429


In [48]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>