# Baselining and scheduled monitoring with SageMaker Model Monitor

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code to help readers work through one of the recipes of the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030).

### How to do it...

In [None]:
# Restore the S3 bucket name and key prefix previously saved with %store.
%store -r s3_bucket_name
%store -r prefix

In [None]:
# Root S3 location for this notebook, built from the restored bucket and prefix.
base = f's3://{s3_bucket_name}/{prefix}'
# CSV used as the baseline dataset for the baselining job.
baseline_data_uri = f'{base}/input/training_data.csv'
# S3 location that receives the baselining job's output.
baseline_results_uri = f"{base}/model-monitor/baseline-results"

In [None]:
# Copy the baseline CSV from S3 to a local file and load it for inspection.
local_file = "tmp/baseline.csv"
!aws s3 cp {baseline_data_uri} {local_file}

import pandas as pd
baseline_df = pd.read_csv(local_file)
# Last expression of the cell, so the notebook displays the DataFrame.
baseline_df

In [None]:
import sagemaker
# Execution role of the current SageMaker environment; handed to the monitor.
role = sagemaker.get_execution_role()

In [None]:
from sagemaker.model_monitor import DefaultModelMonitor

# Monitor whose jobs run on a single ml.m5.large instance with a 20 GB volume
# and a runtime cap of 3600 seconds (one hour).
default_monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)

In [None]:
%%time

from sagemaker.model_monitor import dataset_format

# The baseline dataset is a CSV file whose first row is a header.
dsf = dataset_format.DatasetFormat.csv(header=True)

# Start a baselining job over the baseline CSV and write its results to
# baseline_results_uri. wait=True is passed so the call blocks; %%time then
# reports how long the cell took.
default_monitor.suggest_baseline(
    baseline_dataset=baseline_data_uri,
    dataset_format=dsf,
    output_s3_uri=baseline_results_uri,
    wait=True
)

In [None]:
# Handle to the monitor's most recent baselining job.
baseline_job = default_monitor.latest_baselining_job
# Dump the job object's attributes for inspection.
baseline_job.__dict__

In [None]:
# Statistics produced by the baselining job.
stats = baseline_job.baseline_statistics()
# Keep only the "features" entry of the statistics document.
schema_dict = stats.body_dict["features"]

In [None]:
import pandas as pd
# Flatten the nested "features" statistics into a table; show the first 5 rows.
schema_df = pd.json_normalize(schema_dict)
schema_df.head(5)

In [None]:
# Constraints suggested by the baselining job.
constraints = baseline_job.suggested_constraints()
# Keep only the "features" entry of the constraints document.
constraints_dict = constraints.body_dict["features"]

In [None]:
# Flatten the "features" constraints into a table; show the first 7 rows.
constraints_df = pd.json_normalize(constraints_dict)
constraints_df.head(7)

In [None]:
# Recursively copy everything under the baseline results location into tmp/.
!aws s3 cp {baseline_results_uri}/ tmp/ --recursive

In [None]:
# Print the local constraints file.
!cat tmp/constraints.json

In [None]:
# Print the local statistics file.
!cat tmp/statistics.json

In [None]:
from sagemaker.model_monitor import CronExpressionGenerator
from time import gmtime, strftime

In [None]:
import random
from string import ascii_uppercase

def generate_schedule_name():
    """Return 'schedule-' followed by five random uppercase ASCII letters."""
    suffix = ''.join(random.choices(ascii_uppercase, k=5))
    return f'schedule-{suffix}'

In [None]:
# Pick a random name for the monitoring schedule and display it.
schedule_name = generate_schedule_name()
schedule_name

In [None]:
# S3 location passed as output_s3_uri when the monitoring schedule is created.
s3_report_path = f'{base}/report-path'

In [None]:
# Baseline statistics as exposed by the monitor; passed to the schedule later.
baseline_statistics = default_monitor.baseline_statistics()

In [None]:
# Suggested constraints as exposed by the monitor; this rebinds `constraints`,
# which was previously fetched from the baselining job.
constraints = default_monitor.suggested_constraints()

In [None]:
# Cron expression for an hourly schedule; displayed for reference.
cron_expression = CronExpressionGenerator.hourly()
cron_expression

In [None]:
# Restore the endpoint name previously saved with %store.
%store -r endpoint_name

from sagemaker import Predictor
# Predictor bound to that endpoint by name.
predictor = Predictor(endpoint_name=endpoint_name)

In [None]:
# Best-effort cleanup: try to delete any monitoring schedule attached to this
# monitor. A failure here is tolerated and reported instead of raised;
# presumably the usual cause is that no schedule exists yet.
try:
    default_monitor.delete_monitoring_schedule()
except Exception as exc:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate.
    print(f"Skipping schedule deletion: {exc}")

In [None]:
# Restore the sample CSV payload previously saved with %store, and display it.
%store -r csv_input
csv_input

In [None]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import CSVSerializer

# Requests are serialized as CSV; responses are deserialized as JSON.
predictor.serializer = CSVSerializer()
predictor.deserializer = JSONDeserializer()

In [None]:
# Send the sample payload to the endpoint once and display the response.
predictor.predict(csv_input)

In [None]:
# Dump the constraints object's attributes for inspection.
constraints.__dict__

In [None]:
# Override the inferred type of the first feature to 'Fractional'.
# NOTE(review): presumably the baselining job inferred a different type for
# this column — confirm against the suggested constraints.
constraints.body_dict['features'][0]['inferred_type'] = 'Fractional'
# Display the edited constraints document.
constraints.body_dict

In [None]:
# Persist the edited constraints.
# NOTE(review): with no arguments this presumably writes back to the object's
# own S3 location — confirm in the SDK docs.
constraints.save()

In [None]:
# Create the monitoring schedule for the endpoint. It runs on cron_expression,
# uses the baseline statistics and constraints, writes its output to
# s3_report_path, and has CloudWatch metrics enabled.
default_monitor.create_monitoring_schedule(
    monitor_schedule_name=schedule_name,
    # `Predictor.endpoint` is a deprecated alias in SageMaker SDK v2;
    # `endpoint_name` is the supported attribute and matches how the
    # predictor was constructed.
    endpoint_input=predictor.endpoint_name,
    output_s3_uri=s3_report_path,
    statistics=baseline_statistics,
    constraints=constraints,
    schedule_cron_expression=cron_expression,
    enable_cloudwatch_metrics=True,
)

In [None]:
# Display the description of the monitor's schedule.
default_monitor.describe_schedule()

In [None]:
from time import sleep
# Pause for 300 seconds (5 minutes).
# NOTE(review): presumably gives the new schedule time to become active — confirm.
sleep(300)

In [None]:
def perform_good_input():
    """Send the stored `csv_input` payload to the endpoint, then print a marker."""
    payload = csv_input
    predictor.predict(payload)
    print("good input")

In [None]:
def perform_bad_input():
    """Send a fixed CSV payload to the endpoint, then print a marker.

    NOTE(review): the payload is presumably chosen to fall outside the
    baseline constraints (it contains negative values) — confirm.
    """
    predictor.predict('1,92,-83.3,86,-96,67')
    print("bad input")

In [None]:
# Send one "good" and one "bad" request to the endpoint.
perform_good_input()
perform_bad_input()

In [None]:
# Short alias for the monitor.
dm = default_monitor
# Latest constraint violations and statistics reported by the monitor.
# NOTE(review): presumably falsy until a scheduled execution has produced
# results; the polling cell further down loops while the violations are falsy.
monitoring_violations = dm.latest_monitoring_constraint_violations()
monitoring_statistics = dm.latest_monitoring_statistics()

In [None]:
%%time

from time import sleep

violations = monitoring_violations

while not violations:
    print("No executions yet. Sleeping for 5 minutes...")
    sleep(300)
    
    perform_good_input()
    perform_bad_input()
    
    try:
        v = dm.latest_monitoring_constraint_violations()
        violations = v
    except:
        pass
    
print("Executions found!")

In [None]:
# Re-fetch the latest constraint violations and dump the object's attributes.
violations = dm.latest_monitoring_constraint_violations()
violations.__dict__

In [None]:
# Copy the violations file from its S3 URI to tmp/violations.json.
!aws s3 cp {violations.file_s3_uri} tmp/violations.json

In [None]:
# Print the local violations file.
!cat tmp/violations.json

In [None]:
# Re-fetch the latest monitoring statistics and dump the object's attributes.
monitoring_statistics = dm.latest_monitoring_statistics()
monitoring_statistics.__dict__

In [None]:
# Cleanup: delete the monitor's schedule.
default_monitor.delete_monitoring_schedule()

In [None]:
# Cleanup: delete the endpoint the predictor is bound to.
predictor.delete_endpoint()