In [2]:
%%capture

!pip install tensorflow==2.3.0
!pip install sagemaker-experiments

#### Imports 

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sagemaker.tensorflow.serving import TensorFlowModel
from sagemaker.multidatamodel import MultiDataModel
from tensorflow.keras.datasets import cifar10
from sagemaker.tensorflow import TensorFlow
from sagemaker.inputs import TrainingInput
from sagemaker import get_execution_role
from tensorflow.keras import utils
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from datetime import datetime
import tensorflow as tf
import numpy as np
import sagemaker
import logging
import boto3
import time
import os

#### Setup Logger

In [4]:
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())
!python --version

Python 3.7.10


In [5]:
logger.info(f'[Using TensorFlow version: {tf.__version__}]')
logger.info(f'[Using SageMaker version: {sagemaker.__version__}]')

[Using TensorFlow version: 2.3.0]
[Using SageMaker version: 2.94.0]


#### Seed for Reproducability

In [6]:
SEED = 123
np.random.seed(SEED)
tf.random.set_seed(SEED)

#### Create Roles, Sessions and Data Locations

In [7]:
role = get_execution_role()
session = boto3.Session()
sagemaker_session = sagemaker.Session()

s3 = session.resource('s3')
TF_FRAMEWORK_VERSION = '2.3.0'
BUCKET = sagemaker.Session().default_bucket()
PREFIX = 'cv-models'
MONITORING_FOLDER = 'DEMO-tf2-ModelMonitor'

### Train - CIFAR-10 Image Classification

<p align="justify">First, we will train a Convolutional Neural Network (CNN) model to classify images from the CIFAR-10 dataset. Image classification is the task of assigning a label to an image, from a predefined set of categories. CIFAR-10 is an established CV dataset used for object recognition. It is a subset of the 80 Million Tiny Images dataset and consists of 60,000 (32x32) color images containing 1 of 10 object classes, with 6,000 images per class.</p>

#### a) Load Data

The first step is to load the pre-shuffled CIFAR-10 dataset into our train and test objects. Luckily, Keras provides the CIFAR dataset for us to load using the `load_data()` method. All we have to do is import keras.datasets and then load the data.

In [8]:
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [9]:
logger.info(f'X_train Shape: {X_train.shape}')
logger.info(f'y_train Shape: {y_train.shape}')
logger.info(f'X_test Shape : {X_test.shape}')
logger.info(f'y_test Shape : {y_test.shape}')

X_train Shape: (50000, 32, 32, 3)
y_train Shape: (50000, 1)
X_test Shape : (10000, 32, 32, 3)
y_test Shape : (10000, 1)


#### c) Data Preparation

##### Rescale 
Rescales the images by dividing the pixel values by 255: [0,255] ⇒ [0,1]

In [10]:
X_train = X_train.astype('float32')/255
X_test = X_test.astype('float32')/255

##### One Hot Encode Target Labels
One-hot encoding is a process by which categorical variables are converted into a numeric form. One-hot encoding converts the (1 × n) label vector to a label matrix of dimensions (10 × n), where n is the number of sample images. So, if we have 1,000 images in our dataset, the label vector will have the dimensions (1 × 1000). After one-hot encoding, the label matrix dimensions will be (1000 × 10). That’s why, when we define our network architecture in the next step, we will make the output softmax layer contain 10 nodes, where each node represents the probability of each class we have.

In [11]:
num_classes = len(np.unique(y_train))
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

##### Split Data
Break original train set further into train and validation sets.

In [12]:
X_train, X_validation = X_train[500:], X_train[:500]
y_train, y_validation = y_train[500:], y_train[:500]

##### Save to Local

Create a local `data/cifar_10` directory to save the datasets.

In [190]:
DATASET_PATH = './data/cifar_10'

In [191]:
os.makedirs(DATASET_PATH, exist_ok=True)

Save train, validation and test sets to local `data` directory

In [192]:
np.save(f'{DATASET_PATH}/X_train.npy', X_train)
np.save(f'{DATASET_PATH}/y_train.npy', y_train)
np.save(f'{DATASET_PATH}/X_validation.npy', X_validation)
np.save(f'{DATASET_PATH}/y_validation.npy', y_validation)
np.save(f'{DATASET_PATH}/X_test.npy', X_test)
np.save(f'{DATASET_PATH}/y_test.npy', y_test)

##### Copy Datasets to S3
Copy train, validation and test sets from the local dir to S3, since SageMaker expects datasets to be in S3 for training.

In [15]:
!aws s3 cp ./{DATASET_PATH}/X_train.npy s3://{BUCKET}/{PREFIX}/cifar_10/train/
!aws s3 cp ./{DATASET_PATH}/y_train.npy s3://{BUCKET}/{PREFIX}/cifar_10/train/
!aws s3 cp ./{DATASET_PATH}/X_validation.npy s3://{BUCKET}/{PREFIX}/cifar_10/validation/
!aws s3 cp ./{DATASET_PATH}/y_validation.npy s3://{BUCKET}/{PREFIX}/cifar_10/validation/
!aws s3 cp ./{DATASET_PATH}/X_test.npy s3://{BUCKET}/{PREFIX}/cifar_10/test/
!aws s3 cp ./{DATASET_PATH}/y_test.npy s3://{BUCKET}/{PREFIX}/cifar_10/test/

upload: data/cifar_10/X_train.npy to s3://sagemaker-us-east-1-949263681218/cv-models/cifar_10/train/X_train.npy
upload: data/cifar_10/y_train.npy to s3://sagemaker-us-east-1-949263681218/cv-models/cifar_10/train/y_train.npy
upload: data/cifar_10/X_validation.npy to s3://sagemaker-us-east-1-949263681218/cv-models/cifar_10/validation/X_validation.npy
upload: data/cifar_10/y_validation.npy to s3://sagemaker-us-east-1-949263681218/cv-models/cifar_10/validation/y_validation.npy
upload: data/cifar_10/X_test.npy to s3://sagemaker-us-east-1-949263681218/cv-models/cifar_10/test/X_test.npy
upload: data/cifar_10/y_test.npy to s3://sagemaker-us-east-1-949263681218/cv-models/cifar_10/test/y_test.npy


# Create Training Inputs

In [13]:
train_input = TrainingInput(s3_data=f's3://{BUCKET}/{PREFIX}/cifar_10/train', 
                            distribution='FullyReplicated', 
                            content_type='npy')
validation_input = TrainingInput(s3_data=f's3://{BUCKET}/{PREFIX}/cifar_10/validation', 
                                 distribution='FullyReplicated', 
                                 content_type='npy')
test_input = TrainingInput(s3_data=f's3://{BUCKET}/{PREFIX}/cifar_10/test', 
                           distribution='FullyReplicated', 
                           content_type='npy')

In [14]:
inputs = {'train': train_input, 'val': validation_input, 'test': test_input}

#### e) Define Model Architecture & create Training Script

We will build a small CNN consisting of three convolutional layers and two dense layers.<br>
<b>Note:</b> We will use the ReLU activation function for all the hidden layers. In the last dense layer, we will use a softmax activation function with 10 nodes to return an array of 10 probability scores (summing to 1). Each score will be the probability that the current image belongs to our 10 image classes.

# Prepare a Experiment Tracker

In [15]:
sm = boto3.client('sagemaker')

In [None]:
from smexperiments.experiment import Experiment
cifar_experiment = Experiment.create(
    experiment_name="cifar-10-dataset-experiment", 
    description="objects", 
    sagemaker_boto_client=sm)

In [None]:
from smexperiments.trial import Trial
for num_hidden_channel in [32]:
    trial_name = f"cnn-training-job-{num_hidden_channel}-hidden-channels-{int(time.time())}"
    cnn_trial = Trial.create(
        trial_name=trial_name, 
        experiment_name=cifar_experiment.experiment_name,
        sagemaker_boto_client=sm,
    )
    cnn_trial.add_trial_component(tracker.trial_component)

#### f) Create a TensorFlow Estimator & fit the Model

In [13]:
model_name = 'cifar-10'
hyperparameters = {'epochs': 1}
estimator_parameters = {'entry_point':'cifar_train.py',
                        'instance_type': 'ml.m5.2xlarge',
                        'instance_count': 1,
                        'model_dir': '/opt/ml/model',
                        'role': role,
                        'hyperparameters': hyperparameters,
                        'output_path': f's3://{BUCKET}/{PREFIX}/cifar_10/out',
                        'base_job_name': f'mme-cv-{model_name}',
                        'framework_version': TF_FRAMEWORK_VERSION,
                        'py_version': 'py37',
                        'script_mode': True}
model = TensorFlow(**estimator_parameters)

In [14]:
cnn_training_job_name = "cnn-training-job-{}".format(int(time.time()))
print(cnn_training_job_name)
model.fit(inputs, job_name=cnn_training_job_name,
        experiment_config={
            "ExperimentName": "cifar-10-dataset-experiment", 
            "TrialName": 'cnn-training-job-32-hidden-channels-1655871250',
            "TrialComponentDisplayName": "Training",
        })

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


cnn-training-job-1655978931


INFO:sagemaker:Creating training-job with name: cnn-training-job-1655978931


2022-06-23 10:08:52 Starting - Starting the training job...
2022-06-23 10:09:15 Starting - Preparing the instances for trainingProfilerReport-1655978931: InProgress
......
2022-06-23 10:10:16 Downloading - Downloading input data...
2022-06-23 10:10:49 Training - Downloading the training image...
2022-06-23 10:11:16 Training - Training image download completed. Training in progress.[34m2022-06-23 10:11:10,857 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-06-23 10:11:10,865 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-23 10:11:11,280 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-23 10:11:11,395 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-23 10:11:11,413 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-23 10:11:11

# Contents of the recorded experiment

In [6]:
trial_component_analytics = ExperimentAnalytics(
    experiment_name='cifar-10-dataset-experiment',
    sort_by="metrics.test:accuracy.max",
    sort_order="Descending",
    metric_names=['test:accuracy'],
   # parameter_names=['hidden_channels', 'epochs', 'dropout', 'optimizer']
)
analytic_table = trial_component_analytics.dataframe()
analytic_table.head()

Unnamed: 0,TrialComponentName,DisplayName,SourceArn,SageMaker.ImageUri,SageMaker.InstanceCount,SageMaker.InstanceType,SageMaker.VolumeSizeInGB,epochs,model_dir,sagemaker_container_log_level,...,SageMaker.DebugHookOutput - MediaType,SageMaker.DebugHookOutput - Value,SageMaker.ModelArtifact - MediaType,SageMaker.ModelArtifact - Value,Trials,Experiments,normalization_mean,normalization_std,cifar-10-dataset-log - MediaType,cifar-10-dataset-log - Value
0,cnn-training-job-1655879605-aws-training-job,Training,arn:aws:sagemaker:us-east-1:949263681218:train...,763104351884.dkr.ecr.us-east-1.amazonaws.com/t...,1.0,ml.m5.4xlarge,30.0,3.0,"""/opt/ml/model""",20.0,...,,s3://sagemaker-us-east-1-949263681218/cv-model...,,s3://sagemaker-us-east-1-949263681218/cv-model...,[cnn-training-job-32-hidden-channels-1655871250],[cifar-10-dataset-experiment],,,,
1,TrialComponent-2022-06-22-041210-guap,Preprocessing,,,,,,,,,...,,,,,[cnn-training-job-10-hidden-channels-165587124...,"[cifar-10-dataset-experiment, cifar-10-dataset...",0.1307,0.3081,s3/uri,random
2,cnn-training-job-1655871262-aws-training-job,Training,arn:aws:sagemaker:us-east-1:949263681218:train...,763104351884.dkr.ecr.us-east-1.amazonaws.com/t...,1.0,ml.m5.2xlarge,30.0,1.0,"""/opt/ml/model""",20.0,...,,s3://sagemaker-us-east-1-949263681218/cv-model...,,s3://sagemaker-us-east-1-949263681218/cv-model...,[cnn-training-job-32-hidden-channels-1655871250],[cifar-10-dataset-experiment],,,,


In [4]:
from sagemaker.analytics import TrainingJobAnalytics
from sagemaker.analytics import ExperimentAnalytics

In [60]:
analytics = TrainingJobAnalytics(training_job_name = 'cnn-training-job-1655871262', metric_names=['test:accuracy'])

In [75]:
analytics.__dict__ #['_cloudwatch'].list_metrics()

{'_sage_client': <botocore.client.SageMaker at 0x7f6cb0c3f8d0>,
 '_cloudwatch': <botocore.client.CloudWatch at 0x7f6cb13a02e8>,
 '_training_job_name': 'cnn-training-job-1655871262',
 '_start_time': None,
 '_end_time': None,
 '_period': 60,
 '_metric_names': ['test:accuracy'],
 '_dataframe': None,
 '_data': defaultdict(list, {}),
 '_time_interval': {'start_time': datetime.datetime(2022, 6, 22, 4, 15, 56, 247000, tzinfo=tzlocal()),
  'end_time': datetime.datetime(2022, 6, 22, 4, 20, 6, 584000, tzinfo=tzlocal())}}

# Experiment Cleanup

In [94]:
# def cleanup_sme_sdk(experiment):
#     for trial_summary in experiment.list_trials():
#         trial = Trial.load(trial_name=trial_summary.trial_name)
#         for trial_component_summary in trial.list_trial_components():
#             tc = TrialComponent.load(
#                 trial_component_name=trial_component_summary.trial_component_name)
#             trial.remove_trial_component(tc)
#             try:
#                 # comment out to keep trial components
#                 tc.delete()
#             except:
#                 # tc is associated with another trial
#                 continue
#             # to prevent throttling
#             time.sleep(.5)
#         trial.delete()
#         experiment_name = experiment.experiment_name
#     experiment.delete()
#     print(f"\nExperiment {experiment_name} deleted")

In [None]:
# cleanup_sme_sdk('cifar-10-dataset-experiment')

# LOAD TRAINED MODEL

In [16]:
model_path = f's3://{BUCKET}/cv-models/cifar_10/out/cnn-training-job-1655978931/output/model.tar.gz'
from sagemaker.tensorflow.model import TensorFlowModel

model = TensorFlowModel(model_data=model_path, role=role, framework_version="2.3.0")

# Deploy

In [17]:
from sagemaker.model_monitor import DataCaptureConfig

data_capture_prefix = "{}/monitoring/datacapture/".format(MONITORING_FOLDER)
s3_capture_upload_path = "s3://{}/{}".format(BUCKET, data_capture_prefix)

data_capture_configuration=DataCaptureConfig(
        enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_upload_path
    )

In [18]:
endpoint_name=f'tensorflow-cv-{int(time.time())}'
predictor = model.deploy(initial_instance_count=1,
                       instance_type='ml.m5.xlarge',
                       endpoint_name=endpoint_name,
 data_capture_config = data_capture_configuration)
print(f"\nSuccessfully deployed at {endpoint_name}...")

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-------------!
Successfully deployed at tensorflow-cv-1656064111...


In [19]:
import boto3
sm.describe_endpoint(EndpointName = endpoint_name)

{'EndpointName': 'tensorflow-cv-1656064111',
 'EndpointArn': 'arn:aws:sagemaker:us-east-1:949263681218:endpoint/tensorflow-cv-1656064111',
 'EndpointConfigName': 'tensorflow-cv-1656064111',
 'ProductionVariants': [{'VariantName': 'AllTraffic',
   'DeployedImages': [{'SpecifiedImage': '763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:2.3.0-cpu',
     'ResolvedImage': '763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference@sha256:91ebb7428846c5f7b515d5d9b8389a14c73d0c5d02657f4a6413592124333278',
     'ResolutionTime': datetime.datetime(2022, 6, 24, 9, 48, 35, 301000, tzinfo=tzlocal())}],
   'CurrentWeight': 1.0,
   'DesiredWeight': 1.0,
   'CurrentInstanceCount': 1,
   'DesiredInstanceCount': 1}],
 'DataCaptureConfig': {'EnableCapture': True,
  'CaptureStatus': 'Started',
  'CurrentSamplingPercentage': 100,
  'DestinationS3Uri': 's3://sagemaker-us-east-1-949263681218/DEMO-tf2-ModelMonitor/monitoring/datacapture/'},
 'EndpointStatus': 'InService',
 'CreationTim

# Invoking the Deployed Model

In [20]:
%matplotlib inline
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing import image
from IPython.display import Image
import matplotlib.image as mpimg 
import matplotlib.pyplot as plt
import numpy as np
CIFAR10_LABELS = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

In [21]:
from sagemaker.tensorflow.model import TensorFlowPredictor
predictor = TensorFlowPredictor(endpoint_name = endpoint_name)

In [22]:
img = load_img('./data/cifar_10/raw_images/jeep.png', target_size=(32, 32))
data = img_to_array(img)
data = data.astype('float32')
data = data / 255.0
data = data.reshape(1, 32, 32, 3)


In [23]:
print(data.shape)

(1, 32, 32, 3)


In [24]:
payload = {'instances': data}

In [25]:
resp = predictor.predict(payload)
predicted_label = CIFAR10_LABELS[np.argmax(resp['predictions'])]
print(f'Predicted Label: [{predicted_label}]')

Predicted Label: [truck]


In [27]:
!ls /opt/ml

metadata


# Sending Test Traffic

In [28]:
%%time

import time
print("Sending test traffic to the endpoint {}. \nPlease wait...".format(endpoint_name))

flat_list = []
for i in range(200):
    data = np.array([X_test[i]])
    payload = {'instances': data}
    resp = predictor.predict(payload)
    predicted_label = CIFAR10_LABELS[np.argmax(resp['predictions'])]
    flat_list.append(predicted_label)
    time.sleep(0.5)

print("Done!")
print("predictions: \t{}".format(np.array(flat_list)))

Sending test traffic to the endpoint tensorflow-cv-1656064111. 
Please wait...
Done!
predictions: 	['dog' 'ship' 'ship' 'ship' 'frog' 'frog' 'truck' 'frog' 'dog'
 'automobile' 'dog' 'truck' 'dog' 'dog' 'truck' 'ship' 'dog' 'horse'
 'ship' 'frog' 'horse' 'airplane' 'bird' 'automobile' 'horse' 'horse'
 'dog' 'horse' 'truck' 'frog' 'cat' 'horse' 'deer' 'dog' 'truck' 'dog'
 'horse' 'automobile' 'automobile' 'dog' 'truck' 'frog' 'cat' 'dog'
 'truck' 'truck' 'cat' 'truck' 'deer' 'bird' 'truck' 'ship' 'dog' 'cat'
 'ship' 'ship' 'dog' 'cat' 'bird' 'dog' 'horse' 'dog' 'automobile' 'truck'
 'frog' 'frog' 'automobile' 'airplane' 'cat' 'horse' 'bird' 'horse' 'ship'
 'ship' 'truck' 'bird' 'truck' 'dog' 'dog' 'truck' 'ship' 'automobile'
 'automobile' 'horse' 'bird' 'bird' 'bird' 'ship' 'truck' 'automobile'
 'ship' 'dog' 'ship' 'frog' 'deer' 'horse' 'frog' 'bird' 'horse' 'horse'
 'horse' 'horse' 'dog' 'cat' 'automobile' 'automobile' 'dog' 'frog' 'dog'
 'dog' 'horse' 'airplane' 'bird' 'bird' 'automobi

In [38]:
s3_client = boto3.Session().client("s3")
result = s3_client.list_objects(Bucket=BUCKET, Prefix=data_capture_prefix)
capture_files = [capture_file.get("Key") for capture_file in result.get("Contents")]
print("Found Capture Files:")
print("\n ".join(capture_files))
print(BUCKET)

Found Capture Files:
DEMO-tf2-ModelMonitor/monitoring/datacapture/
sagemaker-us-east-1-949263681218


# Prepare baseline Dataset

In [22]:
validate_dataset = "validation_with_predictions.csv"

In [None]:
# X_train, y_train
# ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

In [None]:
data = np.array([X_test[i]])
    payload = {'instances': data}
    resp = predictor.predict(payload)
    predicted_label = CIFAR10_LABELS[np.argmax(resp['predictions'])]

In [27]:
i = 0
with open(f"{validate_dataset}", "w") as baseline_file:
    baseline_file.write("probability,prediction,label\n")  # our header
    for i in range(1000):
        data = np.array([X_train[i]])
        payload = {'instances': data}
        resp = predictor.predict(payload)
        probability = max(resp['predictions'][0])
        prediction = np.argmax(resp['predictions'])
        label = y_train[i][0]
        
        baseline_file.write(f"{probability},{prediction},{label}\n")
print("Done!")

Done!


In [28]:
!aws s3 cp ./validation_with_predictions.csv s3://{BUCKET}/DEMO-tf2-ModelMonitor/monitoring/baseline/

upload: ./validation_with_predictions.csv to s3://sagemaker-us-east-1-949263681218/DEMO-tf2-ModelMonitor/monitoring/baseline/validation_with_predictions.csv


# MODEL MONITORING

In [29]:
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat

In [30]:
my_default_monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)

In [33]:
# GENERATE constraints and statistics
baseline_data_uri = 's3://{BUCKET}/{MONITORING_FOLDER}/monitoring/baseline/'
baseline_data_output_uri = 's3://{BUCKET}/{MONITORING_FOLDER}/monitoring/baseline/output'
my_default_monitor.suggest_baseline(
    job_name = 'test-baseline-job',
    baseline_dataset=baseline_data_uri,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_data_output_uri,
    wait=True
)


Job Name:  test-baseline-job
Inputs:  [{'InputName': 'baseline_dataset_input', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-949263681218/DEMO-tf2-ModelMonitor/monitoring/baseline/', 'LocalPath': '/opt/ml/processing/input/baseline_dataset_input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'monitoring_output', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-1-949263681218/DEMO-tf2-ModelMonitor/monitoring/baseline/output', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]
..........................[34m2022-06-24 04:20:21,447 - matplotlib.font_manager - INFO - Generating new fontManager, this may take some time...[0m
[34m2022-06-24 04:20:21.983797: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object f

<sagemaker.processing.ProcessingJob at 0x7f8070e95810>

In [36]:
import pandas as pd

baseline_job = my_default_monitor.latest_baselining_job
schema_df = pd.io.json.json_normalize(baseline_job.baseline_statistics().body_dict["features"])
schema_df.head(10)

  after removing the cwd from sys.path.


Unnamed: 0,name,inferred_type,numerical_statistics.common.num_present,numerical_statistics.common.num_missing,numerical_statistics.mean,numerical_statistics.sum,numerical_statistics.std_dev,numerical_statistics.min,numerical_statistics.max,numerical_statistics.distribution.kll.buckets,numerical_statistics.distribution.kll.sketch.parameters.c,numerical_statistics.distribution.kll.sketch.parameters.k,numerical_statistics.distribution.kll.sketch.data
0,probability,Fractional,1000,0,0.403413,403.412881,0.178749,0.116331,0.974984,"[{'lower_bound': 0.116330899, 'upper_bound': 0...",0.64,2048.0,"[[0.418848723, 0.293665916, 0.476312459, 0.483..."
1,prediction,Integral,1000,0,5.209,5209.0,2.790577,0.0,9.0,"[{'lower_bound': 0.0, 'upper_bound': 0.9, 'cou...",0.64,2048.0,"[[7.0, 1.0, 5.0, 8.0, 0.0, 9.0, 1.0, 1.0, 9.0,..."
2,label,Integral,1000,0,4.583,4583.0,2.901226,0.0,9.0,"[{'lower_bound': 0.0, 'upper_bound': 0.9, 'cou...",0.64,2048.0,"[[5.0, 8.0, 2.0, 8.0, 0.0, 4.0, 1.0, 8.0, 9.0,..."


In [37]:
constraints_df = pd.io.json.json_normalize(
    baseline_job.suggested_constraints().body_dict["features"]
)
constraints_df.head(10)

  


Unnamed: 0,name,inferred_type,completeness,num_constraints.is_non_negative
0,probability,Fractional,1.0,True
1,prediction,Integral,1.0,True
2,label,Integral,1.0,True


In [38]:
%%writefile preprocessing.py

import json
def preprocess_handler(inference_record):
    input_data = json.loads(inference_record.endpoint_input.data)
    input_data = {f"feature{str(i).zfill(10)}": val for i, val in enumerate(input_data)}

    output_data = json.loads(inference_record.endpoint_output.data)["predictions"][0][0]
    output_data = {"prediction0": output_data}

    return {**input_data}

Writing preprocessing.py


In [40]:
preprocessor_s3_dest_path = f"s3://{BUCKET}/{MONITORING_FOLDER}/monitoring/preprocessor"
preprocessor_s3_dest = sagemaker.s3.S3Uploader.upload("preprocessing.py", preprocessor_s3_dest_path)
print(preprocessor_s3_dest)

s3://sagemaker-us-east-1-949263681218/DEMO-tf2-ModelMonitor/monitoring/preprocessor/preprocessing.py


In [41]:
from sagemaker.model_monitor import CronExpressionGenerator
from time import gmtime, strftime

s3_report_path = 's3://{BUCKET}/{MONITORING_FOLDER}/monitoring/preprocessor/processed_output'
mon_schedule_name = "DEMO-tf2-model-monitor-schedule-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
my_default_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    endpoint_input=predictor.endpoint,
    record_preprocessor_script=preprocessor_s3_dest,
    output_s3_uri=s3_report_path,
    statistics=my_default_monitor.baseline_statistics(),
    constraints=my_default_monitor.suggested_constraints(),
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


## Exploring Generated constraints

In [None]:
import pandas as pd

baseline_job = my_default_monitor.latest_baselining_job
schema_df = pd.io.json.json_normalize(baseline_job.baseline_statistics().body_dict["features"])
schema_df.head(10)

In [None]:
constraints_df = pd.io.json.json_normalize(
    baseline_job.suggested_constraints().body_dict["features"]
)
constraints_df.head(10)

# Delete Endpoint

In [8]:
import boto3
sm = boto3.client('sagemaker')

In [11]:
sm.list_endpoints()

{'Endpoints': [{'EndpointName': 'tensorflow-cv-1656042735',
   'EndpointArn': 'arn:aws:sagemaker:us-east-1:949263681218:endpoint/tensorflow-cv-1656042735',
   'CreationTime': datetime.datetime(2022, 6, 24, 3, 52, 16, 256000, tzinfo=tzlocal()),
   'LastModifiedTime': datetime.datetime(2022, 6, 24, 3, 57, 33, 372000, tzinfo=tzlocal()),
   'EndpointStatus': 'InService'}],
 'ResponseMetadata': {'RequestId': '87b2c082-345a-4609-83da-39199e108ac6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '87b2c082-345a-4609-83da-39199e108ac6',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '247',
   'date': 'Fri, 24 Jun 2022 05:44:06 GMT'},
  'RetryAttempts': 0}}

In [14]:
endpoint_name='tensorflow-cv-1656042735'
sagemaker_client.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': '7db62db7-873f-4067-9905-9b900eb66dbf',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '7db62db7-873f-4067-9905-9b900eb66dbf',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Fri, 24 Jun 2022 05:44:59 GMT'},
  'RetryAttempts': 0}}

In [41]:
endpoint_name = 'tensorflow-cv-1655979252'

# TEST CODES

In [112]:
predictor

<sagemaker.tensorflow.model.TensorFlowPredictor at 0x7fb63b2a8b50>

In [113]:
data_capture_config

<sagemaker.model_monitor.data_capture_config.DataCaptureConfig at 0x7fb63b53e510>

In [116]:
data_capture_config.destination_s3_uri

's3://sagemaker-us-east-1-949263681218/DEMO-tf2-ModelMonitor/monitoring/datacapture'

In [9]:
sagemaker_client.list_monitoring_schedules()

{'MonitoringScheduleSummaries': [{'MonitoringScheduleName': 'DEMO-tf2-model-monitor-schedule-2022-06-24-04-33-23',
   'MonitoringScheduleArn': 'arn:aws:sagemaker:us-east-1:949263681218:monitoring-schedule/demo-tf2-model-monitor-schedule-2022-06-24-04-33-23',
   'CreationTime': datetime.datetime(2022, 6, 24, 4, 33, 23, 893000, tzinfo=tzlocal()),
   'LastModifiedTime': datetime.datetime(2022, 6, 24, 5, 1, 36, 395000, tzinfo=tzlocal()),
   'MonitoringScheduleStatus': 'Scheduled',
   'EndpointName': 'tensorflow-cv-1656042735',
   'MonitoringJobDefinitionName': 'data-quality-job-definition-2022-06-24-04-33-23-660',
   'MonitoringType': 'DataQuality'}],
 'ResponseMetadata': {'RequestId': '6ad21629-6be8-41a2-ac79-3710967fbbcf',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '6ad21629-6be8-41a2-ac79-3710967fbbcf',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '517',
   'date': 'Fri, 24 Jun 2022 05:40:30 GMT'},
  'RetryAttempts': 0}}

In [10]:
sagemaker_client.describe_monitoring_schedule(MonitoringScheduleName = 'DEMO-tf2-model-monitor-schedule-2022-06-24-04-33-23')

{'MonitoringScheduleArn': 'arn:aws:sagemaker:us-east-1:949263681218:monitoring-schedule/demo-tf2-model-monitor-schedule-2022-06-24-04-33-23',
 'MonitoringScheduleName': 'DEMO-tf2-model-monitor-schedule-2022-06-24-04-33-23',
 'MonitoringScheduleStatus': 'Scheduled',
 'MonitoringType': 'DataQuality',
 'CreationTime': datetime.datetime(2022, 6, 24, 4, 33, 23, 893000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 6, 24, 5, 1, 36, 395000, tzinfo=tzlocal()),
 'MonitoringScheduleConfig': {'ScheduleConfig': {'ScheduleExpression': 'cron(0 * ? * * *)'},
  'MonitoringJobDefinitionName': 'data-quality-job-definition-2022-06-24-04-33-23-660',
  'MonitoringType': 'DataQuality'},
 'EndpointName': 'tensorflow-cv-1656042735',
 'LastMonitoringExecutionSummary': {'MonitoringScheduleName': 'DEMO-tf2-model-monitor-schedule-2022-06-24-04-33-23',
  'ScheduledTime': datetime.datetime(2022, 6, 24, 5, 0, tzinfo=tzlocal()),
  'CreationTime': datetime.datetime(2022, 6, 24, 5, 1, 10, 342000, tzin

In [13]:
sagemaker_client.delete_monitoring_schedule(MonitoringScheduleName = 'DEMO-tf2-model-monitor-schedule-2022-06-24-04-33-23')

{'ResponseMetadata': {'RequestId': 'beb4e058-916f-427b-8361-8f0b1ba9a7f1',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'beb4e058-916f-427b-8361-8f0b1ba9a7f1',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Fri, 24 Jun 2022 05:44:55 GMT'},
  'RetryAttempts': 0}}

In [None]:
https://stackoverflow.com/questions/69179914/how-to-fix-sagemaker-data-quality-monitoring-schedule-job-that-fails-with-failu

In [16]:
https://sagemaker-examples.readthedocs.io/en/latest/sagemaker_model_monitor/fairness_and_explainability/SageMaker-Model-Monitor-Fairness-and-Explainability.html