### Model Monitoring

In [None]:
%store -r
import os,json, operator
from dkube.sdk import *
from dkube.sdk.api import DkubeApi
from dkube.sdk.rsrcs.operator import DkubeCluster
from dkube.sdk.rsrcs import DkubeModelmonitor
from dkube.sdk.rsrcs.modelmonitor import DatasetClass,ModelType,DriftAlgo, DataType
from dkube.sdk.rsrcs.modelmonitor import DatasetFormat,DkubeModelmonitorAlert, TimeZone

In [None]:
# Unpack the notebook settings from the `sgmkr_config` mapping
# (presumably restored by the `%store -r` magic in the first cell -- confirm).

# DKube-side identifiers.
DKUBEUSERNAME = sgmkr_config["DKUBEUSERNAME"]
DKUBE_BASE_DATASET = sgmkr_config["DKUBE_BASE_DATASET"]
MONITOR_NAME = sgmkr_config["MONITOR_NAME"]
SAGEMAKER_DKUBE_CLUSTER_NAME = sgmkr_config["SAGEMAKER_DKUBE_CLUSTER_NAME"]

# SageMaker endpoint and the S3 location used with it.
ENDPOINT_NAME = sgmkr_config["ENDPOINT_NAME"]
REGION_NAME = sgmkr_config["REGION_NAME"]
BUCKET = sgmkr_config["BUCKET"]
PREFIX = sgmkr_config["PREFIX"]

# AWS credentials.
ACCESS_KEY = sgmkr_config["ACCESS_KEY"]
SECRET_KEY = sgmkr_config["SECRET_KEY"]

In [None]:
def get_dataset_version(username, dataset_name, version):
    """Return the ``"<version>:<uuid>"`` identifier of a dataset version.

    The versions of ``dataset_name`` owned by ``username`` are fetched through
    the module-level ``api`` client, and the first entry whose version name
    equals ``version`` is used.

    Args:
        username: Owner of the dataset.
        dataset_name: Name of the dataset to inspect.
        version: Version name to look for (for example ``"v1"``).

    Returns:
        The string ``f"{version}:{uuid}"`` for the matching version.

    Raises:
        ValueError: If no version with that name exists. The message lists
            the version names that were found. Earlier this message was
            returned as a normal string, so a caller would silently pass an
            error sentence on as if it were a version identifier.
    """
    available = []
    # Single pass over the API result, so it is consumed only once.
    for entry in api.get_dataset_versions(username, dataset_name):
        details = entry["version"]
        if details["name"] == version:
            return f"{version}:{details['uuid']}"
        available.append(details["name"])
    raise ValueError(
        f"dataset version {version} not found, available versions are {available}"
    )

In [None]:
# DKube API client; the access token is read from the
# DKUBE_USER_ACCESS_TOKEN environment variable (None when it is unset).
api = DkubeApi(token=os.environ.get("DKUBE_USER_ACCESS_TOKEN"))

#### Predict Dataset

In [None]:
# S3 key prefix of the predict dataset: the "AllTraffic" folder of this
# endpoint under "<PREFIX>/datacapture/".
PREDICT_PREFIX = f"{PREFIX}/datacapture/{ENDPOINT_NAME}/AllTraffic/"

In [None]:
# Register the S3 prefix PREDICT_PREFIX as a remote DKube dataset named
# "<MONITOR_NAME>-predict".
pname = MONITOR_NAME + '-predict'
try:
    dataset = DkubeDataset(DKUBEUSERNAME, name=pname, remote=True)
    dataset.update_dataset_source('aws_s3')
    # Credentials from the environment take precedence over the configured
    # ACCESS_KEY / SECRET_KEY values.
    dataset.update_awss3_details(
        bucket=BUCKET,
        prefix=PREDICT_PREFIX,
        key=os.getenv("AWS_ACCESS_KEY_ID", ACCESS_KEY),
        secret=os.getenv("AWS_SECRET_ACCESS_KEY", SECRET_KEY))
    api.create_dataset(dataset)

except Exception as e:
    # Only exceptions that carry a non-empty `reason` are handled here.
    # Anything else (including exceptions with no `reason` attribute, which
    # used to be masked by an AttributeError) is re-raised unchanged.
    reason = getattr(e, "reason", None)
    if not reason:
        raise
    # A "conflict" reason is ignored on purpose -- presumably the dataset
    # already exists (confirm against the DKube API). Other API failures are
    # reported but do not stop the notebook.
    if reason.lower() != "conflict":
        response = e.body
        print(f"Failed[{response.code}]: {response.message}")

#### Configure SageMaker Cluster in DKube

In [None]:
# Describe the SageMaker cluster entry that will be registered with DKube.
pcluster = DkubeCluster(name=SAGEMAKER_DKUBE_CLUSTER_NAME)
pcluster.update_kind("sagemaker")
pcluster.update_class("monitoring")

# Authenticate with the AWS access key pair; the region is set directly on
# the underlying cluster object.
pcluster.update_authtype("access_keys")
pcluster.update_access_keys(ACCESS_KEY, SECRET_KEY)
pcluster.cluster.access_keys.region = REGION_NAME

In [None]:
# Best-effort registration of the cluster: any failure is printed and the
# notebook carries on.
try:
    api.configure_clusters(pcluster.cluster)
except Exception as err:
    print(err)

#### Import Deployment

In [None]:
# Import the "AllTraffic" variant from the registered cluster. The returned
# value is kept in `id` (note: this shadows the builtin `id`) and is passed
# to DkubeModelmonitor below.
id = api.import_deployment(
    name=MONITOR_NAME,
    cluster=SAGEMAKER_DKUBE_CLUSTER_NAME,
    variant="AllTraffic",
)

In [None]:
# Read the whole transformer script into a string. The context manager
# closes the file even if the read fails.
with open("transform-data.py", "r") as text_file:
    script = text_file.read()

# Load the thresholds that are later passed to the monitor.
with open('thresholds.json') as f:
    thresholds = json.load(f)

#### Model Monitor

In [None]:
# Monitor definition built from the value returned by import_deployment.
mm = DkubeModelmonitor(id)

# Enum shorthands (not referenced again in the visible cells).
mt = ModelType.Regression
dc_t = DatasetClass.Train

In [None]:
# Basic settings: regression model, tabular input, timestamps in UTC.
mm.update_modelmonitor_basics(
    model_type=ModelType.Regression.value,
    input_data_type=DataType.Tabular.value,
    data_timezone=TimeZone.UTC.value,
)

# Attach the thresholds loaded from thresholds.json.
mm.add_thresholds(thresholds=thresholds)

#### Training Details

In [None]:
# Datasets are referenced as "<user>:<dataset name>".
training_data = f"{DKUBEUSERNAME}:{DKUBE_BASE_DATASET}"
prediction_data = f"{DKUBEUSERNAME}:{MONITOR_NAME}-predict"
labelled_data = f"{DKUBEUSERNAME}:{MONITOR_NAME}-groundtruth"

# Version "v1" of the base dataset is used as training data.
train_data_version = get_dataset_version(DKUBEUSERNAME, DKUBE_BASE_DATASET, "v1")

# The predict dataset is declared with the Sagemakerlogs format.
predict_data_format = str(DatasetFormat.Sagemakerlogs)

In [None]:
# Training data: tabular, pinned to a version, with the transformer script.
mm.add_datasources(
    data_class=str(DatasetClass.Train),
    name=training_data,
    data_format=str(DatasetFormat.Tabular),
    version=train_data_version,
    transformer_script=script,
)

# Prediction data: uses a "yyyy/mm/dd/hh" date suffix.
mm.add_datasources(
    data_class=str(DatasetClass.Predict),
    name=prediction_data,
    data_format=predict_data_format,
    date_suffix="yyyy/mm/dd/hh",
)

# Labelled data: names the prediction, ground-truth and timestamp columns.
mm.add_datasources(
    data_class=str(DatasetClass.Labelled),
    name=labelled_data,
    data_format=str(DatasetFormat.Tabular),
    predict_col="charges",
    groundtruth_col="GT_target",
    timestamp_col="timestamp",
)

#### Add Drift monitoring details

In [None]:
# Enable drift monitoring with frequency 5 and the 'auto' algorithm
# (frequency unit not visible here -- check the SDK documentation).
mm.update_drift_monitoring_details(
    enabled=True,
    frequency=5,
    algorithm='auto',
)

#### Create Model monitor

In [None]:
# Create the monitor and wait for completion; `id` is rebound to the value
# returned by modelmonitor_create and is used by all later cells.
id = api.modelmonitor_create(mm, wait_for_completion=True)

#### Schema update

In [None]:
# Columns with a dedicated role; `selected` is not passed for these.
for _label, _schema_type in (
    ("charges", "prediction_output"),
    ("unique_id", "row_id"),
    ("timestamp", "timestamp"),
):
    api.modelmonitor_update_schema(
        id,
        label=_label,
        schema_class="continuous",
        schema_type=_schema_type,
    )

# Input features, all selected: age and bmi are continuous, the remaining
# ones are categorical.
for _label, _schema_class in (
    ("age", "continuous"),
    ("bmi", "continuous"),
    ("sex", "categorical"),
    ("children", "categorical"),
    ("smoker", "categorical"),
    ("region", "categorical"),
):
    api.modelmonitor_update_schema(
        id,
        label=_label,
        schema_class=_schema_class,
        schema_type="input_feature",
        selected=True,
    )

#### Performance Monitoring

In [None]:
# Fresh monitor object for the created monitor, carrying only the
# performance-monitoring settings (labelled data as source, frequency 5).
mm = DkubeModelmonitor(id)
mm.update_performance_monitoring_details(
    enabled=True,
    source_type="labelled_data",
    frequency=5,
)

In [None]:
# Send the performance-monitoring settings held in `mm` to DKube.
api.modelmonitor_update(mm)

#### Deployment Health monitoring

In [None]:
# Fresh monitor object carrying only the deployment-health settings
# (enabled, frequency 1).
mm = DkubeModelmonitor(id)
mm.update_deployment_monitoring_details(enabled=True, frequency=1)

In [None]:
# Send the deployment-health settings held in `mm` to DKube.
api.modelmonitor_update(mm)

### Add alerts

#### Deployment Health Alert

In [None]:
# Deployment-health alert: condition is latency_avg > 300.
alert = DkubeModelmonitorAlert(
    name='latency_alert',
    alert_class='deployment_health',
)
alert.add_alert_condition(
    metric='latency_avg',
    threshold=300,
    op=operator.gt,
)
api.modelmonitor_add_alert(id, alert)

#### Feature Alert

In [None]:
# Feature-drift alert on the 'age' feature: condition is value > 0.1.
alert = DkubeModelmonitorAlert(
    name='age_alert',
    alert_class='feature_drift',
)
alert.add_alert_condition(
    feature='age',
    threshold=0.1,
    op=operator.gt,
)
api.modelmonitor_add_alert(id, alert)

#### Performance Alert

In [None]:
# Performance-decay alert: condition is mae > 2000.
alert = DkubeModelmonitorAlert(
    name='mae_alert',
    alert_class='performance_decay',
)
alert.add_alert_condition(
    metric='mae',
    threshold=2000,
    op=operator.gt,
)
api.modelmonitor_add_alert(id, alert)

#### Start the model monitor

In [None]:
# Start the monitor that was created and configured above.
api.modelmonitor_start(id)

#### Cleanup

In [None]:
# Set CLEANUP to True to stop and delete the monitor and remove the predict
# dataset created by this notebook.
CLEANUP = False
if CLEANUP:
    from time import sleep

    # Number of status checks, 5 seconds apart, before giving up.
    RETRIES = 4
    for _attempt in range(RETRIES):
        # Kept in its own name so the `mm` monitor object is not overwritten.
        monitor_info = api.modelmonitor_get(id)
        status = monitor_info["status"]
        if status:
            if status["state"].lower() != "active":
                break
            # Still active: ask for a stop, then check again after the pause.
            api.modelmonitor_stop(id)
        sleep(5)
    else:
        # Every check saw an active (or status-less) monitor.
        raise TimeoutError("modelmonitor failed to stop")
    api.modelmonitor_delete(id)
    api.delete_dataset(DKUBEUSERNAME, pname, force=True)