# ML Model Onboarding Accelerator - Watson Machine Learning
_**Automate Watson Machine Learning Model Onboarding process - Governance in AI Factsheets - Monitoring in Watson OpenScale**_

### How to use the <i>mlmonitor</i> client
- [Initialize token](#tokens)
- [Train model in WML](#train_wml_job)
- [Deploy Online model](#deploy_wml_endpoint)
- [Score model](#score_endpoint)
- [Configure Monitoring](#monitor_endpoint)
- [Configure Quality](#quality_monitor)
- [Configure Explainability](#explain_monitor)
- [Configure Fairness Monitoring](#fairness_monitor)
- [Configure Drift Monitoring](#drift_monitor)
- [Log feedback data](#feedback_logging)
- [Log payload data](#payload_logging)
- [Evaluate monitors](#evaluate_monitors)
- [Drift Scenario](#data_drift_scenario)
- [Delete Deployment](#delete_resources)
---

#### Set up credentials for the mlmonitor library <a name="tokens"></a>

In [9]:
import os
import sys
import json

# Settings handed to mlmonitor through environment variables. They are set
# before the first `mlmonitor` import below (presumably because the library
# reads them at import time - TODO confirm).
os.environ['ENV'] = 'prem'
os.environ['LOG_LEVEL'] = '10'  # 10 == logging.DEBUG, assuming it is used as a logging level
os.environ['VERIFY_CP4D_SSL'] = "False"
config_filename = 'credentials_uploaded2cp4d_prem.cfg'

# Parent of the current working directory: it is added to the import path and
# is also where the credentials file is looked up.
lib_path = os.path.abspath('../')
# Guard against duplicate entries when this cell is executed more than once.
if lib_path not in sys.path:
    sys.path.append(lib_path)
os.environ['MONITOR_CONFIG_FILE'] = os.path.join(lib_path, config_filename)

### Test specific tasks from WMLModelUseCase

In [10]:
from mlmonitor import WMLModelUseCase    
from mlmonitor.src.demos.model_perturbator import ModelPerturbator

# Use case package inside mlmonitor and the endpoint name whose state is derived below.
source_dir = "use_case_gcr"
model_endpoint = "local-tet-1"

# Identifiers of the model use case: catalog and model entry.
catalog_id = "9974a224-8dfd-4db8-b57b-537701403b30"
model_entry_id = "0f5f6ea7-0356-49d4-ae32-bc0f6962b7fa"

model_use_case = WMLModelUseCase(
    source_dir=source_dir,
    catalog_id=catalog_id,
    model_entry_id=model_entry_id,
)

# One flag per line: `_wos_configured` first, then `_wml_configured`.
print(
    model_use_case._wos_configured,
    model_use_case._wml_configured,
    sep="\n",
)

# Only look up existing resources when an endpoint name was given.
if model_endpoint:
    model_use_case.derive_model_states(endpoint_name=model_endpoint)

model_use_case.display_states()

True
True

              - trained:   [False]
              - deployed:  [False]
              - governed:  [False]
              - monitored: [False]



### Train model in WML Job <a name="train_wml_job"></a>

In [3]:
from mlmonitor.src import (
    API_KEY,
    ENV,
    CATALOG_ID,
    MODEL_ENTRY_ID,
    logger,
    PROJECT_ROOT,
    DATA_ROOT,
    MODEL_ROOT,
)

In [4]:
import importlib

_cfg = model_use_case._model_config

# Resolve the training function named in the model configuration:
# mlmonitor.<source_dir>.<train_module>.<train_method>
_train_module = importlib.import_module(
    f"mlmonitor.{_cfg.source_dir}.{_cfg.train_module}"
)
train = getattr(_train_module, _cfg.train_method)

parameters = _cfg.hyperparameters

# Assign an endpoint name only when the use case does not have one yet.
if not model_use_case.model_endpoint:
    model_use_case.model_endpoint = model_use_case._assign_model_endpoint()

# Factsheets client plus the experiment id and tags used when exporting facts
# in the cells further down. The third element of the result is not used.
_fs_session = model_use_case.init_internal_fs_client(
    logger=logger,
    catalog_id=model_use_case.catalog_id,
    model_entry_id=model_use_case.model_entry_id,
)
facts_client, model_props, _, experiment_id, fs_tags = _fs_session

# Run the training function. `model_data` is later handed to joblib.load /
# validate_hdf_file, i.e. it is used as the location of the serialized model.
_data_path = os.path.join(DATA_ROOT, _cfg.data_dir)
model_data = train(
    model_dir=MODEL_ROOT,
    data_path=_data_path,
    train_dataset=_cfg.training_data,
    val_dataset=_cfg.validation_data,
    test_dataset=_cfg.test_data,
    logger=logger,
    **parameters,
)

# Record the outcome on the use case object.
model_use_case.is_trained = True
model_use_case.model_data = model_data




2024/03/19 07:53:49 INFO : Experiment local-tet-90 does not exist, creating new experiment
2024/03/19 07:53:49 INFO : Experiment successfully created with ID 785127679486898573 and name local-tet-90
2024/03/19 07:53:49 INFO : Autolog enabled Successfully
fetch_dataset ['.DS_Store', 'german_credit_data_biased_training.csv', '.gitkeep', 'gcr_feedback_logging_with_pred_aws2.csv', 'gcr_feedback_logging_aws.csv', 'gcr_feedback_logging_with_pred_aws.csv', 'gcr_explicit_payload_logging.csv', 'test_feedback_data_gcr.csv']
2024/03/19 07:53:50 INFO : logging results to factsheet for run_id ef92a2baf94c485393681320cf865293
2024/03/19 07:53:52 INFO : Successfully logged results to Factsheet service for run_id ef92a2baf94c485393681320cf865293 under asset_id: 179c581f-b31e-46e4-a652-73a25de5b57e and space_id : 72e47ac8-fb23-4ac0-96b8-f593b2fce652
2024/03/19 07:53:52 INFO : logging results to factsheet for run_id 0069be8ed1f9470a82b9cb1a81e9dbd9
2024/03/19 07:53:53 INFO : Successfully logged results 

In [5]:
# SPDX-License-Identifier: Apache-2.0
import os
import joblib
import re

from ibm_watson_machine_learning import APIClient
from mlmonitor.src.wml import wml_client as WML_CLIENT, WML_SPACE_ID
from mlmonitor.src import API_KEY, logger, DATA_ROOT
from mlmonitor.src.utils.validation import validate_hdf_file, is_csv
from mlmonitor.src.utils.file_utils import make_model_tgzfile
from mlmonitor.src.model.config_wml import WMLModelconfig
from mlmonitor.data import BUCKET_NAME, COS_ENDPOINT, COS_RESOURCE_CRN



# Turn the training output (`model_data`, the location of the serialized model)
# into the object passed to `store_model`: a reloaded estimator for
# scikit-learn / xgboost, or the result of `make_model_tgzfile` for tensorflow.
framework = model_use_case._model_config.inference_framework

if framework in ("scikit-learn", "xgboost"):
    # NOTE: joblib.load unpickles the file - only load artifacts produced by a
    # trusted training run.
    logger.info(f"{framework} => Reloading model from joblib")
    model = joblib.load(model_data)

elif framework == "tensorflow":
    logger.info(f"{framework} => Generate model archive for upload")

    if not validate_hdf_file(model_data):
        raise ValueError("Invalid model file should be a valid hdf (.h5) file")

    # os.path instead of a regex: the previous pattern left the dot unescaped,
    # only matched absolute POSIX paths containing at least two slashes, and an
    # unmatched path surfaced as a TypeError on None.
    model_path, model_filename = os.path.split(os.path.abspath(model_data))
    output_tgz_name = f"{os.path.splitext(model_filename)[0]}.tgz"
    model = make_model_tgzfile(
        output_filename=output_tgz_name,
        source_dir=model_path,
        filename=model_filename,
    )

else:
    raise ValueError('Invalid framework ["scikit-learn", "tensorflow", "xgboost"]')

######################################################################
# RETRIEVE MODEL USE CASE SPECIFIC CONFIGURATION DETAILS (SIGNATURE) #
######################################################################
data_type = model_use_case._model_config.data_type

framework_version = model_use_case._model_config.inference_framework_version
target = model_use_case._model_config.class_label

# Whether the training set is a CSV decides further down if a COS training
# data reference is attached to the model asset.
train_data_path = model_use_case._model_config._get_data_location(dataset_type="train")
dataset_is_csv = is_csv(train_data_path)

dataset = model_use_case._model_config._get_data(dataset_type="train")

# Default to "no training sample / no labels" so that both names are always
# defined when `store_model` is called, whatever the data type.
scoring_df = None
labels = None

if data_type == "structured":

    # Feature columns and target column are passed to `store_model`.
    scoring_df = dataset.loc[:, model_use_case._model_config.feature_columns]
    labels = dataset[target]

elif data_type == "unstructured_image":

    # The dataset unpacks into (samples, labels); nothing is passed on to
    # `store_model` for images, only the sample shape is logged.
    samples, _ = dataset
    # A %s placeholder is required: logging formats the message with its args,
    # and a message without placeholder produced a logging error instead.
    logger.info("samples shape sent for inference %s", samples.shape)

# Select the deployment space on the WML client before the calls below.
WML_CLIENT.set.default_space(space_uid=WML_SPACE_ID)

##########################
# Model Asset metadata   #
##########################

# Resolve the software specification named in the model configuration to its id.
_spec_name = model_use_case._model_config.inference_instance
software_spec_uid = WML_CLIENT.software_specifications.get_id_by_name(_spec_name)
logger.info(f"Software Specification {_spec_name} ID: {software_spec_uid}")

# Minimal metadata for the stored model: name, "<framework>_<version>" type
# and software specification.
_model_meta = WML_CLIENT._models.ConfigurationMetaNames
model_props = {
    _model_meta.NAME: model_use_case.model_endpoint,
    _model_meta.TYPE: f"{framework}_{framework_version}",
    _model_meta.SOFTWARE_SPEC_UID: software_spec_uid,
}

###############################################################################
# TRAINING DATA REFERENCE (COS bucket) - structured CSV datasets only         #
###############################################################################
if data_type == "structured" and dataset_is_csv:
    from mlmonitor.data.cos import put_item
    from mlmonitor.src import IAM_URL
    from mlmonitor.data import CLOUD_API_KEY

    _training_file = model_use_case._model_config.training_data

    # TODO training_data_references for CP4D on prem should be DB2
    # Upload the training file to the bucket.
    put_item(
        item_name=_training_file,
        item_path=os.path.join(DATA_ROOT, model_use_case._model_config.data_dir),
    )

    # Create a COS connection in WML. A new connection is created every time
    # this cell runs.
    datasource_type = WML_CLIENT.connections.get_datasource_type_uid_by_name(
        "bluemixcloudobjectstorage"
    )
    _conn_meta = WML_CLIENT.connections.ConfigurationMetaNames
    _cos_properties = {
        "bucket": BUCKET_NAME,
        "api_key": CLOUD_API_KEY,
        "resource_instance_id": COS_RESOURCE_CRN,
        "iam_url": IAM_URL,
        "url": COS_ENDPOINT,
    }
    conn_meta_props = {
        _conn_meta.NAME: f"COS Connection for {model_use_case.model_endpoint}",
        _conn_meta.DATASOURCE_TYPE: datasource_type,
        _conn_meta.DESCRIPTION: "Connection to COS",
        _conn_meta.PROPERTIES: _cos_properties,
    }
    conn_details = WML_CLIENT.connections.create(meta_props=conn_meta_props)
    connection_id = WML_CLIENT.connections.get_uid(conn_details)

    # Single reference pointing at the uploaded file through that connection.
    _cos_reference = {
        "id": model_use_case.model_endpoint,
        "type": "connection_asset",
        "connection": {
            "id": connection_id,
            "href": f"/v2/connections/{connection_id}?space_id={WML_SPACE_ID}",
        },
        "location": {
            "bucket": BUCKET_NAME,
            "file_name": _training_file,
        },
    }
    training_data_references = [_cos_reference]

    # Attach the reference and the label column to the model metadata.
    _model_meta = WML_CLIENT._models.ConfigurationMetaNames
    model_props[_model_meta.TRAINING_DATA_REFERENCES] = training_data_references
    model_props[_model_meta.LABEL_FIELD] = target

##########################
# Store Model Asset      #
##########################
# Hand the metadata to the Factsheets client before the model is stored
# (presumably so it can add its own properties to `model_props` - TODO confirm).
facts_client.export_facts.prepare_model_meta(WML_CLIENT, model_props)

logger.info(f"Storing model to deployment space {WML_SPACE_ID}")
published_model_details = WML_CLIENT.repository.store_model(
    model=model,
    meta_props=model_props,
    training_data=scoring_df,
    training_target=labels,
)

model_uid = WML_CLIENT.repository.get_model_id(published_model_details)
logger.info(f"Model Asset creation Completed with Model ID: {model_uid}")

# Remove the upload archive. `model` is a file only in the tensorflow branch
# (the value returned by make_model_tgzfile); for scikit-learn / xgboost it is
# an in-memory estimator. Keying the cleanup on the framework rather than on
# the data type avoids calling os.remove on an estimator and avoids leaving
# the archive behind for non-image tensorflow models.
if framework == "tensorflow":
    os.remove(model)


Uploading item to bucket: ice-demo-prem, key: german_credit_data_biased_training.csv
Creating connections...
SUCCESS


In [6]:
# Remember the id of the stored model asset on the use case object.
model_use_case.model_uid = model_uid

run_id = facts_client.runs.get_current_run_id()

# Attach the Factsheets tags to the current run, when there are any.
_has_tags = len(fs_tags) > 0
if _has_tags:
    facts_client.runs.set_tags(run_id, fs_tags)
    _tags_json = json.dumps(fs_tags, indent=4)
    logger.debug(f"save_fs_model Factsheets tags :\n{_tags_json}")

# List the runs of the experiment for reference, then export the facts of the
# current run.
logger.info(f"Current Experiment ID {experiment_id}")
runs = facts_client.runs.list_runs_by_experiment(experiment_id)
logger.info(f"Runs :\n{runs}")

facts_client.export_facts.export_payload(run_id)

2024/03/19 07:54:08 INFO : Initiating logging to factsheet for run_id......778403d4fd764659a2ae7611ad27e38a
2024/03/19 07:54:10 INFO : Successfully logged results to Factsheet service for run_id 778403d4fd764659a2ae7611ad27e38a under asset_id: 179c581f-b31e-46e4-a652-73a25de5b57e and space_id : 72e47ac8-fb23-4ac0-96b8-f593b2fce652


In [8]:
# Fetch the stored WML model and the model use case from Factsheets, then
# track the model under the first approach of that use case.
# The output recorded for this cell shows track() raising ClientError
# (HTTP 400) when the model is already tracked with the use case.
wml_model = facts_client.assets.get_model(model_id=model_uid)  # wml_stored_model_details=?

muc_utilities = facts_client.assets.get_model_usecase(
    model_usecase_id=model_use_case.model_entry_id,
    catalog_id=model_use_case.catalog_id,
)

_first_approach = muc_utilities.get_approaches()[0]
wml_model.track(
    usecase=muc_utilities,
    approach=_first_approach,
    version_number="minor",  # "0.1.0"
)

2024/03/19 07:54:42 INFO : Current model information: {'asset_id': 'b7196642-e0c7-4c2b-a378-60c598e0fa5d', 'container_type': 'space', 'container_id': '72e47ac8-fb23-4ac0-96b8-f593b2fce652', 'facts_type': 'modelfacts_user'}
2024/03/19 07:54:43 INFO : Current model usecase information: {'model_usecase_id': '0f5f6ea7-0356-49d4-ae32-bc0f6962b7fa', 'container_type': 'catalog', 'catalog_id': '9974a224-8dfd-4db8-b57b-537701403b30', 'facts_type': 'model_entry_user'}
2024/03/19 07:54:44 INFO : Approaches retrieved successfully
2024/03/19 07:54:45 INFO : Initiate linking model to existing model use case 0f5f6ea7-0356-49d4-ae32-bc0f6962b7fa
2024/03/19 07:55:17 INFO : Model registration failed


ClientError: Model registration failed. Error: 400. {"errors":[{"code":"Bad Request","message":"Model: b7196642-e0c7-4c2b-a378-60c598e0fa5d is already being tracked with AI Use Case: 0f5f6ea7-0356-49d4-ae32-bc0f6962b7fa with catalog id: 9974a224-8dfd-4db8-b57b-537701403b30, hence cannot track this model."}],"trace":"a3j8snwnfr1jgag0ymthl00b8"}

In [3]:
# Single-call training through the use case object. The output recorded for
# this cell ends with the same "already being tracked" ClientError as the
# manual tracking cell above it.
model_use_case.train()

2024/03/18 15:50:41 INFO : Experiment local-test-wml-3 does not exist, creating new experiment
2024/03/18 15:50:41 INFO : Experiment successfully created with ID 336551981225336957 and name local-test-wml-3
2024/03/18 15:50:41 INFO : Autolog enabled Successfully
fetch_dataset ['german_credit_data_biased_training.csv', '.gitkeep', 'gcr_feedback_logging_with_pred_aws2.csv', 'gcr_feedback_logging_aws.csv', 'gcr_feedback_logging_with_pred_aws.csv', 'gcr_explicit_payload_logging.csv', 'test_feedback_data_gcr.csv']
2024/03/18 15:50:44 INFO : logging results to factsheet for run_id 9c56dbee66444833871aea62395b0b89
2024/03/18 15:50:45 INFO : Successfully logged results to Factsheet service for run_id 9c56dbee66444833871aea62395b0b89 under asset_id: 3ee8ad70-b06c-4f32-ac21-455fc80328d3 and space_id : 72e47ac8-fb23-4ac0-96b8-f593b2fce652
2024/03/18 15:50:47 INFO : logging results to factsheet for run_id 4c3aceca323041c7a055012999022f32
2024/03/18 15:50:48 INFO : Successfully logged results to Fa

ClientError: Model registration failed. Error: 400. {"errors":[{"code":"Bad Request","message":"Model: 6c5ab363-7163-4a5f-a5e2-f5649cb62632 is already being tracked with AI Use Case: 911b990b-16f7-45da-9cd9-143062e66f79 with catalog id: 9974a224-8dfd-4db8-b57b-537701403b30, hence cannot track this model."}],"trace":"asp0mv2d16a357hec8ybef58t"}

### Deploy WML online endpoint <a name="deploy_wml_endpoint"></a>

In [None]:
# Deploy the model as a WML online endpoint (see WMLModelUseCase.deploy; its
# implementation is not part of this notebook).
model_use_case.deploy()

### Score WML model <a name="score_endpoint"></a>

In [None]:
# Score the WML model (presumably through the deployed endpoint - TODO confirm).
model_use_case.score_model()

### Configure WML Monitoring <a name="monitor_endpoint"></a>

In [None]:
# Configure monitoring for the WML model (presumably in Watson OpenScale, as
# the notebook title suggests - TODO confirm).
model_use_case.monitor()

### Configure Quality Monitor <a name="quality_monitor"></a>

In [None]:
# Configure the quality monitor for this use case.
model_use_case.configure_quality_monitor()

### Configure Explainability Monitor <a name="explain_monitor"></a>

In [None]:
# Configure the explainability monitor for this use case.
model_use_case.configure_explainability_monitor()

### Configure Fairness Monitor <a name="fairness_monitor"></a>

In [None]:
# Configure the fairness monitor for this use case.
model_use_case.configure_fairness_monitor()

### Configure Drift Monitor <a name="drift_monitor"></a>

In [None]:
# Configure the drift monitor for this use case.
model_use_case.configure_drift_monitor()

### Log feedback data <a name="feedback_logging"></a>

In [None]:
# Log feedback data (presumably the labelled records used by the quality
# monitor - TODO confirm).
model_use_case.log_feedback()

### Log Payload data <a name="payload_logging"></a>

In [None]:
# Log payload data (presumably scoring requests/responses - TODO confirm).
model_use_case.log_payload()

### Evaluate monitors <a name="evaluate_monitors"></a>

In [None]:
# Evaluate the configured monitors.
model_use_case.evaluate_model()

### Run a data drift scenario <a name="data_drift_scenario"></a>

In [None]:
# Drift scenario 'single_column_1' for the same use case package as above.
_scenario_settings = {
    "source_dir": source_dir,
    "monitor_type": 'drift',
    "scenario_id": 'single_column_1',
}
model_perturbation = ModelPerturbator(**_scenario_settings)

# Replace the scenario's ratios with a ramp-up / ramp-down sequence
# (what each ratio controls is defined by ModelPerturbator - TODO confirm).
model_perturbation.ratios = [0.1, 0.2, 0.3, 0.2, 0.1]

model_use_case.data_drift_scenario(model_perturbation)

### Delete Resources related to Model <a name="delete_resources"></a>

In [None]:
# Left disabled on purpose; uncomment to delete the resources related to this
# model (see WMLModelUseCase.cleanup for what exactly is removed).
# model_use_case.cleanup()