# Monitor your ML Models using Watson OpenScale and WML on Cloud Pak for Data 

## 1. Setup the Notebook Environment

### 1.1 Install the necessary packages

In [None]:
# Remove user-installed Python 3.6 libraries from the shared user-libs directory
# (presumably so stale packages do not shadow the versions installed below — confirm).
!rm -rf /home/spark/shared/user-libs/python3.6*

# Install the SDKs used by this notebook; `tail -n 1` keeps only the last line of pip's output.
!pip install ibm-cloud-sdk-core --no-cache | tail -n 1
!pip install --upgrade ibm-watson-openscale==3.0.2 --no-cache | tail -n 1
# NOTE(review): the model is later registered with TYPE 'scikit-learn_0.23' — confirm that
# scikit-learn 0.24.0 is compatible with that runtime.
!pip install scikit-learn==0.24.0 | tail -n 1
!pip install --upgrade ibm-watson-machine-learning==1.0.45 --user | tail -n 1

### <font color=red>ACTION: Restart the notebook kernel after installing the required packages by clicking Kernel > Restart. </font>

### 1.2 Import Packages

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn import preprocessing
from sklearn import svm, metrics
from scipy import sparse
from ibm_watson_machine_learning import APIClient as WMLAPIClient
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import json
import ibm_db
import os

import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV

from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import APIClient as WOSAPIClient
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

## 2. Configuration

### 2.1 Global Variables

### <font color=red>ACTION: Provide the values for SCHEMA_NAME, BUCKET_NAME and DEP_NAME before continuing. </font>

In [None]:
# Names under which the model and its deployment are created in Watson Machine Learning.
MODEL_NAME="Tower A"
DEPLOYMENT_NAME="Tower A Dep"

# ACTION: create an empty schema in the database that will be used as the data mart
# and store its name in this variable.
SCHEMA_NAME=""

# ACTION: create a COS bucket where the training data will be stored and store its name in this variable.
BUCKET_NAME = "aiostrainingdata"

# ACTION: enter the name of the deployment space.
DEP_NAME="AIOS_deployment_space"

### 2.2 Add Dataset

Select the Insert Pandas Dataframe option, after selecting the below cell. Ensure the variable name is df_data_1

In [None]:
# Load the training data into df_data_1. If you insert the code through the
# "Insert pandas DataFrame" option instead, make sure the variable is named df_data_1.
df_data_1 = pd.read_csv('/project_data/data_asset/call_drop_data_train.csv')
# Preview the first rows.
df_data_1.head()

### 2.3 Provision services and configure credentials

### <font color=red>ACTION: Provide the credentials for your Watson OpenScale instance, Watson Machine Learning instance, the Database instance that will be used as the DataMart for Watson OpenScale and your Cloud Object Storage instance in the next 4 cells before continuing. </font>

In [None]:
# Watson OpenScale credentials
# ACTION: fill in the URL, username and password. These values are also reused for the
# Watson Machine Learning client and for the OpenScale authenticator later in the notebook.

WOS_CREDENTIALS={
    "url" : "",
    "username":"",
    "password":""
}

In [None]:
# Watson Machine Learning credentials
# The URL and login are reused from WOS_CREDENTIALS; only the WML-specific
# instance id and version are set here.

WML_CREDENTIALS = {
    "url": WOS_CREDENTIALS["url"],
    "username": WOS_CREDENTIALS["username"],
    "password": WOS_CREDENTIALS["password"],
    "instance_id": "wml_local",
    "version": "3.5"
}
# Show the settings for verification, but mask the password so it is never
# written into the saved notebook output.
print({**WML_CREDENTIALS, "password": "********"})

In [None]:
# Database Credentials - Db2/Db2WH or PostgreSQL
# To use SSL, set "ssl" to True, uncomment "sslmode" and "certificate_base64"
# and provide their values.

DATABASE_CREDENTIALS = {
    "db": "",
    "hostname": "",
    "password": "",
    # ACTION: replace None with the integer port of your database. None is only a
    # placeholder that keeps the cell syntactically valid.
    "port": None,
    "username": "",
    #"sslmode":"verify-full",
    #"certificate_base64":"",
    "ssl":False
}

#Provide a value of type DatabaseTypeEnum: either DatabaseType.DB2 or DatabaseType.PostgreSQL
DATABASE_TYPE = DatabaseType.DB2

In [None]:
# Cloud Object Storage credentials
# ACTION: fill in the API key and resource CRN of your COS instance and, if needed, adjust the endpoint.

COS_API_KEY_ID = ""
COS_RESOURCE_CRN = "" # e.g. "crn:v1:bluemix:public:cloud-object-storage:global:a/3bf0d9003abfb5d29761c3e97696b71c:d6f04d83-6c4f-4a62-a165-696756d63903::"
COS_ENDPOINT = "https://s3.us-east.cloud-object-storage.appdomain.cloud" # Current list available at https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints

### 2.4 Upload the training data set to Cloud Object Storage

In [None]:
import ibm_boto3
from ibm_botocore.client import Config, ClientError

# Create the Cloud Object Storage resource handle from the credentials configured above.
# NOTE(review): confirm that this IAM token endpoint is still the recommended one.
cos_client = ibm_boto3.resource("s3",
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_RESOURCE_CRN,
    ibm_auth_endpoint="https://iam.bluemix.net/oidc/token",
    config=Config(signature_version="oauth"),
    endpoint_url=COS_ENDPOINT
)

In [None]:
from io import StringIO

training_data_file_name = 'call_drop_data_train.csv'

# Serialise the training data to CSV in memory and upload it to the COS bucket.
# index=False keeps the DataFrame's row index out of the file; otherwise the CSV
# gains an extra unnamed first column that is neither a feature nor the label.
csv_buffer = StringIO()
df_data_1.to_csv(csv_buffer, index=False)
cos_client.Object(BUCKET_NAME, training_data_file_name).put(Body=csv_buffer.getvalue())

## 3. Create the Call Drop Model using Scikit-Learn

In [None]:
# Separate the label column (y) from the remaining feature columns (X).
y = df_data_1['Call_Drop_Actual']
X = df_data_1.drop(columns=['Call_Drop_Actual'])

In [None]:
# Split into train and test sets; random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
'''Add a categorical transformer to your model pipeline. 
    You will need to add a label encoder into the model pipeline before storing it into WML '''

# Columns treated as categorical by the model.
categorical_features = [
    "Start_Time_MM_DD_YYYY",
    "Traffic",
    " _conds",
    "Start_Time_HH_MM_SS_s",
]

# Missing values are replaced by the constant 'missing', then the columns are one-hot
# encoded; categories unseen during fitting are ignored at scoring time.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [None]:
# Apply the categorical transformer to the categorical columns.
# NOTE(review): no transformer is registered for the other columns — confirm that
# leaving them out of the model input is intended.
preprocessor = ColumnTransformer([
    ('cat', categorical_transformer, categorical_features),
])

In [None]:
# Chain the preprocessing step and a linear-kernel SVM classifier into one pipeline,
# so the preprocessing is stored and deployed together with the model.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', svm.SVC(kernel='linear')),
])

In [None]:
# Fit the full pipeline (preprocessing + classifier) on the training split.
model = pipeline.fit(X_train,y_train)

In [None]:
# Create the Watson Machine Learning client from the credentials configured above.
wml_client = WMLAPIClient(WML_CREDENTIALS)

In [None]:
# Look up the software specification and build the metadata used when storing the model.
# NOTE(review): TYPE is 'scikit-learn_0.23' while the setup cell installs scikit-learn 0.24.0 —
# confirm the versions are compatible.
# NOTE(review): this lookup runs before a default project/space is set on the client —
# confirm it does not require one.
software_spec_uid=wml_client.software_specifications.get_id_by_name('default_py3.7')
meta_props = {
               wml_client.repository.ModelMetaNames.NAME: MODEL_NAME,
               wml_client.repository.ModelMetaNames.TYPE: 'scikit-learn_0.23',
               wml_client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid
}

## 4. Set the Deployment Space

In [None]:
# Read the current project's ID from the PROJECT_ID environment variable
# and make it the WML client's default project.
project_id = os.environ['PROJECT_ID']
wml_client.set.default_project(project_id)

In [None]:
def guid_from_space_name(wml_client, space_name):
    """Return the ID of the deployment space named `space_name`.

    Every space listed by `wml_client.spaces.get_details()` is compared by name.
    The ID of each match is printed; if several spaces share the name, the last
    one wins. When nothing matches, a warning is printed and None is returned.
    """
    matching_ids = [
        entry["metadata"]["id"]
        for entry in wml_client.spaces.get_details()['resources']
        if entry['entity']['name'] == space_name
    ]
    for found_id in matching_ids:
        print("\nDeployment Space ID: ", found_id)

    space_id = matching_ids[-1] if matching_ids else None
    if space_id is None:
        print("WARNING: Your space does not exist. Create a deployment space before proceeding to the next cell.")

    return space_id

In [None]:
# Resolve the ID of the deployment space whose name is stored in DEP_NAME.
# The result is None (with a printed warning) when no space has that name.

space_uid = guid_from_space_name(wml_client, DEP_NAME)

In [None]:
# Make the deployment space the default for the following WML client calls.
wml_client.set.default_space(space_uid)

## 5. Store, Deploy and Score your Custom WML Model

In [None]:
# Metadata for the deployment: its name and an online deployment with no extra settings.
deploy_meta = {
     wml_client.deployments.ConfigurationMetaNames.NAME: DEPLOYMENT_NAME,
     wml_client.deployments.ConfigurationMetaNames.ONLINE: {}
 }

In [None]:
## Store the model on WML
# The fitted pipeline is saved together with the training features and labels.
published_model = wml_client.repository.store_model(model=model,
                                             meta_props=meta_props,
                                             training_data=X_train,
                                             training_target=y_train
                                                )

In [None]:
# Extract the ID of the stored model; it is used for the deployment and the OpenScale subscription.
published_model_uid = wml_client.repository.get_model_uid(published_model)

In [None]:
## Create a Deployment for your stored model
# Uses the deployment metadata (name, online type) defined in deploy_meta.

created_deployment = wml_client.deployments.create(published_model_uid, meta_props=deploy_meta)

In [None]:
# Keep the deployment ID for scoring and for the OpenScale subscription, then show the details.
deployment_uid=created_deployment['metadata']['id']
print(created_deployment)

In [None]:
# Use the last 20 rows of the test features as a sample scoring request.
score=X_test.tail(20)
score

In [None]:
# Convert the sample frame into a list of rows: read each column as a plain list,
# then transpose the columns into per-record value lists.
scoring_data = [
    list(record)
    for record in zip(*[score[column].values.tolist() for column in score.columns])
]
scoring_data

In [None]:
# Column names sent with the scoring request, in the same order as the values of each row.
fields=list(X_test.columns)
print(len(fields))
# Show the field names next to the first row as a quick sanity check.
fields, scoring_data[0]

In [None]:
# Build the WML scoring payload: one input entry holding the field names and the rows to score.
job_payload = {
    wml_client.deployments.ScoringMetaNames.INPUT_DATA: [{
        'fields': fields,
        'values': scoring_data
    }]
}
print(job_payload)

In [None]:
# Score the sample rows against the online deployment.
scoring_response = wml_client.deployments.score(deployment_uid, job_payload)

In [None]:
# Inspect the scoring response returned by the deployment.
print(scoring_response)

## 6. Setup your Watson Openscale Dashboard

### 6.1 Create the authenticator for the Watson Openscale Client

In [None]:
# Authenticate against Cloud Pak for Data with the OpenScale URL and login.
# NOTE: SSL certificate verification is disabled here.
authenticator = CloudPakForDataAuthenticator(
        url=WOS_CREDENTIALS['url'],
        username=WOS_CREDENTIALS['username'],
        password=WOS_CREDENTIALS['password'],
        disable_ssl_verification=True
    )

### 6.2 Create the Watson Openscale Client

In [None]:
# Create the Watson OpenScale client and display its version.
wos_client = WOSAPIClient(service_url=WOS_CREDENTIALS['url'],authenticator=authenticator)
wos_client.version

### 6.3 Setup the Data Mart on Watson OpenScale

In [None]:
# List the data marts already registered with OpenScale.
wos_client.data_marts.show()

In [None]:
# Reuse the first existing data mart if there is one; otherwise create a data mart
# backed by the external database described by DATABASE_CREDENTIALS / DATABASE_TYPE.
data_marts = wos_client.data_marts.list().result.data_marts
if len(data_marts) == 0:
    # Fail fast on missing configuration: continuing would either send an invalid
    # request or leave data_mart_id undefined for the cells below.
    if DATABASE_CREDENTIALS is None:
        raise ValueError('ERROR!!! Database credentials not found. Please provide DATABASE_CREDENTIALS and then retry adding the data mart.')
    if not SCHEMA_NAME:
        raise ValueError("Please specify the SCHEMA_NAME and rerun the cell")

    print('Setting up external datamart')
    storage_credentials = {
        "hostname": DATABASE_CREDENTIALS['hostname'],
        "username": DATABASE_CREDENTIALS['username'],
        "password": DATABASE_CREDENTIALS['password'],
        "db": DATABASE_CREDENTIALS['db'],
        "port": DATABASE_CREDENTIALS['port'],
        "ssl": DATABASE_CREDENTIALS['ssl'],
    }
    # The SSL-only keys are commented out in DATABASE_CREDENTIALS by default,
    # so they are read only when SSL is enabled.
    if DATABASE_CREDENTIALS['ssl']:
        storage_credentials["sslmode"] = DATABASE_CREDENTIALS['sslmode']
        storage_credentials["certificate_base64"] = DATABASE_CREDENTIALS['certificate_base64']

    added_data_mart_result = wos_client.data_marts.add(
        background_mode=False,
        name="WOS Data Mart",
        description="Data Mart created by WOS tutorial notebook",
        database_configuration=DatabaseConfigurationRequest(
            # Honour the database type chosen in the configuration cell.
            database_type=DATABASE_TYPE,
            credentials=PrimaryStorageCredentialsLong(**storage_credentials),
            location=LocationSchemaName(
                schema_name=SCHEMA_NAME
            )
        )
    ).result
    data_mart_id = added_data_mart_result.metadata.id
else:
    data_mart_id=data_marts[0].metadata.id
    print('Using existing datamart {}. If you do not wish to use this datamart and wish to create and use a new data mart instead, you will first need to delete the existing data mart using the commented line in the code cell below and then run this cell again to create the new data mart.'.format(data_mart_id))

In [None]:
def delete_data_mart(wos_client, data_mart_id):
    """Delete the data mart `data_mart_id` through `wos_client` and print a confirmation."""
    wos_client.data_marts.delete(data_mart_id)
    print(f"Deleted existing data_mart: {data_mart_id}")

#delete_data_mart(wos_client, data_mart_id)

In [None]:
# Fetch the details of the selected data mart as a dictionary and display them.
data_mart_details = wos_client.data_marts.get(data_mart_id).result.to_dict()
data_mart_details

### 6.4 Add (Watson Machine Learning) service provider

In [None]:
# List the service providers already registered with OpenScale.
wos_client.service_providers.show()

In [None]:
# Register the WML instance as an OpenScale service provider, unless a provider
# with the same name already exists (in which case its ID is reused).
SERVICE_PROVIDER_NAME = "Watson Machine Learning V2"
SERVICE_PROVIDER_DESCRIPTION = "Added by tutorial WOS notebook."
service_provider_id=None
service_providers = wos_client.service_providers.list().result.service_providers
for service_provider in service_providers:
    service_instance_name = service_provider.entity.name
    if service_instance_name == SERVICE_PROVIDER_NAME:
        service_provider_id=service_provider.metadata.id
        print("Found existing service provider with this name. Setting the service_provider_id as " + service_provider_id)
        print("If you still wish to add the new service provider, either change the SERVICE_PROVIDER_NAME or delete the existing service provider " +
              "using the commented code in the next cell and then run this code to add the service provider.")
if service_provider_id is None:
    # No provider with this name yet: add one bound to the deployment space.
    # NOTE(review): confirm that WMLCredentialsCloud accepts username/password for a
    # Cloud Pak for Data instance in the installed SDK version.
    added_service_provider_result = wos_client.service_providers.add(
        name=SERVICE_PROVIDER_NAME,
        description=SERVICE_PROVIDER_DESCRIPTION,
        service_type=ServiceTypes.WATSON_MACHINE_LEARNING,
        deployment_space_id = space_uid,
        operational_space_id = "production",
        credentials=WMLCredentialsCloud(
            url=WML_CREDENTIALS["url"],
            instance_id=WML_CREDENTIALS["instance_id"],
            username=WML_CREDENTIALS["username"],
            password=WML_CREDENTIALS["password"]
        ),
        background_mode=False
    ).result
    service_provider_id = added_service_provider_result.metadata.id
# Display the ID of the service provider that will be used.
service_provider_id

In [None]:
def delete_service_provider(wos_client, service_provider_id):
    """Delete the service provider `service_provider_id` through `wos_client` and print a confirmation."""
    wos_client.service_providers.delete(service_provider_id)
    print(f"Deleted existing service_provider for WML instance: {service_provider_id}")

#delete_service_provider(wos_client, service_provider_id)

### 6.5 Create a Subscription

In [None]:
# List the subscriptions that currently exist in OpenScale.
wos_client.subscriptions.show()

In [None]:
# Remove every existing subscription that points at the model stored above,
# so that a fresh subscription can be created for it.
subscriptions = wos_client.subscriptions.list().result.subscriptions
for subscription in subscriptions:
    sub_model_id = subscription.entity.asset.asset_id
    if sub_model_id != published_model_uid:
        continue
    wos_client.subscriptions.delete(subscription.metadata.id)
    print("Deleted existing subscription for model", published_model_uid)

In [None]:
# Describe the deployed model for OpenScale: a structured-data binary classifier
# identified by the stored model ID and its online scoring URL.
asset = Asset(
    asset_id=published_model_uid,
    url=created_deployment["entity"]["status"]["online_url"]["url"],
    asset_type=AssetTypes.MODEL,
    input_data_type=InputDataType.STRUCTURED,
    problem_type=ProblemType.BINARY_CLASSIFICATION
)
# Describe the online deployment of that model.
asset_deployment = AssetDeploymentRequest(
    deployment_id=deployment_uid,
    name=DEPLOYMENT_NAME,
    deployment_type=DeploymentTypes.ONLINE,
    url=created_deployment["entity"]["status"]["online_url"]["url"]
)
# Point OpenScale at the training data CSV uploaded to Cloud Object Storage earlier.
training_data_reference = TrainingDataReference(
    type="cos",
    location=COSTrainingDataReferenceLocation(
        bucket=BUCKET_NAME,
        file_name=training_data_file_name
    ),
    connection=COSTrainingDataReferenceConnection.from_dict(
        {
            "resource_instance_id": COS_RESOURCE_CRN,
            "url": COS_ENDPOINT,
            "api_key": COS_API_KEY_ID,
            "iam_url": "https://iam.bluemix.net/oidc/token"
        }
    )
)
# Declare the label, the model output fields and the feature/categorical columns.
# NOTE(review): confirm that "prediction" and "prediction_probability" match the field
# names actually returned by the deployment's scoring response.
asset_properties_request = AssetPropertiesRequest(
    label_column="Call_Drop_Actual",
    probability_fields=["prediction_probability"],
    prediction_field="prediction",
    feature_fields=["outgoing_site_id","Start_Time_MM_DD_YYYY","Start_Time_HH_MM_SS_s","Call_Service_Duration"," _conds"," _dewptm"," _fog"," _hail"," _hum"," _pressurem","total number_of_calls","total call duration (min)","Traffic","lat","long","Call_Drop_Count","Total_Calls","Call_Drop_Perc"],
    categorical_fields=["Start_Time_MM_DD_YYYY","Start_Time_HH_MM_SS_s"," _conds","Traffic"],
    training_data_reference=training_data_reference
)

In [None]:
# Create the subscription that links the data mart, the service provider and the deployed model.
subscription_details = wos_client.subscriptions.add(
        data_mart_id=data_mart_id,
        service_provider_id=service_provider_id,
        asset=asset,
        deployment=asset_deployment,
        asset_properties=asset_properties_request).result
# Keep the subscription ID; the monitors and data sets below are attached to it.
subscription_id = subscription_details.metadata.id
print(subscription_details)

In [None]:
import time

# Pause briefly before listing the subscriptions again
# (presumably to give the new subscription time to become active — confirm).
time.sleep(10)
wos_client.subscriptions.show()

In [None]:
# Look up the payload-logging data set that belongs to the subscription.
time.sleep(10)
payload_data_set_id = None
payload_datasets = wos_client.data_sets.list(
    type=DataSetTypes.PAYLOAD_LOGGING,
    target_target_id=subscription_id,
    target_target_type=TargetTypes.SUBSCRIPTION,
).result.data_sets

# Take the first data set when at least one was returned.
if len(payload_datasets) > 0:
    payload_data_set_id = payload_datasets[0].metadata.id

if len(payload_datasets) == 0:
    print("No data sets found. Please check subscription status.")
elif payload_data_set_id is None:
    print("Payload data set not found. Please check subscription status.")
else:
    print("Payload data set id:", payload_data_set_id)

### 6.6 Score the model in order to configure monitors

In [None]:
# Rebuild the scoring request (field names plus the rows scored earlier) in the
# fields/values form that is stored as the request part of a payload record.
fields=list(X_test.columns)

request_data = {
    "fields": fields,
    "values": scoring_data
  }
request_data

In [None]:
import uuid
from ibm_watson_openscale.supporting_classes.payload_record import PayloadRecord
# Check whether the earlier scoring call was logged to the payload data set.
time.sleep(5)
pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
print("Number of records in the payload logging table: {}".format(pl_records_count))
if pl_records_count == 0:
    # Nothing was logged: store the request/response pair explicitly,
    # under a freshly generated scoring ID.
    print("Payload logging did not happen, performing explicit payload logging.")
    wos_client.data_sets.store_records(data_set_id=payload_data_set_id, request_body=[PayloadRecord(
                   scoring_id=str(uuid.uuid4()),
                   request=request_data,
                   response=scoring_response,
                   response_time=460
               )])
    # Wait before re-reading the record count.
    time.sleep(10)
    pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
    print("Number of records in the payload logging table: {}".format(pl_records_count))

### 6.7 Setup Quality Monitoring

In [None]:
# Create the quality monitor for the subscription.
time.sleep(10)
target = Target(
        target_type=TargetTypes.SUBSCRIPTION,
        target_id=subscription_id
)
# Minimum feedback data size used by the quality monitor
# (presumably the number of feedback records needed before it evaluates — confirm).
parameters = {
    "min_feedback_data_size": 5
}
quality_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.QUALITY.ID,
    target=target,
    parameters=parameters
).result

In [None]:
# Keep the monitor instance ID for running the monitor and showing its metrics.
quality_monitor_instance_id = quality_monitor_details.metadata.id

### 6.8 Log Feedback Data to your Subscription

In [None]:
# Feedback data = test features with the actual label column appended.
feedback_data_raw=pd.concat([X_test,y_test],axis=1)
feedback_data_raw

In [None]:
# Look up the feedback data set that belongs to the subscription.
feedback_dataset_id = None
feedback_dataset = wos_client.data_sets.list(type=DataSetTypes.FEEDBACK,
                                                target_target_id=subscription_id,
                                                target_target_type=TargetTypes.SUBSCRIPTION).result
print(feedback_dataset)
# Guard against an empty result: indexing [0] on an empty list would raise an
# IndexError before the "not found" message below could ever be printed.
if feedback_dataset.data_sets:
    feedback_dataset_id = feedback_dataset.data_sets[0].metadata.id
if feedback_dataset_id is None:
    print("Feedback data set not found. Please check quality monitor status.")

In [None]:
# Convert the last 20 feedback rows into a list of per-record dictionaries.
feedback_data=json.loads(feedback_data_raw.tail(20).to_json(orient='records'))
feedback_data

In [None]:
# Store the feedback records in the feedback data set.
wos_client.data_sets.store_records(feedback_dataset_id, request_body=feedback_data, background_mode=False)

In [None]:
# Show how many records the feedback data set now contains.
wos_client.data_sets.get_records_count(data_set_id=feedback_dataset_id)

#### Run an initial quality test

In [None]:
# List the monitor instances configured so far.
wos_client.monitor_instances.show()

In [None]:
# Trigger a run of the quality monitor; run_details holds the result of that call.
run_details = wos_client.monitor_instances.run(monitor_instance_id=quality_monitor_instance_id, background_mode=False).result

In [None]:
# Display the metrics recorded for the quality monitor.
wos_client.monitor_instances.show_metrics(monitor_instance_id=quality_monitor_instance_id)

In [None]:
# Show the details of the quality monitor run as a dictionary.
run_details.to_dict()

### 6.9 Set up the Fairness Monitors

The code below configures fairness monitoring for our model. It turns on monitoring for two features of the cell tower data, *_conds* (weather condition) and *Traffic*. In each case, we must specify:
* Which model feature to monitor
* One or more **majority** groups, which are values of that feature that we expect to receive a higher percentage of favorable outcomes
* One or more **minority** groups, which are values of that feature that we expect to receive a higher percentage of unfavorable outcomes
* The threshold below which OpenScale should display an alert for the fairness measurement (in this case, 95%)

Additionally, we must specify which outcomes from the model are favourable outcomes, and which are unfavourable. We must also provide the number of records OpenScale will use to calculate the fairness score. In this case, OpenScale's fairness monitor will run hourly, but will not calculate a new fairness rating until at least 5 records have been added. Finally, to calculate fairness, OpenScale must perform some calculations on the training data; no dataframe is passed in the code below, so OpenScale relies on the training data reference (the file in Cloud Object Storage) that was registered with the subscription.

In [None]:
# List the monitor instances that exist before the fairness monitor is added.
wos_client.monitor_instances.show()

In [None]:
# Create the fairness monitor for the subscription.
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id
)
# Two monitored features (" _conds" and "Traffic"), each with its majority and minority
# groups and a 0.95 alert threshold; class 1 is treated as the favourable outcome and
# class 0 as the unfavourable one; min_records is set to 5.
parameters = {
    "features": [
        {"feature": " _conds",
         "majority": ['Haze', 'Rain'],
         "minority": ['Clear', 'Fog', 'Partly Cloudy'],
         "threshold": 0.95
         },
        {"feature": "Traffic",
         "majority": ['Low'],
         "minority": ['High', 'Medium'],
         "threshold": 0.95
         }
    ],
    "favourable_class": [1],
    "unfavourable_class": [0],
    "min_records": 5
}

fairness_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.FAIRNESS.ID,
    target=target,
    parameters=parameters).result
# Keep the monitor instance ID for running the monitor and showing its metrics.
fairness_monitor_instance_id =fairness_monitor_details.metadata.id

In [None]:
# Trigger a run of the fairness monitor, then list all runs of that monitor instance.
# Note: unlike the quality run above, run_details here is the raw response; use .result to read it.
run_details = wos_client.monitor_instances.run(monitor_instance_id=fairness_monitor_instance_id, background_mode=False)
wos_client.monitor_instances.list_runs(monitor_instance_id=fairness_monitor_instance_id).result.to_dict()

In [None]:
# Show the details of the fairness monitor run as a dictionary.
run_details.result.to_dict()

In [None]:
# Wait briefly, then display the metrics recorded for the fairness monitor.
time.sleep(10)

wos_client.monitor_instances.show_metrics(monitor_instance_id=fairness_monitor_instance_id)

### Add some more Payload (Optional for populating your dashboard)

If you wish to add more payload data, take different sections of your test dataset and send them to OpenScale as shown below:

In [None]:
# Take the first 200 rows of the test features as additional payload.
# Change the slice to send a different section of the test set.

score=X_test[0:200]
score

In [None]:
# Convert the selected rows into a list of per-record value lists: read each column
# as a plain list, then transpose the columns into rows.
scoring_data = [
    list(record)
    for record in zip(*[score[column].values.tolist() for column in score.columns])
]
scoring_data

In [None]:
# Column names sent with the scoring request, in the same order as the values of each row.
fields=list(X_test.columns)
print(len(fields))
# Show the field names next to the first row as a quick sanity check.
fields, scoring_data[0]

In [None]:
# Build the scoring payload: one input entry holding the field names and the rows to score.
payload_scoring = {"input_data": [{"fields": fields, "values": scoring_data}]}
payload_scoring

In [None]:
# Score the additional rows against the online deployment.
scoring_response = wml_client.deployments.score(deployment_uid, payload_scoring)

In [None]:
# Record count before adding the new payload.
pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
print("Number of records in the payload logging table: {}".format(pl_records_count))

# Store the request/response pair explicitly under a freshly generated scoring ID.
# NOTE(review): this is done unconditionally — confirm it does not duplicate records
# when the scoring call above was already logged automatically.
print("Adding more payload data")
store_record_details = wos_client.data_sets.store_records(data_set_id=payload_data_set_id, request_body=[PayloadRecord(
                   scoring_id=str(uuid.uuid4()),
                   request=payload_scoring,
                   response=scoring_response,
                   response_time=460
               )])

# Wait before re-reading the record count.
time.sleep(10)
pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
print("Number of records in the payload logging table: {}".format(pl_records_count))

In [None]:
# Re-check the record count; re-run this cell if the new records have not shown up yet.
pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
print("Number of records in the payload logging table: {}".format(pl_records_count))

In [None]:
# Show the payload-logging data sets attached to the subscription as a dictionary.
wos_client.data_sets.list(type=DataSetTypes.PAYLOAD_LOGGING, 
                                                target_target_id=subscription_id, 
                                                target_target_type=TargetTypes.SUBSCRIPTION).result.to_dict()