# Monitor your ML Models using Watson OpenScale

## 1. Setup the Notebook Environment

## 1.1 Install the necessary packages

### Watson OpenScale Python SDK

In [None]:
!pip install ibm-ai-openscale

### Scikit-learn version 0.20

In [None]:
!pip install scikit-learn==0.20.3

### Watson Machine Learning Python SDK

In [None]:
!pip install watson-machine-learning-client -U

After installing the required packages, restart the notebook kernel by clicking `Kernel > Restart`.

## 1.2 Import Packages

In [None]:
import json
import time

import ibm_db
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn import preprocessing
from sklearn import svm, metrics
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder, Imputer, OneHotEncoder
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from watson_machine_learning_client import WatsonMachineLearningAPIClient

from ibm_ai_openscale import APIClient4ICP
from ibm_ai_openscale.engines import *
from ibm_ai_openscale.utils import *
from ibm_ai_openscale.supporting_classes import PayloadRecord, Feature
from ibm_ai_openscale.supporting_classes.enums import *

## 2. Configuration

### 2.1 Global Variables

In [None]:
# User-supplied names; fill these in before running the rest of the notebook.
# DEPLOYMENT_NAME is used as the name of the online deployment created later.
# NOTE(review): MODEL_NAME is not referenced by any cell visible in this notebook.
MODEL_NAME=""
DEPLOYMENT_NAME=""
# Ensure you create an empty schema and store its name in this variable
# (it is passed to ai_client.data_mart.setup when the datamart is created).
SCHEMA_NAME=""

### 2.2 Add Dataset

Select the cell below, then choose the `Insert Pandas Dataframe` option. Ensure the resulting variable is named `df_data_1`.

### 2.3 Add your WML Credentials

In [None]:
import sys
import os
import os.path

# Credentials for the Watson Machine Learning client. The access token and the
# service URL are read from environment variables of the notebook runtime.
WML_CREDENTIALS = dict(
    token=os.environ['USER_ACCESS_TOKEN'],
    instance_id="wml_local",
    url=os.environ['RUNTIME_ENV_APSX_URL'],
    version="2.5.0",
)

### 2.4 Update your AIOS Credentials

In [None]:
# Credentials handed to the Watson OpenScale client (APIClient4ICP) further down.
# The URL is read from the runtime environment; update the username and
# password below with your own values before running.
WOS_CREDENTIALS={
    "url" : os.environ['RUNTIME_ENV_APSX_URL'],
    "username":"admin",
    "password":"password"
}

### 2.5 Add your Db credentials

#### These Db credentials are needed ONLY if you have NOT configured your `OpenScale Datamart`.

In [None]:

# Database connection details passed to ai_client.data_mart.setup(...) when no
# datamart exists yet. Every value below is a placeholder to be replaced.
DATABASE_CREDENTIALS = {
    "hostname": "DB-Server-IP",
    "username": "DB-username",
    "password": "DB-Pwd",
    # NOTE: `xxxx` is not a defined name. Replace it with the numeric port of
    # your database, otherwise this cell raises NameError.
    "port": xxxx,
    "db": "DB-name",
    
}



## 3. Create the Call Drop Model using Scikit-Learn

In [None]:
# `Call_Drop_Actual` is the label to predict; every other column is a model input.
y = df_data_1['Call_Drop_Actual']
X = df_data_1.drop(columns=['Call_Drop_Actual'])

In [None]:

# Split features and labels into train and test partitions.
# random_state=0 fixes the shuffle seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)



In [None]:
'''Add a categorical transformer to your model pipeline. 
    You will need to add a label encoder into the model pipeline before storing it into WML '''

# Columns that are treated as categorical inputs.
categorical_features = [
    "Start_Time_MM_DD_YYYY",
    "Traffic",
    " _conds",
    "Start_Time_HH_MM_SS_s",
]

# Fill missing categories with the constant 'missing', then one-hot encode.
# Categories not seen during fit are ignored instead of raising at transform time.
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)


In [None]:
# Apply the categorical transformer to the categorical columns only.
# No other transformer is registered here.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features)])

In [None]:
# Full model pipeline: preprocessing followed by a linear-kernel SVM classifier.
# NOTE(review): SVC is created without probability=True, so it exposes no
# predict_proba; the OpenScale subscription later names a probability column -
# confirm the deployed model actually returns one.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                      ('model', svm.SVC(kernel='linear'))])

In [None]:
# Fit the preprocessing + classifier pipeline on the training partition.
model = pipeline.fit(X_train,y_train)

In [None]:
# Watson Machine Learning client, authenticated with the credentials defined above.
client = WatsonMachineLearningAPIClient(WML_CREDENTIALS)

In [None]:
# Metadata stored alongside the model in the WML repository: its display name
# plus the runtime and model type identifiers (scikit-learn 0.20 on Python 3.6).
# NOTE(review): the name is hard-coded here; the MODEL_NAME global is not used.
meta_props={
 client.repository.ModelMetaNames.NAME: "Call Drop Prediction model",
 client.repository.ModelMetaNames.RUNTIME_UID: "scikit-learn_0.20-py3.6",
 client.repository.ModelMetaNames.TYPE: "scikit-learn_0.20",
}

In [None]:
## Store the model on WML
# The fitted pipeline is saved to the WML repository together with the training
# data and labels it was fitted on.
published_model = client.repository.store_model(
    pipeline,
    meta_props=meta_props,
    training_data=X_train,
    training_target=y_train,
)

# Later cells (deployment creation and the OpenScale subscription) refer to the
# stored model through `published_model_uid`, so derive it from the details
# returned by store_model.
published_model_uid = client.repository.get_model_uid(published_model)





## 4. Create a new Deployment Space

See the documentation at https://www.ibm.com/support/producthub/icpdata/docs/content/SSQNUZ_current/wsj/analyze-data/ml-spaces_local.html for the steps to create a deployment space.

In [None]:
# Make the current project (its id is read from the PROJECT_ID environment
# variable) the default project for the WML client.
project_id = os.environ['PROJECT_ID']
client.set.default_project(project_id)

In [None]:
def guid_from_space_name(client, space_name):
    """Look up the GUID of the deployment space called *space_name*.

    Args:
        client: WML client; only ``client.spaces.get_details()`` is used, and
            its result must contain a ``'resources'`` list whose items carry
            ``item['entity']['name']`` and ``item['metadata']['guid']``.
        space_name: Exact name of the deployment space to find.

    Returns:
        The GUID of the matching space. If several spaces share the name, the
        last one listed wins. If no space matches, an empty list is returned,
        which is what this function has always returned in that case.
    """
    guid = []
    for space in client.spaces.get_details()['resources']:
        if space['entity']['name'] == space_name:
            guid = space['metadata']['guid']
    return guid

In [None]:
# Enter the name of your deployment space of the current project
dep_name=''
# Resolve the space name to its GUID (an empty list means no space has that name).
space_uid = guid_from_space_name(client, dep_name)

In [None]:
# Echo the resolved space GUID so it can be checked before it is used.
space_uid

In [None]:
# Make the resolved deployment space the default space for the WML client.
client.set.default_space(space_uid)

In [None]:
 # Deployment configuration: the deployment name from DEPLOYMENT_NAME and an
 # online deployment with an empty option dict.
 deploy_meta = {
     client.deployments.ConfigurationMetaNames.NAME: DEPLOYMENT_NAME,
     client.deployments.ConfigurationMetaNames.ONLINE: {}
 }

In [None]:
## Create a Deployment for your stored model
# `published_model_uid` must hold the uid of the model stored in the WML
# repository; `deploy_meta` carries the deployment name and type.

created_deployment = client.deployments.create(published_model_uid, meta_props=deploy_meta)



In [None]:
# Show the details of the deployments known to the WML client.
client.deployments.get_details()

In [None]:

# Find the online scoring URL of the deployment created above.
# `scoring_endpoint` stays None if the deployment is not in the listing.
scoring_endpoint = None
deployment_uid = created_deployment['metadata']['guid']
print(deployment_uid)

for deployment in client.deployments.get_details()['resources']:
    # Compare for equality: a substring test (`in`) could also match a
    # different deployment whose guid merely contains this one.
    if deployment['metadata']['guid'] == deployment_uid:
        scoring_endpoint = deployment['entity']['status']['online_url']['url']
        break

print(scoring_endpoint)

## 5. Setup your Watson Openscale Dashboard 

### 5.1 Create the Watson Openscale Client

In [None]:
# Create the Watson OpenScale client from the credentials defined earlier and
# echo the SDK version.
ai_client = APIClient4ICP(aios_credentials=WOS_CREDENTIALS)
ai_client.version

### 5.2 Add your Machine Learning Provider

If you have already bound the ML provider to the OpenScale instance, just retrieve the existing `binding_uid` instead: in the binding cell below, comment out the first line and uncomment the second line.

In [None]:
# Reuse the datamart if one is already configured; otherwise create it with the
# database credentials and schema defined in the configuration section.
try:
    data_mart_details = ai_client.data_mart.get_details()
except Exception:
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit are not mistaken for "no datamart configured".
    print('Setting up external datamart')
    ai_client.data_mart.setup(db_credentials=DATABASE_CREDENTIALS, schema=SCHEMA_NAME)
else:
    print('Using existing external datamart')

In [None]:
# Register the WML instance with OpenScale under the name 'WML instance' and
# keep the returned binding uid.
binding_uid = ai_client.data_mart.bindings.add('WML instance', WatsonMachineLearningInstance4ICP(WML_CREDENTIALS))
#bindings_details = ai_client.data_mart.bindings.get_details()
#ai_client.data_mart.bindings.list()

In [None]:
# List the assets available through the bound ML provider(s).
ai_client.data_mart.bindings.list_assets()

### 5.3 Setup the Datamart on AI OpenScale

In [None]:
# Reuse the datamart if one is already configured; otherwise create it with the
# database credentials and schema defined in the configuration section.
try:
    data_mart_details = ai_client.data_mart.get_details()
except Exception:
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit are not mistaken for "no datamart configured".
    print('Setting up external datamart')
    ai_client.data_mart.setup(db_credentials=DATABASE_CREDENTIALS, schema=SCHEMA_NAME)
else:
    print('Using existing external datamart')

### 5.4 Perform Initial Scoring for your Model Deployment


In [None]:
# Use the last 20 rows of the test features as the initial scoring sample.
score=X_test.tail(20)
score

In [None]:
# Turn the sample into a list of rows: take each column's values as a list,
# then transpose the columns into one list per row.
scoring_data = [
    list(row)
    for row in zip(*(score[column].values.tolist() for column in score.columns))
]
scoring_data

In [None]:
# Column names of the scoring rows, in the same order as the values in each row.
fields=list(X_test.columns)
print(len(fields))
# Show the field names next to the first row as a quick sanity check.
fields, scoring_data[0]

In [None]:

# Scoring request for the WML deployment: a single input-data entry carrying
# the row values (no field names are sent).
job_payload = {
client.deployments.ScoringMetaNames.INPUT_DATA: [{
 'values': scoring_data
}]
}
print(job_payload)

In [None]:
# Score the sample rows against the online deployment and print the response.
scoring_response = client.deployments.score(deployment_uid, job_payload)

print(scoring_response)

### 5.5 Create a new Subscription 

In [None]:
# Subscribe the stored model to OpenScale as a binary classifier on structured
# data, naming the label, prediction and probability columns together with the
# feature columns and which of them are categorical.
subscription = ai_client.data_mart.subscriptions.add(WatsonMachineLearningAsset(
    published_model_uid,
    problem_type=ProblemType.BINARY_CLASSIFICATION,
    input_data_type=InputDataType.STRUCTURED,
    label_column='Call_Drop_Actual',
    prediction_column='prediction',
    probability_column='prediction_probability',
    categorical_columns=["Start_Time_MM_DD_YYYY","Start_Time_HH_MM_SS_s"," _conds","Traffic"],
    feature_columns = ["outgoing_site_id","Start_Time_MM_DD_YYYY","Start_Time_HH_MM_SS_s","Call_Service_Duration"," _conds"," _dewptm"," _fog"," _hail"," _hum"," _pressurem","total number_of_calls","total call duration (min)","Traffic","lat","long","Call_Drop_Count","Total_Calls","Call_Drop_Perc"],
))

In [None]:
# Collect the uids of all subscriptions and list the subscriptions as a table.
subscriptions_uids = ai_client.data_mart.subscriptions.get_uids()
ai_client.data_mart.subscriptions.list()

### 5.6 Perform Initial Payload Logging
Note: You may re-use this code snippet by modifying the request_data variable to perform payload logging after finishing the initial dashboard setup

In [None]:
## From the output of the above table choose your model name and copy the uid against it. Store the uid in the subscription_uid variable
# NOTE(review): the value below looks like a sample uid from another
# environment - replace it with the uid of your own subscription.


subscription_uid="288b22ad-fa7e-4ebb-b04d-564f726724fb"
from ibm_ai_openscale import APIClient4ICP
from ibm_ai_openscale.supporting_classes import PayloadRecord


# Re-fetch the subscription by uid so this cell can be re-run on its own.
subscription = ai_client.data_mart.subscriptions.get(subscription_uid=subscription_uid)

"""
request_data - input to scoring endpoint in supported by Watson OpenScale format
response_data - output from scored model in supported by Watson OpenScale format
response_time - scoring request response time [ms] (integer type)

Example:

request_data = {
    "fields": ["AGE", "SEX", "BP", "CHOLESTEROL", "NA", "K"],
    "values": [[28, "F", "LOW", "HIGH", 0.61, 0.026]]
  }

response_data = {
    "fields": ["AGE", "SEX", "BP", "CHOLESTEROL", "NA", "K", "probability", "prediction", "DRUG"],
    "values": [[28, "F", "LOW", "HIGH", 0.61, 0.026, [0.82, 0.07, 0.0, 0.05, 0.03], 0.0, "drugY"]]
  }
"""

# `request_data` was previously used here before any cell had defined it.
# Build it from the field names and rows that were sent to the deployment in
# section 5.4, i.e. the request that produced `scoring_response`.
request_data = {
    "fields": fields,
    "values": scoring_data
  }

# Two payload records with the same request/response and different response times.
records = [PayloadRecord(request=request_data, response=scoring_response, response_time=18), 
                PayloadRecord(request=request_data, response=scoring_response, response_time=12)]

subscription.payload_logging.store(records=records)

### 5.7 Setup Quality Monitoring

```NOTE: If you are using the dataset provided in the dashboard, leave the threshold monitors to these values. However, if you are using your own dataset, you can play around with the threshold value (value b/w 0 and 1) according to your requirement.```

In [None]:
# Short pause before enabling the monitor - presumably to let the previously
# stored payload settle; TODO confirm whether it is required.
time.sleep(5)
# Enable quality monitoring with an alert threshold of 0.95; at least 5
# records are required.
subscription.quality_monitoring.enable(threshold=0.95, min_records=5)

### 5.8 Log Feedback Data to your Subscription

In [None]:
# Feedback data = test features with the actual label appended as the last column.
feedback_data_raw=pd.concat([X_test,y_test],axis=1)
feedback_data_raw

In [None]:
# Take the last 20 feedback rows as plain nested lists (one list per row).
feedback_data=feedback_data_raw.tail(20).values.tolist()
feedback_data

In [None]:
# Wrap the feedback rows in a dict; only its "data" entry is used below.
feedback_scoring={
    "data":feedback_data
}

In [None]:

# Send the feedback rows to the subscription's feedback table.
subscription.feedback_logging.store(feedback_scoring['data'])




In [None]:
# Display the feedback records stored for this subscription.
subscription.feedback_logging.show_table()

#### Run an initial quality test

In [None]:
# Trigger a quality monitor run; background_mode=False is passed so the call
# is not run in the background.
run_details = subscription.quality_monitoring.run(background_mode=False)

In [None]:
# Display the quality metrics recorded for this subscription.
subscription.quality_monitoring.show_table()

In [None]:
# Render plots inside the notebook.
%matplotlib inline

# Load the quality metrics as a pandas DataFrame and draw a horizontal bar
# chart of the `value` column against the `id` column.
quality_pd = subscription.quality_monitoring.get_table_content(format='pandas')
quality_pd.plot.barh(x='id', y='value');

### 5.9 Setup the Fairness Monitors

The code below configures fairness monitoring for our model. It turns on monitoring for two features, _conds(Weather Condition) and Traffic for the cell tower. In each case, we must specify:
  * Which model feature to monitor
  * One or more **majority** groups, which are values of that feature that we expect to receive a higher percentage of favorable outcomes
  * One or more **minority** groups, which are values of that feature that we expect to receive a higher percentage of unfavorable outcomes
  * The threshold at which we would like OpenScale to display an alert if the fairness measurement falls below (in this case, 95%)

Additionally, we must specify which outcomes from the model are favourable outcomes, and which are unfavourable. We must also provide the number of records OpenScale will use to calculate the fairness score. In this case, OpenScale's fairness monitor will run hourly, but will not calculate a new fairness rating until at least 5 records have been added. Finally, to calculate fairness, OpenScale must perform some calculations on the training data, so we provide the dataframe containing the data.

In [None]:
# Enable fairness monitoring on the `Traffic` and ` _conds` features, each with
# its majority/minority groups and a 0.95 threshold. Class 1 is declared the
# favourable outcome and class 0 the unfavourable one; at least 5 records are
# required, and the full dataframe is supplied as training data.
subscription.fairness_monitoring.enable(
            features=[
                Feature("Traffic", majority=['Low'], minority=['High','Medium'], threshold=0.95),
                Feature(" _conds", majority=['Haze','Rain'], minority=['Clear','Fog','Partly Cloudy'], threshold=0.95)
            ],
            favourable_classes=[1],
            unfavourable_classes=[0],
            min_records=5,
            training_data=df_data_1
        )

In [None]:
# Short pause before running the monitor - presumably to let the configuration
# settle; TODO confirm whether it is required.
time.sleep(5)

# Trigger a fairness monitor run; background_mode=False is passed so the call
# is not run in the background.
run_details = subscription.fairness_monitoring.run(background_mode=False)

In [None]:
# Short pause before reading the results - presumably to let the run's metrics
# become available; TODO confirm whether it is required.
time.sleep(5)

# Display the fairness metrics recorded for this subscription.
subscription.fairness_monitoring.show_table()

### Add some more Payload (Optional for populating your dashboard)

If you wish to add more payload data, take different sections of your test dataset and send them to OpenScale as shown below.

In [None]:
# Use the first 100 rows of the test features as additional payload data.
score=X_test.head(100)
score

In [None]:
# Turn the sample into a list of rows: take each column's values as a list,
# then transpose the columns into one list per row.
scoring_data = [
    list(row)
    for row in zip(*(score[column].values.tolist() for column in score.columns))
]
scoring_data

In [None]:
# Column names of the scoring rows, in the same order as the values in each row.
fields=list(X_test.columns)
print(len(fields))
# Show the field names next to the first row as a quick sanity check.
fields, scoring_data[0]

In [None]:
# Payload request in fields/values form: the column names plus one list of
# values per row.
request_data = {
    "fields": fields,
    "values": scoring_data
  }
request_data

In [None]:
## From the output of the above table choose your model name and copy the uid against it. Store the uid in the subscription_uid variable
# NOTE: `subscription_uid` is not assigned in this cell; it must already have
# been set by an earlier cell.



from ibm_ai_openscale import APIClient4ICP
from ibm_ai_openscale.supporting_classes import PayloadRecord


# Re-fetch the subscription by uid so this cell can be re-run on its own.
subscription = ai_client.data_mart.subscriptions.get(subscription_uid=subscription_uid)

"""
request_data - input to scoring endpoint in supported by Watson OpenScale format
response_data - output from scored model in supported by Watson OpenScale format
response_time - scoring request response time [ms] (integer type)

Example:

request_data = {
    "fields": ["AGE", "SEX", "BP", "CHOLESTEROL", "NA", "K"],
    "values": [[28, "F", "LOW", "HIGH", 0.61, 0.026]]
  }

response_data = {
    "fields": ["AGE", "SEX", "BP", "CHOLESTEROL", "NA", "K", "probability", "prediction", "DRUG"],
    "values": [[28, "F", "LOW", "HIGH", 0.61, 0.026, [0.82, 0.07, 0.0, 0.05, 0.03], 0.0, "drugY"]]
  }
"""



# Two payload records with the same request/response and different response times.
# NOTE(review): `scoring_response` is whatever the last scoring call returned;
# this cell does not re-score the new `request_data` rows - confirm that the
# response matches the request being logged.
records = [PayloadRecord(request=request_data, response=scoring_response, response_time=18), 
                PayloadRecord(request=request_data, response=scoring_response, response_time=12)]

subscription.payload_logging.store(records=records)