# IBM Watson OpenScale Lab instructions


**This notebook demonstrates the Model Risk Management (MRM) workflow.**

**This is an improved version of the GermanCreditRisk model with a better `auc-roc (85%)` score. It is used for comparison with the basic GermanCreditRisk model, which has a lower `auc-roc (71%)` score.**

### Major change: **Added preprocessing steps and changed the categorical encoding method**

In [None]:
# Install the notebook's dependencies; `| tail -n 1` keeps only the last line of pip's output.
# NOTE(review): only numpy is pinned to an exact version; the other two installs use
# `--upgrade` without a pin, so the resolved versions can differ between runs.
!pip install numpy==1.15.4 --no-cache | tail -n 1
!pip install --upgrade watson-machine-learning-client | tail -n 1
!pip install --upgrade SciPy --no-cache | tail -n 1

## Import Libraries

In [None]:
import pandas as pd
import sys
import numpy
import sklearn
import sklearn.ensemble
numpy.set_printoptions(threshold=sys.maxsize)
from sklearn.utils.multiclass import type_of_target
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_validate
from sklearn.metrics import get_scorer
from sklearn.model_selection import cross_validate
from sklearn.metrics import classification_report

In [None]:
# Watson Machine Learning (WML) credentials used to create the API client.
# Replace the "xxxxx" placeholders with the credentials of the pre-production WML
# instance (for a Watson cloud instance, copy and paste them from that instance).
# Do not commit real credentials together with the notebook.
wml_credentials = {
    "apikey": "xxxxx",
    "iam_apikey_description": "Auto-generated for key xxxxx",
    "iam_apikey_name": "Service credentials-1",
    "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
    "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/xxxxx::serviceid:ServiceId-xxxxx",
    "instance_id": "xxxxx",
    "url": "https://us-south.ml.cloud.ibm.com",
}

## Load dataset

In [None]:
!rm german_credit_data_biased_training.csv
!wget https://raw.githubusercontent.com/rachvis/Openscale-Demo/master/german_credit_data_biased_training.csv

In [None]:
# Load the downloaded training CSV into a DataFrame.
data_df = pd.read_csv("german_credit_data_biased_training.csv")

In [None]:
# Preview the first rows of the loaded training data.
data_df.head()

In [None]:
# Split the frame into the prediction target and the remaining feature columns.
target_label_name = "Risk"
label = data_df[target_label_name]
# Despite its name, `feature_cols` is the feature DataFrame (every column except the target).
feature_cols = data_df.drop(columns=[target_label_name])

## Set model evaluation properties

In [None]:
# Evaluation settings referenced by the split, scoring and cross-validation cells.
holdout_fraction = 0.1            # share of rows reserved for the hold-out set
cv_num_folds = 3                  # intended number of cross-validation folds
random_state = 33                 # seed for the train/hold-out split
optimization_metric = 'roc_auc'   # scikit-learn scorer name used for evaluation

In [None]:
# Stratify the split on the label when the target is a classification target
# ('binary' or 'multiclass'); any other target type gets a plain random split.
is_classification_target = type_of_target(label.values) in ['multiclass', 'binary']
X_train, X_holdout, y_train, y_holdout = train_test_split(
    feature_cols,
    label,
    test_size=holdout_fraction,
    random_state=random_state,
    stratify=label.values if is_classification_target else None,
)

## Data preprocessing transformer generation

In [None]:
# Numeric columns: fill missing values with the column median, then standardise.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical columns: fill missing values with the most frequent value, then
# encode every category as a float64 ordinal code.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('OrdinalEncoder', OrdinalEncoder(categories='auto', dtype=numpy.float64)),
]
categorical_transformer = Pipeline(steps=categorical_steps)

In [None]:
# Pick the columns for each transformer by pandas dtype.
numeric_features = feature_cols.select_dtypes(include=['int64', 'float64']).columns
categorical_features = feature_cols.select_dtypes(include=['object']).columns

# One (name, transformer, columns) entry per column group.
column_transformers = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_transformers)

## Initiate model and create pipeline

In [None]:
# Gradient-boosted trees classifier.
# Use the public `sklearn.ensemble` path: the `sklearn.ensemble.gradient_boosting`
# submodule is a private location that newer scikit-learn releases no longer expose.
# Seeding with the notebook's `random_state` makes the fitted model repeatable.
model = sklearn.ensemble.GradientBoostingClassifier(random_state=random_state)

In [None]:
# Chain preprocessing and the classifier into a single estimator.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('classifier', model),
]
gbt_pipeline = Pipeline(steps=pipeline_steps)

In [None]:
# Train the full pipeline on the training split; `model_gbt` holds what `fit` returns.
model_gbt = gbt_pipeline.fit(X_train, y_train)

In [None]:
# Predict labels for the hold-out rows; `y_pred` feeds the classification report further down.
y_pred = model_gbt.predict(X_holdout)

## Evaluate model performance on the hold-out data and with cross-validation

In [None]:
# Score the fitted pipeline on the hold-out split with the configured metric.
# `scorer` is reused by the cross-validation cell.
scorer = get_scorer(optimization_metric)
holdout_score = scorer(model_gbt, X_holdout, y_holdout)
holdout_score

In [None]:
# Cross-validate on the training split with `cv_num_folds` folds.
# The fold count is passed explicitly: without `cv=` the number of folds is whatever
# the installed scikit-learn uses as its default, which is not guaranteed to match
# the configured value.
cv_results = cross_validate(
    model_gbt,
    X_train,
    y_train,
    cv=cv_num_folds,
    scoring={optimization_metric: scorer},
)
# Mean of the per-fold test scores for the configured metric.
numpy.mean(cv_results['test_' + optimization_metric])

In [None]:
# classification_report expects (y_true, y_pred). Passing the predictions first
# swaps the roles of truth and prediction, so precision and recall come out exchanged.
print(classification_report(y_holdout, y_pred))

## Initiate WML

In [None]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# Create the WML API client from the credentials defined earlier and print the
# service URL it reports.
wml_client = WatsonMachineLearningAPIClient(wml_credentials)
wml_service_url = wml_client.service_instance.get_url()
print(wml_service_url)

## Remove existing model and deployment

In [None]:
MODEL_NAME = "German Credit Risk Model - PreProd"
DEPLOYMENT_NAME = "German Credit Risk Model - PreProd"

# Delete every deployment named DEPLOYMENT_NAME together with the model it serves,
# so the following cells start from a clean state.
model_deployment_ids = wml_client.deployments.get_uids()
for deployment_id in model_deployment_ids:
    deployment = wml_client.deployments.get_details(deployment_id)
    if deployment['entity']['name'] != DEPLOYMENT_NAME:
        continue
    # Read the model GUID only for matching deployments, so an unrelated deployment
    # without a 'deployable_asset' entry cannot abort the cleanup with a KeyError.
    model_id = deployment['entity']['deployable_asset']['guid']
    print('Deleting deployment id', deployment_id)
    wml_client.deployments.delete(deployment_id)
    print('Deleting model id', model_id)
    wml_client.repository.delete(model_id)
wml_client.repository.list_models()

## Store Model

In [None]:
# Metadata stored with the model in the WML repository.
# NOTE(review): FRAMEWORK_VERSION is "0.19", but the pipeline uses `sklearn.impute`
# and `sklearn.compose`, which were introduced in scikit-learn 0.20 — confirm that
# the declared version matches the runtime the model is served on.
meta_names = wml_client.repository.ModelMetaNames
model_props_gbt = {
    meta_names.NAME: MODEL_NAME,
    meta_names.DESCRIPTION: MODEL_NAME,
    meta_names.FRAMEWORK_NAME: "scikit-learn",
    meta_names.FRAMEWORK_VERSION: "0.19",
    meta_names.RUNTIME_NAME: "python",
}

# Publish the fitted pipeline. The complete feature frame and label column (not only
# the training split) are passed as the training data reference.
published_model_details = wml_client.repository.store_model(
    model=model_gbt,
    meta_props=model_props_gbt,
    training_data=feature_cols,
    training_target=label,
)
print(published_model_details)

In [None]:
# List the models currently stored in the WML repository.
wml_client.repository.list_models()

In [None]:
# Get the model UID from the details returned by store_model; the deployment cell
# passes it as `artifact_uid`.
model_uid = wml_client.repository.get_model_uid(published_model_details)
model_uid

## Deploy model

In [None]:
# Reuse a deployment named DEPLOYMENT_NAME if one already exists, otherwise create it.
wml_deployments = wml_client.deployments.get_details()
deployment_uid = None
# NOTE: the loop variable `deployment` stays bound after this cell; the next cell reads it.
for deployment in wml_deployments['resources']:
    if DEPLOYMENT_NAME == deployment['entity']['name']:
        deployment_uid = deployment['metadata']['guid']
        break

if deployment_uid is None:
    print("Deploying model...")

    # NOTE(review): asynchronous=False presumably makes the call wait until the
    # deployment is ready — confirm against the WML client documentation.
    deployment = wml_client.deployments.create(artifact_uid=model_uid, name=DEPLOYMENT_NAME, description=DEPLOYMENT_NAME, asynchronous=False)
    deployment_uid = wml_client.deployments.get_uid(deployment)
    
print("Model id: {}".format(model_uid))
print("Deployment id: {}".format(deployment_uid))

In [None]:
# Re-read the deployment UID from `deployment`, the variable left bound by the
# deployment cell (either the matching entry of the listing or the result of `create`).
deployment_uid=wml_client.deployments.get_uid(deployment)
deployment_uid

## Sample scoring

In [None]:
# Column names of the scoring payload; every row in `values` follows this order.
fields = [
    "CheckingStatus",
    "LoanDuration",
    "CreditHistory",
    "LoanPurpose",
    "LoanAmount",
    "ExistingSavings",
    "EmploymentDuration",
    "InstallmentPercent",
    "Sex",
    "OthersOnLoan",
    "CurrentResidenceDuration",
    "OwnsProperty",
    "Age",
    "InstallmentPlans",
    "Housing",
    "ExistingCreditsCount",
    "Job",
    "Dependents",
    "Telephone",
    "ForeignWorker",
]

# Eight sample applicants, one list per applicant.
values = [
    ["no_checking",13,"credits_paid_to_date","car_new",1343,"100_to_500","1_to_4",2,"female","none",3,"savings_insurance",46,"none","own",2,"skilled",1,"none","yes"],
    ["no_checking",24,"prior_payments_delayed","furniture",4567,"500_to_1000","1_to_4",4,"male","none",4,"savings_insurance",36,"none","free",2,"management_self-employed",1,"none","yes"],
    ["0_to_200",26,"all_credits_paid_back","car_new",863,"less_100","less_1",2,"female","co-applicant",2,"real_estate",38,"none","own",1,"skilled",1,"none","yes"],
    ["0_to_200",14,"no_credits","car_new",2368,"less_100","1_to_4",3,"female","none",3,"real_estate",29,"none","own",1,"skilled",1,"none","yes"],
    ["0_to_200",4,"no_credits","car_new",250,"less_100","unemployed",2,"female","none",3,"real_estate",23,"none","rent",1,"management_self-employed",1,"none","yes"],
    ["no_checking",17,"credits_paid_to_date","car_new",832,"100_to_500","1_to_4",2,"male","none",2,"real_estate",42,"none","own",1,"skilled",1,"none","yes"],
    ["no_checking",33,"outstanding_credit","appliances",5696,"unknown","greater_7",4,"male","co-applicant",4,"unknown",54,"none","free",2,"skilled",1,"yes","yes"],
    ["0_to_200",13,"prior_payments_delayed","retraining",1375,"100_to_500","4_to_7",3,"male","none",3,"real_estate",37,"none","own",2,"management_self-employed",1,"none","yes"],
]

# Request body handed to the scoring call.
payload_scoring = dict(fields=fields, values=values)
print(payload_scoring)

In [None]:
# Look up the scoring URL of the deployment identified by `deployment_uid`.
# The endpoint stays None when no deployment with that GUID is listed.
credit_risk_scoring_endpoint = None
print(deployment_uid)

for deployment in wml_client.deployments.get_details()['resources']:
    # Compare GUIDs for equality: `in` between two strings is a substring test and
    # could match a different deployment whose GUID merely contains this one.
    if deployment['metadata']['guid'] == deployment_uid:
        credit_risk_scoring_endpoint = deployment['entity']['scoring_url']
        break  # GUIDs identify a single deployment; no need to keep scanning

print(credit_risk_scoring_endpoint)

In [None]:
# Send the sample payload to the deployment's scoring endpoint and display the response.
scoring_response = wml_client.deployments.score(credit_risk_scoring_endpoint, payload_scoring)
scoring_response