# V4 Scikit-Learn: Training, Saving, and WML Online and Batch Deployment and Scoring


# Install the Watson Machine Learning v4 Python client.
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
!pip install watson-machine-learning-client-v4

In [54]:
# Download the processed Cleveland heart-disease file from the UCI archive and
# save it in the working directory as processed.cleveland.data.
!wget http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data -O processed.cleveland.data

--2019-11-21 16:49:16--  http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18461 (18K) [application/x-httpd-php]
Saving to: 'processed.cleveland.data'


2019-11-21 16:49:16 (258 KB/s) - 'processed.cleveland.data' saved [18461/18461]



In [55]:
# Install sklearn-pandas, which provides the DataFrameMapper used in the pipeline.
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
!pip install sklearn-pandas



## 1.0 Model training

In [77]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

import numpy as np
from pprint import pprint

import pandas as pd
import time

In [78]:
# Local path of the dataset file written by the wget cell.
ClevelandDataSet = "processed.cleveland.data"

In [79]:
# Names for the 14 columns of the headerless data file, in file order.
col_names = [
    'age', 'sex', 'cp', 'restbp', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num',
]

# Parse the CSV, treating '?' in 'ca' and 'thal' as missing, then drop every
# row that contains a missing value.
heart_df = pd.read_csv(
    ClevelandDataSet,
    sep=',',
    header=None,
    names=col_names,
    na_filter=True,
    na_values={'ca': '?', 'thal': '?'},
).dropna()

# Preview the first rows.
heart_df.head()

Unnamed: 0,age,sex,cp,restbp,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [80]:
# Derive the binary target column: 1 when 'num' is greater than 0
# (heart disease diagnosed), otherwise 0.
heart_df['diagnosed'] = (heart_df['num'] > 0).astype('int64')

In [81]:
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Select the feature columns by name: everything except the raw label 'num'
# and the derived binary target 'diagnosed'. A positional slice such as
# columns[:-2] would silently pick the wrong columns if the 'diagnosed' column
# had not been added yet; Index.drop raises KeyError in that case instead.
feature_cols = heart_df.columns.drop(['num', 'diagnosed'])
features_df = heart_df[feature_cols]

# Two-stage pipeline: scale every feature column into [0, 1], then fit a
# logistic-regression classifier on the scaled values.
steps = [
    ('df_mapper', DataFrameMapper([
        (feature_cols, MinMaxScaler(copy=True, feature_range=(0, 1)))
    ])),
    ('lr_classifier', LogisticRegression())
]
skl_ppl = Pipeline(steps)

In [82]:
# Hold out 33% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
heart_train, heart_test, target_train, target_test = train_test_split(
    features_df,
    heart_df['diagnosed'],
    test_size=0.33,
    random_state=0,
)

In [83]:
# Fit the whole pipeline (scaler + classifier) on the training split.
skl_ppl.fit(heart_train, target_train)

Pipeline(memory=None,
     steps=[('df_mapper', DataFrameMapper(default=False, df_out=False,
        features=[(Index(['age', 'sex', 'cp', 'restbp', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object'), MinMaxScaler(copy=True, feature_range=(0, 1)))],
        input_df=F...penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))])

In [84]:
# Predict the 'diagnosed' label for the held-out rows.
y_output = skl_ppl.predict(heart_test)

In [85]:
from sklearn.metrics import accuracy_score

# accuracy_score takes (y_true, y_pred): ground-truth labels first, predictions
# second. Accuracy is symmetric, so the number is unchanged, but the call now
# matches the documented argument order.
accuracy_score(target_test.values, y_output)

0.8686868686868687

# Initialize the WML Python client

In [86]:
import sys
import os
import os.path

from watson_machine_learning_client import WatsonMachineLearningAPIClient

# The access token is read from the environment rather than written into the notebook.
token = os.environ['USER_ACCESS_TOKEN']

# Connection settings for the target cluster.
wml_credentials = {
    "token": token,
    "instance_id": "wml_local",
    "url": "https://wmlfinal-cpd-wmlfinal.apps.cp4dfvt-lb-1.fyre.ibm.com",
    "version": "2.5.0",
}

# Client handle used by all repository / deployment calls below.
client = WatsonMachineLearningAPIClient(wml_credentials)

In [87]:
# Create a deployment space; only its name is supplied.
space_meta = {client.spaces.ConfigurationMetaNames.NAME: "myspace_deployment_demo"}
spaces_details = client.spaces.store(space_meta)

# Show the metadata returned for the stored space.
spaces_details

{'metadata': {'role': 'Admin',
  'guid': '684a7ad5-9fc3-4a87-9e68-eecc70d335fc',
  'id': '684a7ad5-9fc3-4a87-9e68-eecc70d335fc',
  'created_at': '2019-11-21T17:31:27.783Z',
  'owner': '1000331006',
  'href': '/v4/spaces/684a7ad5-9fc3-4a87-9e68-eecc70d335fc'},
 'entity': {'name': 'myspace_deployment_demo',
  'assets': {'experiments': [],
   'pipelines': [],
   'runtimes': [],
   'libraries': [],
   'data_references': [],
   'models': [],
   'functions': []}}}

In [88]:
# List spaces, limited to 5 entries.
client.spaces.list(limit=5)

------------------------------------  ---------------------------------  ------------------------
GUID                                  NAME                               CREATED
684a7ad5-9fc3-4a87-9e68-eecc70d335fc  myspace_deployment_demo            2019-11-21T17:31:27.783Z
0fb6633c-4d3f-4a13-ba29-398012541f78  myspace_deployment_for_asset_file  2019-11-21T12:21:08.430Z
------------------------------------  ---------------------------------  ------------------------


In [89]:
# Make a space the default target for subsequent client calls.
# NOTE(review): this hardcoded GUID is 'myspace_deployment_for_asset_file' in the
# recorded listing, not the 'myspace_deployment_demo' space created above — confirm
# this is intended.
client.set.default_space("0fb6633c-4d3f-4a13-ba29-398012541f78")

'SUCCESS'

In [114]:
# Switch the client's default scope to the project named by the PROJECT_ID
# environment variable. Per the recorded output, this unsets the default space.
project_id = os.environ['PROJECT_ID']
client.set.default_project(project_id)

Unsetting the space_id ...


'SUCCESS'

## 2.0 Saving the model

In [116]:
# Metadata stored with the model: display name plus the runtime and model type.
meta_props = {
    client.repository.ModelMetaNames.NAME: "skl_pipeline_heart_problem_prediction-nov21-demo",
    client.repository.ModelMetaNames.RUNTIME_UID: "scikit-learn_0.20-py3",
    client.repository.ModelMetaNames.TYPE: "scikit-learn_0.20",
}

# Save the fitted pipeline to the repository, passing the training data and
# training target alongside it.
model_artifact = client.repository.store_model(
    model=skl_ppl,
    meta_props=meta_props,
    training_data=heart_train,
    training_target=target_train,
)

In [117]:
# Extract the stored model's UID from the details returned by store_model;
# it is used below to create deployments.
model_uid = client.repository.get_model_uid(model_artifact)
print("Model UID = " + model_uid)

Model UID = db7e4671-1275-4d82-b1b8-34213a43f353


In [119]:
# List the models stored in the current default scope.
client.repository.list_models()

------------------------------------  ------------------------------------------------  ------------------------  -----------------
GUID                                  NAME                                              CREATED                   TYPE
db7e4671-1275-4d82-b1b8-34213a43f353  skl_pipeline_heart_problem_prediction-nov21-demo  2019-11-21T17:43:29.002Z  scikit-learn_0.20
------------------------------------  ------------------------------------------------  ------------------------  -----------------


# Deploy model for online scoring

In [93]:
# Deployment configuration: name, description, and an empty ONLINE entry to
# request an online deployment. This rebinds meta_props, which previously held
# the model metadata.
meta_props = {
   client.deployments.ConfigurationMetaNames.NAME: "v4_deploy_skl_ppl_nov21_demo",
   client.deployments.ConfigurationMetaNames.DESCRIPTION: "v4_deploy_skl_ppl",
   client.deployments.ConfigurationMetaNames.ONLINE: {}
}

In [94]:
# Create the online deployment for the stored model; the recorded output shows
# the call reporting progress until the deployment is ready.
deployment_details = client.deployments.create(model_uid, meta_props=meta_props)



#######################################################################################

Synchronous deployment creation for uid: 'c393e3c2-77af-45e3-b86f-f72c0a794525' started

#######################################################################################


initializing
ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='d7f74f5c-00d7-4eb3-8786-05accb0ef8cc'
------------------------------------------------------------------------------------------------




In [95]:
# UID of the online deployment, used for scoring below.
dep_id = client.deployments.get_uid(deployment_details)

# Score the deployed online model

In [96]:
# Scoring payload: column names under 'fields', one list of feature values
# per held-out row under 'values'.
scoring_payload = {
    'fields': heart_test.columns.tolist(),
    'values': heart_test.values.tolist(),
}

In [97]:
# Display the payload for inspection.
scoring_payload

{'fields': ['age',
  'sex',
  'cp',
  'restbp',
  'chol',
  'fbs',
  'restecg',
  'thalach',
  'exang',
  'oldpeak',
  'slope',
  'ca',
  'thal'],
 'values': [[52.0,
   1.0,
   1.0,
   118.0,
   186.0,
   0.0,
   2.0,
   190.0,
   0.0,
   0.0,
   2.0,
   0.0,
   6.0],
  [42.0, 0.0, 4.0, 102.0, 265.0, 0.0, 2.0, 122.0, 0.0, 0.6, 2.0, 0.0, 3.0],
  [56.0, 1.0, 3.0, 130.0, 256.0, 1.0, 2.0, 142.0, 1.0, 0.6, 2.0, 1.0, 6.0],
  [59.0, 1.0, 4.0, 138.0, 271.0, 0.0, 2.0, 182.0, 0.0, 0.0, 1.0, 0.0, 3.0],
  [59.0, 1.0, 2.0, 140.0, 221.0, 0.0, 0.0, 164.0, 1.0, 0.0, 1.0, 0.0, 3.0],
  [44.0, 1.0, 4.0, 110.0, 197.0, 0.0, 2.0, 177.0, 0.0, 0.0, 1.0, 1.0, 3.0],
  [38.0, 1.0, 1.0, 120.0, 231.0, 0.0, 0.0, 182.0, 1.0, 3.8, 2.0, 0.0, 7.0],
  [62.0, 0.0, 4.0, 140.0, 394.0, 0.0, 2.0, 157.0, 0.0, 1.2, 2.0, 0.0, 3.0],
  [69.0, 1.0, 3.0, 140.0, 254.0, 0.0, 2.0, 146.0, 0.0, 2.0, 2.0, 3.0, 7.0],
  [46.0, 0.0, 4.0, 138.0, 243.0, 0.0, 2.0, 152.0, 1.0, 0.0, 2.0, 0.0, 3.0],
  [65.0, 1.0, 4.0, 110.0, 248.0, 0.0, 2.0, 158.

In [98]:
# Wrap the payload under the client's input-data key and send it to the
# online deployment for scoring.
scoring_data = {client.deployments.ScoringMetaNames.INPUT_DATA: [scoring_payload]}
predictions = client.deployments.score(dep_id, scoring_data)

In [99]:
# Display the scoring response; the recorded output shows a predicted label
# and a probability pair for each row.
predictions

{'predictions': [{'fields': ['prediction', 'probability'],
   'values': [[0, [0.8512809257373647, 0.1487190742626353]],
    [0, [0.7607467745016144, 0.23925322549838557]],
    [1, [0.24933240658653466, 0.7506675934134653]],
    [0, [0.8566908500076501, 0.14330914999234992]],
    [0, [0.7680418942005225, 0.2319581057994775]],
    [0, [0.8097266445201576, 0.1902733554798424]],
    [1, [0.37776572467779956, 0.6222342753222004]],
    [0, [0.7648070528206659, 0.23519294717933414]],
    [1, [0.10976962923225086, 0.8902303707677491]],
    [0, [0.5815939971987363, 0.41840600280126367]],
    [1, [0.3942003661562703, 0.6057996338437297]],
    [0, [0.8430491489070361, 0.15695085109296394]],
    [1, [0.36075298871022454, 0.6392470112897755]],
    [1, [0.3839149996863046, 0.6160850003136954]],
    [0, [0.8139750087911286, 0.18602499120887134]],
    [0, [0.863990841995643, 0.13600915800435692]],
    [1, [0.09931792080519553, 0.9006820791948045]],
    [0, [0.8890899569910073, 0.11091004300899267]],
 

In [100]:
# List the data assets in the current default scope.
client.data_assets.list()

--------------  ----------  ----  ------------------------------------
NAME            ASSET_TYPE  SIZE  ASSET_ID
heart_test.csv  data_asset  6197  6f33a06a-d68e-4ab7-a2c9-78fc07479dd6
--------------  ----------  ----  ------------------------------------


# Batch Scoring

In [101]:
# Create a batch deployment of the same stored model, with compute
# configuration name "S" and 1 node.
dep_details = client.deployments.create(
    artifact_uid=model_uid,
    meta_props={
        client.deployments.ConfigurationMetaNames.NAME: "scikit_batch_score",
        client.deployments.ConfigurationMetaNames.BATCH: {},
        client.deployments.ConfigurationMetaNames.COMPUTE: {"name": "S", "nodes": 1},
    },
)



#######################################################################################

Synchronous deployment creation for uid: 'c393e3c2-77af-45e3-b86f-f72c0a794525' started

#######################################################################################


ready.


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='81650c94-6459-4b10-8b77-bc8f6b9740f4'
------------------------------------------------------------------------------------------------




In [102]:
# UID of the batch deployment. This rebinds dep_id, which previously held the
# online deployment's UID.
dep_id = client.deployments.get_uid(dep_details)

In [103]:
# Display the batch deployment UID.
dep_id

'81650c94-6459-4b10-8b77-bc8f6b9740f4'

In [104]:
# List data assets to find the asset to use as batch input.
client.data_assets.list()

------------------  ----------  ----  ------------------------------------
NAME                ASSET_TYPE  SIZE  ASSET_ID
heart_test.csv      data_asset  6197  6f33a06a-d68e-4ab7-a2c9-78fc07479dd6
MyAsset_heart_data  data_asset  6197  89528d46-a316-487e-8291-bdb13c8aac23
------------------  ----------  ----  ------------------------------------


In [105]:
# Fetch details of the input asset. The hardcoded id is 'heart_test.csv' in the
# recorded listing above.
# NOTE(review): the id is specific to one environment — replace it when re-running.
asset_details = client.data_assets.get_details("6f33a06a-d68e-4ab7-a2c9-78fc07479dd6")

In [106]:
# href of the input asset, used as the job's input data reference location.
asset_href = client.data_assets.get_href(asset_details)

# Create a batch scoring job

In [107]:
# Batch job payload: one input reference pointing at the data asset by href,
# and one output reference naming the data asset the results are written to.
job_payload_ref = {
    client.deployments.ScoringMetaNames.INPUT_DATA_REFERENCES: [{
        "id": "1234",
        "name": "test_ref_input",
        "type": "data_asset",
        "connection": {},
        "location": {
            "href": asset_href
        },
        "schema": {}
    }],
    client.deployments.ScoringMetaNames.OUTPUT_DATA_REFERENCE: {
            "type": "data_asset",
            "connection": {},
            "location": {
                # The result file name embeds the batch deployment UID.
                "name": "sk_learn_results_{}.csv".format(dep_id),
                "description": "testing zip results"
            }
        }
    }

In [108]:
# Submit the batch scoring job against the batch deployment.
job = client.deployments.create_job(deployment_id=dep_id,meta_props=job_payload_ref)

In [109]:
# UID of the submitted job, used to query its status.
job_id = client.deployments.get_job_uid(job)

In [111]:
# Poll the batch job until it reaches a terminal state, so that the cells below
# (listing and downloading the result asset) do not run against an unfinished
# job when the notebook is executed top to bottom. Uses the `time` module
# imported at the top of the notebook.
JOB_POLL_SECONDS = 5
JOB_TIMEOUT_SECONDS = 600
# 'completed' is the state shown in the recorded output.
# NOTE(review): 'failed' and 'canceled' are assumed to be the other terminal
# states — confirm against the WML client documentation.
JOB_TERMINAL_STATES = ('completed', 'failed', 'canceled')

job_deadline = time.time() + JOB_TIMEOUT_SECONDS
job_status = client.deployments.get_job_status(job_id)
while job_status.get('state') not in JOB_TERMINAL_STATES and time.time() < job_deadline:
    time.sleep(JOB_POLL_SECONDS)
    job_status = client.deployments.get_job_status(job_id)

# Show the last observed status (same dictionary shape as a single status call).
job_status

{'state': 'completed',
 'running_at': '2019-11-21T17:40:07.357731Z',
 'completed_at': '2019-11-21T17:40:08.224678Z'}

In [112]:
# List data assets again; the job's result file should now appear.
client.data_assets.list()

---------------------------------------------------------  ----------  ----  ------------------------------------
NAME                                                       ASSET_TYPE  SIZE  ASSET_ID
heart_test.csv                                             data_asset  6197  6f33a06a-d68e-4ab7-a2c9-78fc07479dd6
MyAsset_heart_data                                         data_asset  6197  89528d46-a316-487e-8291-bdb13c8aac23
sk_learn_results_81650c94-6459-4b10-8b77-bc8f6b9740f4.csv  data_asset  4527  5ea79cc9-980a-4607-9668-8875424558bd
---------------------------------------------------------  ----------  ----  ------------------------------------


In [113]:
# Download the batch results to a local file. The hardcoded id is the
# 'sk_learn_results_...' asset in the recorded listing above.
# NOTE(review): the id is specific to one run — replace it when re-running.
client.data_assets.download(asset_uid="5ea79cc9-980a-4607-9668-8875424558bd",filename="sk_learn_results.csv")

Successfully saved asset content to file: 'sk_learn_results.csv'


'/home/wsuser/work/sk_learn_results.csv'

In [51]:
# List data assets.
# NOTE(review): the execution count and recorded ids suggest this cell was run
# in a different session from the cells above.
client.data_assets.list()

---------------------------------------------------------  ----------  ----  ------------------------------------
NAME                                                       ASSET_TYPE  SIZE  ASSET_ID
heart_test.csv                                             data_asset  6197  6f33a06a-d68e-4ab7-a2c9-78fc07479dd6
sk_learn_results_ebca06d3-6497-4018-8571-8c887309503d.csv  data_asset  4527  174a7302-7515-4e40-a5a7-e13e55515291
---------------------------------------------------------  ----------  ----  ------------------------------------


In [52]:
# Delete a results asset; the hardcoded id is the 'sk_learn_results_...' entry
# in the recorded listing directly above.
client.data_assets.delete(asset_uid="174a7302-7515-4e40-a5a7-e13e55515291")

'SUCCESS'

In [53]:
# Check whether heart_data.csv exists in the working directory; the recorded
# output shows that it did not.
!ls heart_data.csv

ls: cannot access heart_data.csv: No such file or directory


In [106]:
# Copy the downloaded dataset to processed_data.csv.
# NOTE(review): the later data-asset upload reads heart_data.csv, not this
# file — confirm which file name is intended.
!cp processed.cleveland.data processed_data.csv

In [11]:
# Switch the default scope to a project identified by a hardcoded id.
# NOTE(review): the id is environment-specific; an earlier cell reads the
# project id from the PROJECT_ID environment variable instead.
client.set.default_project("6691f5c8-e78d-48b7-9265-b263febeb21a")

'SUCCESS'

In [12]:
# List the data assets in the project selected above.
client.data_assets.list()

--------------  ----------  ----  ------------------------------------
NAME            ASSET_TYPE  SIZE  ASSET_ID
heart_test.csv  data_asset  6197  a8b6d2b5-c5c7-45ee-a521-43c7669a9c2d
--------------  ----------  ----  ------------------------------------


In [20]:
# List the available spaces to pick one for the data asset.
client.spaces.list()

------------------------------------  -------------  ------------------------
GUID                                  NAME           CREATED
bc08fa02-bd5c-4ae4-bb8b-59c62b0b2fc0  checkpy_test1  2019-10-10T06:57:29.929Z
fae96817-a6a3-48b2-96d4-bd0540080daf  checkpy        2019-10-05T14:06:29.134Z
------------------------------------  -------------  ------------------------


# Create a data asset in a space

In [21]:
# Make the 'checkpy' space (per the recorded listing above) the default scope.
# Per the recorded output, this unsets the default project.
client.set.default_space("fae96817-a6a3-48b2-96d4-bd0540080daf")

Unsetting the project_id ...


'SUCCESS'

In [22]:
# Upload a local file as a data asset named "MyAsset_heart_data".
# NOTE(review): an earlier recorded `ls` reports heart_data.csv as missing, and
# the preceding copy step writes processed_data.csv — confirm the file exists
# before running this cell.
asset_details = client.data_assets.create(name="MyAsset_heart_data",file_path="heart_data.csv")

Creating data asset...
SUCCESS


In [23]:
# UID of the data asset that was just created.
asset_uid = client.data_assets.get_uid(asset_details)

In [34]:
# List the data assets in the space, including the newly created one.
client.data_assets.list()

-----------------------------------------------------------  ----------  ----  ------------------------------------
NAME                                                         ASSET_TYPE  SIZE  ASSET_ID
MyAsset_heart_data                                           data_asset  6197  9bbc0f70-7fee-47f0-a3e2-f120337b0e2f
sk_learn_results_1_411e2392-8fbf-4680-9336-56a22daeac93.csv  data_asset  4532  9267cf9f-9854-423d-8edc-361e5b038365
heart_test.csv                                               data_asset  6197  b5016049-3467-42fa-8da8-67f6eb626edf
sk_learn_results_a8de5c0d-773b-4e06-b943-d579e4edd659.csv    data_asset  4532  d4a9e154-9c97-49df-976f-8fcee1f1efcd
MyAsset_heart_data                                           data_asset  6197  f9d595cf-f4df-4213-a5b5-e33143ee53ea
-----------------------------------------------------------  ----------  ----  ------------------------------------


In [30]:
# Delete a data asset by id.
# NOTE(review): this hardcoded id does not appear in the recorded listing
# above, and the cell's execution count precedes that listing — replace the id
# (for example with asset_uid) when re-running.
client.data_assets.delete("e748490e-7339-4de9-a84d-99af60ae440c")

'SUCCESS'

# Example: downloading a model

In [120]:
# Download the stored model's content to a local file. The hardcoded id matches
# the Model UID printed when the model was saved.
# NOTE(review): prefer passing model_uid so the cell works on a fresh run.
client.repository.download("db7e4671-1275-4d82-b1b8-34213a43f353",filename="scikit_scikit-learn_0.20")

Successfully saved model content to file: 'scikit_scikit-learn_0.20'


'/home/wsuser/work/scikit_scikit-learn_0.20'

# Initialize a client that points to another cluster

In [None]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# Credentials for the second cluster: same token and settings as before, with a
# different URL. This rebinds wml_credentials.
wml_credentials = {
    "token": token,
    "instance_id": "wml_local",
    "url": "https://some-other-cluster.apps.cp4dfvt-lb-1.fyre.ibm.com",
    "version": "2.5.0",
}

# Separate client handle for the second cluster; `client` is left untouched.
client_other_cluster = WatsonMachineLearningAPIClient(wml_credentials)

# Save the model from the downloaded file to another cluster

In [None]:
# Select the target space on the other cluster.
# "space-id" is a placeholder — replace it with a real space GUID.
client_other_cluster.set.default_space("space-id")

In [None]:
# Model metadata for the copy stored on the other cluster: a new name, with the
# same runtime and model type as the original.
meta_props={
     client_other_cluster.repository.ModelMetaNames.NAME: "skl_pipeline_heart_problem_prediction_nov20",
     client_other_cluster.repository.ModelMetaNames.RUNTIME_UID: "scikit-learn_0.20-py3",
     client_other_cluster.repository.ModelMetaNames.TYPE: "scikit-learn_0.20"

}

In [None]:



# Store the previously downloaded model file in the other cluster's repository.
# The path is the location reported by the download cell.
model_artifact = client_other_cluster.repository.store_model(
    model="/home/wsuser/work/scikit_scikit-learn_0.20",
    meta_props=meta_props,
)