# Train an XGBoost Model with Watson Machine Learning Accelerator

Notebook created by Zeming Zhao on June, 2021

XGBoost is an implementation of gradient boosted decision trees designed for speed and performance. It has recently been dominating applied machine learning and Kaggle competitions for structured or tabular data.

This notebook covers the following sections:

1. [Setup Xgboost Model using xgboost-gpu lib](#rbm-model)<br>

1. [Training the model on GPU with Watson Machine Learning Accelerator](#gpu)<br>

1. [Training the model on CPU with Watson Machine Learning Accelerator](#cpu)<br>

<a id = "rbm-model"></a>
## Step 1: Set up the XGBoost model using the xgboost library

### Prepare directory and file for writing Xgboost engine.

In [2]:
from pathlib import Path

# Directory (on the notebook host) where the training script is written, and
# the script's file name. Later cells build paths from these two values.
model_dir = '/data/models'
model_main = 'Xgboost_main.py'

# parents=True also creates missing intermediate directories (e.g. /data)
# instead of raising FileNotFoundError on a fresh machine; exist_ok=True keeps
# the cell safe to re-run.
Path(model_dir).mkdir(parents=True, exist_ok=True)
print("create model directory done.")

create model directory done.


### Create an XGBoost training script based on the xgboost library, to run on GPU or CPU workers

In [11]:
%%writefile {model_dir}/{model_main}

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, accuracy_score
import numpy as np
import datetime
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import xgboost as xgb

# prepare data: the iris dataset bundled with scikit-learn
dataset = load_iris()

X = dataset.data
y = dataset.target

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

D_train = xgb.DMatrix(X_train, label=y_train)
D_test = xgb.DMatrix(X_test, label=y_test)

# set parameters
# NOTE(review): no 'tree_method' is set here, so XGBoost picks its default --
# confirm whether the GPU run is expected to request a GPU tree method explicitly.
param = {
    'eta': 0.3,
    'max_depth': 3,
    'objective': 'multi:softprob',
    'num_class': 3}

steps = 20  # The number of training iterations

# setup model and train (wall-clock time is measured around xgb.train only)
start = datetime.datetime.now()
model = xgb.train(param, D_train, steps)
end = datetime.datetime.now()
print ("Xgboost train timecost: %.2gs" % ((end-start).total_seconds()))


# predict
start = datetime.datetime.now()
preds = model.predict(D_test)
end = datetime.datetime.now()
print ("Xgboost predict timecost: %.2gs" % ((end-start).total_seconds()))

# check result
# Each row of `preds` holds one score per class; the predicted label is the
# index of the largest score in the row.
best_preds = np.argmax(preds, axis=1)
print("Precision = {}".format(precision_score(y_test, best_preds, average='macro')))
print("Recall = {}".format(recall_score(y_test, best_preds, average='macro')))
print("Accuracy = {}".format(accuracy_score(y_test, best_preds)))

# save the xgboost model into a file
import pickle
filename = './xgboost_model.pkl'
# The context manager flushes and closes the file even if pickling fails.
with open(filename, 'wb') as model_fh:
    pickle.dump(model, model_fh)
print("Xgboost model saved successfully.")

Overwriting /data/models/Xgboost_main.py


## Step 2 :  Training the Xgboost model on GPU with Watson Machine Learning Accelerator

<a id = "gpu"></a>
#### Import the libraries used to submit the job to Watson Machine Learning Accelerator:

In [12]:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

from matplotlib import pyplot as plt
%pylab inline

import base64
import json
import time
import urllib

Populating the interactive namespace from numpy and matplotlib


#### Configuring your environment and project details
To set up your project details, provide your credentials in this cell. You must include your cluster URL, username, and password.

In [13]:
import getpass
import os

# Cluster host name: taken from the WMLA_HOST environment variable when set,
# otherwise the default below.
hostname = os.environ.get('WMLA_HOST', 'wmla-console-wmla.apps.wml1x180.ma.platformlab.ibm.com')  # please enter Watson Machine Learning Accelerator host name

# Credentials in 'username:password' form. They must never be stored in the
# notebook: read them from WMLA_LOGIN, or prompt (without echo) when it is unset.
login = os.environ.get('WMLA_LOGIN') or getpass.getpass("WMLA login ('username:password'): ")

# HTTP Basic authentication: base64 of 'username:password'.
es = base64.b64encode(login.encode('utf-8')).decode("utf-8")
commonHeaders={'Authorization': 'Basic '+es}
req = requests.Session()
auth_url = 'https://{}/auth/v1/logon'.format(hostname)
print(auth_url)

# verify=False skips TLS certificate validation for this request.
a=requests.get(auth_url,headers=commonHeaders, verify=False)
# Surface bad credentials / wrong host as an HTTP error here instead of a
# confusing KeyError on 'accessToken' below.
a.raise_for_status()
access_token=a.json()['accessToken']

https://wmla-console-wmla.apps.wml1x180.ma.platformlab.ibm.com/auth/v1/logon


In [14]:
# Base URL of the deep learning REST endpoints on the cluster.
dl_rest_url = f'https://{hostname}/platform/rest/deeplearning/v1'

# Headers for all following REST calls: JSON responses, authenticated with the
# token returned by the logon request.
commonHeaders = {
    'accept': 'application/json',
    'X-Auth-Token': access_token,
}

# Session object used for the REST calls below.
req = requests.Session()

In [15]:
# Health check: GET the /conf endpoint with the token headers and fail fast
# here, rather than in a later cell, if the service or the token is not usable.
confUrl = 'https://{}/platform/rest/deeplearning/v1/conf'.format(hostname)
r = req.get(confUrl, headers=commonHeaders, verify=False)
if not r.ok:
    raise RuntimeError('health check failed: code=%s, %s' % (r.status_code, r.content))


#### Define the status-checking function

In [16]:
import tarfile
import tempfile
import os
import json
import pprint
import pandas as pd
from IPython.display import clear_output

def query_job_status(job_id, refresh_rate=3):
    """Poll a submitted job until it leaves the active states.

    On every poll the cell output is cleared and the current job record is
    shown both as a one-row DataFrame and as a pretty-printed dict.

    Parameters
    ----------
    job_id : dict
        Parsed JSON returned by the job submission request; only its
        ``'id'`` key is read.
    refresh_rate : int or float, default 3
        Seconds to wait between two polls.

    Returns
    -------
    requests.Response
        The response of the last status request, i.e. the first one whose
        ``'state'`` is not PENDING_CRD_SCHEDULER, SUBMITTED or RUNNING.

    Raises
    ------
    requests.HTTPError
        If a status request comes back with an HTTP error code.
    """
    execURL = dl_rest_url + '/execs/' + job_id['id']
    pp = pprint.PrettyPrinter(indent=2)
    active_states = ('PENDING_CRD_SCHEDULER', 'SUBMITTED', 'RUNNING')

    # Display option only needs to be set once, not on every poll.
    pd.set_option('display.max_colwidth', 120)

    while True:
        res = req.get(execURL, headers=commonHeaders, verify=False)
        # Fail with the HTTP error instead of a KeyError on 'state' below.
        res.raise_for_status()
        status = res.json()  # parse the body once per poll

        monitoring = pd.DataFrame(status, index=[0])
        clear_output()
        print("Refreshing every {} seconds".format(refresh_rate))
        display(monitoring)
        pp.pprint(status)

        if status['state'] not in active_states:
            # Job is no longer pending/running: return without a final sleep.
            return res
        time.sleep(refresh_rate)

In [17]:
model_file = model_dir+"/"+model_main

# Read the script into memory instead of storing an open handle in `files`:
# the handle was never closed, and once a request had read it to the end,
# re-running the submission cell alone would upload an empty file.
# The (filename, content) tuple keeps the uploaded file name explicit.
with open(model_file, 'rb') as script_fh:
    files = {'file': (Path(model_file).name, script_fh.read())}

# Job arguments for the GPU run. The backslash continuations are inside the
# string literal, so the leading spaces of the following lines are part of the value.
# NOTE(review): /gpfs/mydatafs/models/ is presumably the cluster-side location
# of the uploaded script -- confirm against the cluster's storage setup.
args = '--exec-start tensorflow --cs-datastore-meta type=fs \
                     --workerDeviceNum 1 \
                     --conda-env-name dlipy3 \
                     --model-main /gpfs/mydatafs/models/'+model_main+' --workerDeviceType gpu'

In [18]:
# Imported in this cell so it does not rely on another cell (or `%pylab`)
# having put `datetime` into the namespace.
import datetime

starttime = datetime.datetime.now()

# Submit the job: the arguments travel in the query string, the training
# script as a multipart file upload.
r = requests.post(dl_rest_url+'/execs?args='+args, files=files,
                  headers=commonHeaders, verify=False)
if not r.ok:
    # Stop here: without a successful submission there is no job id to poll.
    raise RuntimeError('submit job failed: code=%s, %s'%(r.status_code, r.content))

# Blocks until the job leaves the pending/running states.
job_status = query_job_status(r.json(),refresh_rate=5)

endtime = datetime.datetime.now()

# total_seconds() covers the whole interval; `.seconds` alone drops whole days.
print("\nTraining cost: ", int((endtime - starttime).total_seconds()), " seconds.")

Refreshing every 5 seconds


Unnamed: 0,id,args,submissionId,creator,state,appId,schedulerUrl,modelFileOwnerName,workDir,appName,createTime,elastic,nameSpace,numWorker,framework
0,wmla-214,--exec-start tensorflow --cs-datastore-meta type=fs --workerDeviceNum 1 --...,wmla-214,admin,FINISHED,wmla-214,https://wmla-mss:9080,wmla,/gpfs/myresultfs/admin/batchworkdir/wmla-214/_submitted_code,SingleNodeTensorflowTrain,2021-07-05T09:04:46Z,False,wmla,1,tensorflow


{ 'appId': 'wmla-214',
  'appName': 'SingleNodeTensorflowTrain',
  'args': '--exec-start tensorflow --cs-datastore-meta '
          'type=fs                      --workerDeviceNum '
          '1                      --conda-env-name dlipy3                      '
          '--model-main /gpfs/mydatafs/models/Xgboost_main.py '
          '--workerDeviceType gpu ',
  'createTime': '2021-07-05T09:04:46Z',
  'creator': 'admin',
  'elastic': False,
  'framework': 'tensorflow',
  'id': 'wmla-214',
  'modelFileOwnerName': 'wmla',
  'nameSpace': 'wmla',
  'numWorker': 1,
  'schedulerUrl': 'https://wmla-mss:9080',
  'state': 'FINISHED',
  'submissionId': 'wmla-214',
  'workDir': '/gpfs/myresultfs/admin/batchworkdir/wmla-214/_submitted_code'}

Training cost:  37  seconds.


## Step 3 :  Training the Xgboost model on CPU with Watson Machine Learning Accelerator

In [19]:
model_file = model_dir+"/"+model_main

# Read the script into memory instead of storing an open handle in `files`:
# the handle was never closed, and once a request had read it to the end,
# re-running the submission cell alone would upload an empty file.
# The (filename, content) tuple keeps the uploaded file name explicit.
with open(model_file, 'rb') as script_fh:
    files = {'file': (Path(model_file).name, script_fh.read())}

# Job arguments for the CPU run. The backslash continuations are inside the
# string literal, so the leading spaces of the following lines are part of the value.
# NOTE(review): /gpfs/mydatafs/models/ is presumably the cluster-side location
# of the uploaded script -- confirm against the cluster's storage setup.
args = '--exec-start tensorflow --cs-datastore-meta type=fs \
                     --workerDeviceNum 1 \
                     --conda-env-name rapids-21.06  \
                     --model-main /gpfs/mydatafs/models/'+model_main+' --workerDeviceType cpu'

In [20]:
import datetime

starttime = datetime.datetime.now()

# Submit the job: the arguments travel in the query string, the training
# script as a multipart file upload.
r = requests.post(dl_rest_url+'/execs?args='+args, files=files,
                  headers=commonHeaders, verify=False)
if not r.ok:
    # Stop here: without a successful submission there is no job id to poll.
    raise RuntimeError('submit job failed: code=%s, %s'%(r.status_code, r.content))

# Blocks until the job leaves the pending/running states.
job_status = query_job_status(r.json(),refresh_rate=5)

endtime = datetime.datetime.now()
# total_seconds() covers the whole interval; `.seconds` alone drops whole days.
print("Training cost: ", int((endtime - starttime).total_seconds()), " seconds.")


Refreshing every 5 seconds


Unnamed: 0,id,args,submissionId,creator,state,appId,schedulerUrl,modelFileOwnerName,workDir,appName,createTime,elastic,nameSpace,numWorker,framework
0,wmla-215,--exec-start tensorflow --cs-datastore-meta type=fs --workerDeviceNum 1 --...,wmla-215,admin,FINISHED,wmla-215,https://wmla-mss:9080,wmla,/gpfs/myresultfs/admin/batchworkdir/wmla-215/_submitted_code,SingleNodeTensorflowTrain,2021-07-05T09:06:14Z,False,wmla,1,tensorflow


{ 'appId': 'wmla-215',
  'appName': 'SingleNodeTensorflowTrain',
  'args': '--exec-start tensorflow --cs-datastore-meta '
          'type=fs                      --workerDeviceNum '
          '1                      --conda-env-name '
          'rapids-21.06                       --model-main '
          '/gpfs/mydatafs/models/Xgboost_main.py --workerDeviceType cpu ',
  'createTime': '2021-07-05T09:06:14Z',
  'creator': 'admin',
  'elastic': False,
  'framework': 'tensorflow',
  'id': 'wmla-215',
  'modelFileOwnerName': 'wmla',
  'nameSpace': 'wmla',
  'numWorker': 1,
  'schedulerUrl': 'https://wmla-mss:9080',
  'state': 'FINISHED',
  'submissionId': 'wmla-215',
  'workDir': '/gpfs/myresultfs/admin/batchworkdir/wmla-215/_submitted_code'}
Training cost:  26  seconds.
