In [1]:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

import json
import os
import time
import urllib

# Name of the target cluster; must match one of the branches below.
system = 'colonia'

# Per-cluster connection settings and dataset locations.
if system == 'colonia':
    hostname = 'colonia04'
    myauth = ('Admin','Admin')
    protocol = 'http'
    port = 9280
    sigName = 'dli-hpo'
    trainFile = '/dlidata/dataset/price_prediction/pp_train.dmatrix'
    valFile = '/dlidata/dataset/price_prediction/pp_val.dmatrix'
elif system == 'ma1gpu':
    hostname = 'ma1gpu03.ma.platformlab.ibm.com'
    myauth = ('username','password')
    protocol = 'https'
    port = 9243
    sigName = 'wml-ig'
    trainFile = '/ma1gpu01_nfsv4/martin/shared_fs_userdata/datasets/higgs/train/HIGGS_train.dmatrix'
    testFile = '/ma1gpu01_nfsv4/martin/shared_fs_userdata/datasets/higgs/test/HIGGS_test.dmatrix'
    valFile = '/ma1gpu01_nfsv4/martin/shared_fs_userdata/datasets/higgs/val/HIGGS_val.dmatrix'
else:
    # Fail here with a clear message instead of a NameError on `protocol` below.
    raise ValueError("unknown system %r; expected 'colonia' or 'ma1gpu'" % (system,))

# Allow real credentials to come from the environment so they do not have to be
# written into the notebook; the literals above remain the fallback.
myauth = (
    os.environ.get('DLI_USERNAME', myauth[0]),
    os.environ.get('DLI_PASSWORD', myauth[1]),
)

# Headers sent with every REST request in this notebook.
commonHeaders={'accept': 'application/json'}

# Endpoint that the tuning job is POSTed to.
startTuneUrl='%s://%s:%d/platform/rest/deeplearning/v1/hypersearch' % (protocol, hostname, port)

# One shared HTTP session, reused by the REST calls in the later cells.
req = requests.Session()


In [5]:
import tarfile
import tempfile
import os
def make_tarfile(output_filename, source_dir):
    """Write a gzip-compressed tar archive of a directory.

    Args:
        output_filename: Path of the archive file to create (overwritten if
            it already exists).
        source_dir: Directory to archive. Its contents are stored under a
            single top-level folder named after the directory itself.
    """
    # normpath removes a trailing separator first: basename('dir/') is '' and
    # would put the files at the archive root instead of under 'dir/'.
    archive_root = os.path.basename(os.path.normpath(source_dir))
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir, arcname=archive_root)


# Request payload for the hypersearch call: how to launch the model
# ('modelSpec'), how to search ('algoDef') and what to search over
# ('hyperParams').
data =  {
        'modelSpec': 
        {
            'sigName': sigName,
            # NOTE: the backslash continuations are inside the string literal,
            # so the leading spaces of each continued line are part of the
            # resulting args string. Do not re-indent these lines casually.
            'args': '--exec-start XGboost --cs-datastore-meta type=fs \
                     --gpuPerWorker 1 --model-main main.py \
                     --model-dir xgb-model \
                     --trainFile %s \
                     --valFile %s' % (trainFile, valFile)
        },
        # Search strategy and the metric it optimises.
'algoDef':
        {
            'algorithm': 'Random',
            'maxRunTime': 30,
            'maxJobNum': 100,
            # NOTE(review): key spelling ('Paralle') is kept as written;
            # confirm against the REST API before "correcting" it.
            'maxParalleJobNum': 4,
            'objectiveMetric' : 'mse_val',
            'objective' : 'minimize'
        },
        # Search space: one entry per tunable hyperparameter.
        'hyperParams':
        [
             {
                 'name': 'learning_rate',
                 'type': 'Range',
                 'dataType': 'DOUBLE',
                 # NOTE(review): with 'power': 10 the bounds are presumably
                 # exponents (10**-2 .. 10**0) - confirm against the API docs.
                 'minDbVal': -2,
                 'maxDbVal': 0,
                 'power': 10,
             },
             {
                 'name': 'num_rounds',
                 'type': 'Range',
                 'dataType': 'INT',
                 'minIntVal': 1,
                 'maxIntVal': 1000,
                 'step': 1,
             },
             {
                 'name': 'max_depth',
                 'type': 'Range',
                 'dataType': 'INT',
                 'minIntVal': 1,
                 'maxIntVal': 14,
                 'step': 1,
             },
             {
                 'name': 'lambda',
                 'type': 'Range',
                 'dataType': 'DOUBLE',
                 # NOTE(review): same 'power' convention as learning_rate,
                 # presumably 10**-2 .. 10**5 - confirm.
                 'minDbVal': -2,
                 'maxDbVal': 5,
                 'power': 10,
             },
             {
                 'name': 'colsample_bytree',
                 'type': 'Range',
                 'dataType': 'DOUBLE',
                 'minDbVal': 0.01,
                 'maxDbVal': 1.0,
                 'step': 0.01,
             },


         ]
    }

# The whole payload is sent as one form field named 'data' holding JSON text.
mydata={'data':json.dumps(data)}

# Package the local model directory and submit the tuning job.
MODEL_DIR_SUFFIX = ".modelDir.tar"

# mkstemp creates the file atomically; mktemp only returns a name and is
# deprecated because another process can claim that name first.
tempFd, tempFile = tempfile.mkstemp(suffix=MODEL_DIR_SUFFIX)
os.close(tempFd)  # make_tarfile reopens the path itself
try:
    make_tarfile(tempFile, './xgb-model')
    # Keep the archive open only for the duration of the upload.
    with open(tempFile, 'rb') as modelArchive:
        files = {'file': modelArchive}
        create = req.post(startTuneUrl, headers=commonHeaders, data=mydata, files=files, verify=False, auth=myauth)
finally:
    # The archive is only needed for the upload; do not leave it in the temp dir.
    os.remove(tempFile)

if not create.ok:
    print('submit tune job failed: code=%s, %s'%(create.status_code, create.content))
else:
    print('submit tune job succeed with hponame: %s'%create.json())

submit tune job succeed with hponame: Admin-hpo-83966261958354


In [6]:
import time

# Poll the tuning task until it leaves the SUBMITTED/RUNNING states, then dump
# the final task description.
hpoName = create.json()

getHpoUrl = '%s://%s:%d/platform/rest/deeplearning/v1/hypersearch/%s' % (protocol, hostname, port, hpoName)

while True:
    res = req.get(getHpoUrl, headers=commonHeaders, verify=False, auth=myauth)
    # Check every response, not just the first one: a failed poll has no
    # usable 'state' field and would otherwise raise mid-loop.
    if not res.ok:
        print('get hpo task failed: code=%s, %s'%(res.status_code, res.content))
        break

    json_out=res.json()
    if json_out['state'] not in ['SUBMITTED','RUNNING']:
        print('Hpo task %s completes with state %s'%(hpoName, json_out['state']))
        print(json.dumps(json_out, indent=4, sort_keys=True))
        break

    print('Hpo task %s state %s progress %s%%'%(hpoName, json_out['state'], json_out['progress']))
    time.sleep(20)


Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 0%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 4%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 4%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 4%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 4%
Hpo task Admin-hpo-83966261958354 state RUNNING progress 4%
Hpo task Admin-hpo-83966261958354 state 