In [None]:
! pip3 install --upgrade google-cloud-aiplatform

### Set project ID, location, and GCS Bucket

In [None]:
# Placeholders: replace all three with your own values before running the notebook.
PROJECT_ID = "GCP_PROJECT"  # Google Cloud project handed to aiplatform.init
LOCATION = "GCP_LOCATION"  # region, e.g. "us-central1"; the text before its first "-" also selects the container registry host
GCS_BUCKET = "GCS_BUCKET"  # bucket used for staging, the trainer package, the dataset and model output; presumably a "gs://<bucket>" URI without trailing slash — confirm

In [2]:
from google.cloud import aiplatform
from google.cloud.aiplatform import hyperparameter_tuning as hpt
import pandas as pd
import numpy as np
import os

### Initialise the Vertex AI SDK

In [3]:
# Bind the SDK session to the configured project, region and staging bucket.
aiplatform.init(
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=GCS_BUCKET,
)

### Set up the XGBoost Docker images (publicly available)

In [4]:
# Container versions used for training and for serving.
TRAIN_VERSION = "xgboost-cpu.1-6"
DEPLOY_VERSION = "xgboost-cpu.1-6"

# Image URIs: the registry host is the part of LOCATION before its first "-"
# followed by "-docker.pkg.dev"; both images use the ":latest" tag.
TRAIN_IMAGE = f"{LOCATION.split('-')[0]}-docker.pkg.dev/vertex-ai/training/{TRAIN_VERSION}:latest"
DEPLOY_IMAGE = f"{LOCATION.split('-')[0]}-docker.pkg.dev/vertex-ai/prediction/{DEPLOY_VERSION}:latest"


### Copy the hyperparameter tuning code to Cloud Storage

In [11]:
! rm -f boston-data-trainer.tar boston-data-trainer.tar.gz
! tar cvf boston-data-trainer.tar boston-data-trainer
! gzip boston-data-trainer.tar
! gsutil cp boston-data-trainer.tar.gz $GCS_BUCKET/

boston-data-trainer/
boston-data-trainer/README.md
boston-data-trainer/setup.py
boston-data-trainer/trainer/
boston-data-trainer/trainer/__init__.py
boston-data-trainer/trainer/.ipynb_checkpoints/
boston-data-trainer/trainer/.ipynb_checkpoints/train-checkpoint.py
boston-data-trainer/trainer/train.py
boston-data-trainer/PKG-INFO
boston-data-trainer/setup.cfg
Copying file://boston-data-trainer.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  2.1 KiB/  2.1 KiB]                                                
Operation completed over 1 objects/2.1 KiB.                                      


# Set disk type, disk size, and machine spec for the hyperparameter tuning worker pool

In [5]:
# Boot disk settings for each worker
DISK_TYPE = "pd-ssd"  # [ pd-ssd, pd-standard]
DISK_SIZE = 100  # GB

# Worker machine type; accelerator_count is 0, so no accelerators are requested
COMPUTE_MACHINE = "n1-standard-4"

# Spec dictionaries consumed by the worker pool definition
machine_spec = dict(machine_type=COMPUTE_MACHINE, accelerator_count=0)
disk_spec = dict(boot_disk_type=DISK_TYPE, boot_disk_size_gb=DISK_SIZE)

In [6]:
# Cloud Storage prefix passed to the job as its base output directory
MODEL_DIR = f"{GCS_BUCKET}/aiplatform-custom-job"
# Cloud Storage prefix that holds the training dataset
DATASET_DIR = f"{GCS_BUCKET}/dataset"

# Command-line arguments forwarded to the trainer module:
# feature matrix and target CSVs under DATASET_DIR
ARGS = [
    f"--dataset-data-url={DATASET_DIR}/data.csv",
    f"--dataset-labels-url={DATASET_DIR}/target.csv",
]

# Single-replica worker pool that runs the uploaded trainer package
# (module trainer.train) inside the training container image
worker_pool_spec = [
    {
        "replica_count": 1,
        "machine_spec": machine_spec,
        "disk_spec": disk_spec,
        "python_package_spec": {
            "executor_image_uri": TRAIN_IMAGE,
            "package_uris": [f"{GCS_BUCKET}/boston-data-trainer.tar.gz"],
            "python_module": "trainer.train",
            "args": ARGS,
        },
    },
]

# Launch hyperparameter tuning job

In [7]:

# Custom job definition that the tuning job is built on: it runs the worker
# pool configured earlier and uses MODEL_DIR as its base output directory.
job = aiplatform.CustomJob(
    display_name="boston",
    worker_pool_specs=worker_pool_spec,
    base_output_dir=MODEL_DIR,
)

### Set up hyperparameter tuning parameters
### Use the default search algorithm (Bayesian)

In [8]:

# Hyperparameter tuning job wrapping the custom job (passed via custom_job=job).
hpt_job = aiplatform.HyperparameterTuningJob(
    display_name="Boston Housing",
    custom_job=job,
    # Objective: minimise the metric whose id is "mean_squared_error".
    # NOTE(review): presumably trainer.train reports a metric under this exact id — confirm.
    metric_spec={
        "mean_squared_error": "minimize",
    },
    # Search space. The keys are the parameter ids that appear in each trial's
    # `parameters` list.
    parameter_spec={
        "n-estimators": hpt.IntegerParameterSpec(min=50, max=150, scale="linear"),
        "max-depth": hpt.IntegerParameterSpec(min=3, max=7, scale="linear"),
        "learning-rate": hpt.DoubleParameterSpec(min=0.01, max=0.2, scale="log"),
        "subsample": hpt.DoubleParameterSpec(min=0.3, max=0.9, scale="linear"),
    },
    # None leaves the choice of search algorithm to the service default
    # (Bayesian, according to this notebook's section heading).
    search_algorithm=None,
    max_trial_count=20,  # total number of trials
    parallel_trial_count=4,  # number of trials run in parallel
)

In [9]:
# Submit the tuning job; the job state is logged repeatedly while it runs
# (see the cell output).
hpt_job.run()

Creating HyperparameterTuningJob
HyperparameterTuningJob created. Resource name: projects/163391326695/locations/us-central1/hyperparameterTuningJobs/8907984101454118912
To use this HyperparameterTuningJob in another session:
hpt_job = aiplatform.HyperparameterTuningJob.get('projects/163391326695/locations/us-central1/hyperparameterTuningJobs/8907984101454118912')
View HyperparameterTuningJob:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/8907984101454118912?project=163391326695
HyperparameterTuningJob projects/163391326695/locations/us-central1/hyperparameterTuningJobs/8907984101454118912 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJob projects/163391326695/locations/us-central1/hyperparameterTuningJobs/8907984101454118912 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJob projects/163391326695/locations/us-central1/hyperparameterTuningJobs/8907984101454118912 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJ

In [10]:
# Dump every trial (id, state, sampled parameters, final measurement) for inspection.
print(hpt_job.trials)

[id: "1"
state: SUCCEEDED
parameters {
  parameter_id: "learning-rate"
  value {
    number_value: 0.04472135954999579
  }
}
parameters {
  parameter_id: "max-depth"
  value {
    number_value: 5.0
  }
}
parameters {
  parameter_id: "n-estimators"
  value {
    number_value: 100.0
  }
}
parameters {
  parameter_id: "subsample"
  value {
    number_value: 0.6000000000000001
  }
}
final_measurement {
  step_count: 1
  metrics {
    metric_id: "mean_squared_error"
    value: 19.513283731437348
  }
}
start_time {
  seconds: 1729766367
  nanos: 920411993
}
end_time {
  seconds: 1729766708
}
, id: "2"
state: SUCCEEDED
parameters {
  parameter_id: "learning-rate"
  value {
    number_value: 0.0840938675925055
  }
}
parameters {
  parameter_id: "max-depth"
  value {
    number_value: 5.0
  }
}
parameters {
  parameter_id: "n-estimators"
  value {
    number_value: 81.0
  }
}
parameters {
  parameter_id: "subsample"
  value {
    number_value: 0.5290918676298215
  }
}
final_measurement {
  step

In [11]:
# Pick the trial with the lowest mean_squared_error.
#
# `best` starts with an infinite error so that any trial which reported the
# metric can win, regardless of the metric's scale.
best = {
    "id": None,
    "learning_rate": None,
    "max_depth": None,
    "n_estimators": None,
    "subsample": None,
    "mean_squared_error": float("inf"),
}

# Iterate through the trials and update the best configuration
for trial in hpt_job.trials:
    # Look the metric up by id; trials that did not finish successfully may
    # carry no final measurement at all, so skip those instead of indexing [0].
    metrics = {m.metric_id: m.value for m in trial.final_measurement.metrics}
    if "mean_squared_error" not in metrics:
        continue

    mse = float(metrics["mean_squared_error"])
    # Keep track of the best outcome
    if mse < best["mean_squared_error"]:
        # Resolve parameters by parameter_id rather than by list position, so
        # the result does not depend on the order the service returns them in.
        params = {p.parameter_id: float(p.value) for p in trial.parameters}
        best = {
            "id": trial.id,
            "learning_rate": params["learning-rate"],
            "max_depth": params["max-depth"],
            "n_estimators": params["n-estimators"],
            "subsample": params["subsample"],
            "mean_squared_error": mse,
        }

# Fail here with a clear message rather than later when the model path is built
# from a missing trial id.
if best["id"] is None:
    raise RuntimeError(
        "No trial reported a 'mean_squared_error' metric; cannot select a best trial."
    )

# print details of the best configuration
print(best)

{'id': '8', 'learning_rate': 0.11117540967587292, 'max_depth': 3.0, 'n_estimators': 128.0, 'subsample': 0.3008784076506272, 'mean_squared_error': 14.953580245548158}


In [12]:
# Fetch the best model
BEST_MODEL_DIR = MODEL_DIR + "/" + best["id"] + "/model"

! gsutil ls {BEST_MODEL_DIR}

gs://ml-task-bucket/aiplatform-custom-job/8/model/
gs://ml-task-bucket/aiplatform-custom-job/8/model/metrics.json
gs://ml-task-bucket/aiplatform-custom-job/8/model/model.bst


In [13]:
# Download the best trial's model directory into the current working directory.
! gsutil cp -r {BEST_MODEL_DIR} .

Copying gs://ml-task-bucket/aiplatform-custom-job/8/model/metrics.json...
Copying gs://ml-task-bucket/aiplatform-custom-job/8/model/model.bst...          
/ [2 files][ 64.7 KiB/ 64.7 KiB]                                                
Operation completed over 2 objects/64.7 KiB.                                     


In [18]:
# Clean up the resources created by this notebook.

# Delete the hyperparameter tuning job
hpt_job.delete()

# Delete the Cloud Storage bucket
# Destructive: removes the bucket and everything in it (dataset, trainer
# package, model output), so it is opt-in.
delete_bucket = False  # Set True to delete the bucket

if delete_bucket:
    ! gsutil rm -r $GCS_BUCKET

# Delete the locally generated files
# (the trainer source directory is intentionally kept; uncomment to remove it too)
# ! rm -rf boston-data-trainer/
! rm boston-data-trainer.tar.gz

Deleting HyperparameterTuningJob : projects/163391326695/locations/us-central1/hyperparameterTuningJobs/6199970525125541888
HyperparameterTuningJob deleted. . Resource name: projects/163391326695/locations/us-central1/hyperparameterTuningJobs/6199970525125541888
Deleting HyperparameterTuningJob resource: projects/163391326695/locations/us-central1/hyperparameterTuningJobs/6199970525125541888
Delete HyperparameterTuningJob backing LRO: projects/163391326695/locations/us-central1/operations/4405152276797194240
HyperparameterTuningJob resource projects/163391326695/locations/us-central1/hyperparameterTuningJobs/6199970525125541888 deleted.
