# Create and run a custom Vertex AI Training Job from a local script

## Install the Vertex AI Python client (from requirements.txt)

In [None]:
!pip install -r requirements.txt --upgrade

## GCP authentication

In [None]:
import os

# TODO: path to the service-account credentials .json file.
# Leave empty to keep whatever credentials the environment already provides.
CREDENTIALS_PATH = ""

# Export the variable only when a path was actually supplied. Assigning an
# empty string unconditionally would overwrite a value that is already set
# correctly in the environment, and an empty path is not a usable
# credentials file (NOTE(review): google-auth appears to treat "" as an
# explicit, missing file instead of falling back to other credentials).
if CREDENTIALS_PATH:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = CREDENTIALS_PATH

## Create the custom Vertex AI Training Job

In [None]:
# Import the Vertex AI SDK (Python Client)
from google.cloud import aiplatform

In [None]:
# --- Project metadata ---------------------------------------------------
PROJECT_ID = ""  # TODO
LOCATION = ""  # TODO

# --- Job specification --------------------------------------------------
# TODO: name under which the job is displayed in the Vertex AI GUI
DISPLAY_NAME = ""

# Path to the local training script
SCRIPT_PATH = "./task.py"

# TODO: GCS URI where metadata and artifacts are stored for this job
STAGING_BUCKET = ""

# Pre-built training image
MODEL_TRAINING_IMAGE = (
    "europe-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.0-23:latest"
)

# Additional requirements not already part of the base image
REQUIREMENTS = ["wget"]

# Pre-built serving image.
# !Required if the Training Pipeline produces a managed Vertex AI Model!
MODEL_SERVING_IMAGE = (
    "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-23:latest"
)

In [None]:
# Define the custom training job from the configuration values set above.
# Nothing is passed here beyond those constants.
custom_training_job = aiplatform.CustomTrainingJob(
    # How the job shows up and where it lives
    display_name=DISPLAY_NAME,
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=STAGING_BUCKET,
    # What gets executed: the local script, its base image and extra packages
    script_path=SCRIPT_PATH,
    container_uri=MODEL_TRAINING_IMAGE,
    requirements=REQUIREMENTS,
    # Serving image for the model produced by the job
    model_serving_container_image_uri=MODEL_SERVING_IMAGE,
)

In [None]:
# --- Settings for running the job ---------------------------------------
# Standard VM with 4 CPUs
MACHINE_TYPE = "n1-standard-4"

# TODO: name for the resulting managed Vertex AI Model.
# !Required if the Training Pipeline produces a managed Vertex AI Model!
# Note that a single job may produce multiple models (e.g. one per run).
MODEL_DISPLAY_NAME = ""

# TODO: URL to download the training data from; handed to our script
# through its --dataset_url argument.
DATASET_URL = ""

In [None]:
# Launch the job defined above. The entries in `args` are the command-line
# flags for the training script; whatever run() returns is kept in `model`.
model = custom_training_job.run(
    args=[
        f"--dataset_url={DATASET_URL}",
        f"--project_id={PROJECT_ID}",
    ],
    model_display_name=MODEL_DISPLAY_NAME,
    machine_type=MACHINE_TYPE,
)

In [None]:
# Display the object returned by custom_training_job.run(...) as the cell output.
model