# Create and run a custom Vertex AI Training Job from a local script

## Install Vertex AI Python Client

In [None]:
# Install the packages listed in requirements.txt, upgrading any that are already present.
!pip install -r requirements.txt --upgrade

## GCP authentication

In [None]:
import os

# Location of the credentials .json file used for GCP authentication.
# TODO: path to credentials .json file
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""

## Create the custom Vertex AI Training Job

1. Define the custom job parameters
2. Submit the job to create a `Vertex AI Model`

In [None]:
# Import the Vertex AI SDK (Python Client)
from google.cloud import aiplatform

In [None]:
# Project metadata
PROJECT_ID = ""  # TODO

# The two parts below are joined with a hyphen to form LOCATION,
# e.g. "europe" and "west4" give "europe-west4".
REGION = ""  # TODO e.g. europe
ZONE = ""  # TODO e.g. west4
LOCATION = "{}-{}".format(REGION, ZONE)

In [None]:
# Initialise the Vertex AI SDK with the project and location defined above.
aiplatform.init(project=PROJECT_ID, location=LOCATION)

In [None]:
# Variables for specifying the job

# TODO: How the job is displayed on Vertex AI GUI
DISPLAY_NAME = "news-classifier-training"

# Path to local training script
SCRIPT_PATH = "./task.py"

# TODO GCS URI where metadata and artifacts are stored for this job
STAGING_BUCKET = ""

# Pre-built training image
MODEL_TRAINING_IMAGE = (
    f"{REGION}-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.0-23:latest"
)

# Additional requirements not already part of the base image
REQUIREMENTS = ["wget"]

# Pre-built serving image
# !Required if the Training Pipeline produces a managed Vertex AI Model!
MODEL_SERVING_IMAGE = (
    f"{REGION}-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-23:latest"
)

In [None]:
# Job definition, assembled from the variables configured above
custom_training_job = aiplatform.CustomTrainingJob(
    # How the job is labelled
    display_name=DISPLAY_NAME,
    # Where the job and its staged files live
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=STAGING_BUCKET,
    # What is executed, and in which container
    script_path=SCRIPT_PATH,
    container_uri=MODEL_TRAINING_IMAGE,
    requirements=REQUIREMENTS,
    # Container image used to serve the resulting model
    model_serving_container_image_uri=MODEL_SERVING_IMAGE,
)

In [None]:
# Variables for running the job

# Standard VM with 4 CPUs
MACHINE_TYPE = "n1-standard-4"

# TODO: Name for the resulting managed Vertex AI Model.
# !Required if the Training Pipeline produces a managed Vertex AI Model!
# Note that a single job may produce multiple models (e.g. one per run).
MODEL_DISPLAY_NAME = "news-classifier-model"

# The url to download the training data from.
DATASET_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00359/NewsAggregatorDataset.zip"

In [None]:
# Run the job and keep the object it returns as `model`
model = custom_training_job.run(
    # Command-line arguments handed to the training script
    args=[
        f"--dataset_url={DATASET_URL}",
        f"--project_id={PROJECT_ID}",
    ],
    model_display_name=MODEL_DISPLAY_NAME,
    machine_type=MACHINE_TYPE,
)

In [None]:
# Keep the model's resource name so the model can be looked up again further down.
MODEL_RESOURCE_NAME = model.resource_name

## Deploy model to Vertex AI Endpoint

1. Retrieve the registered `Vertex AI Model`
2. Deploy the model to a new `Vertex AI Endpoint`
3. Get some test predictions from the endpoint

In [None]:
# Variables for serving the model

# Machine type intended for serving
MACHINE_TYPE_SERVING = "n1-standard-2"

# TODO: display name of the endpoint
ENDPOINT_DISPLAY_NAME = "news-classifier-endpoint"

In [None]:
# Create the endpoint in the same project and location used for training
endpoint = aiplatform.Endpoint.create(
    project=PROJECT_ID,
    location=LOCATION,
    display_name=ENDPOINT_DISPLAY_NAME,
)

In [None]:
# Retrieve the registered model by its resource name.
model = aiplatform.Model(model_name=MODEL_RESOURCE_NAME)

In [None]:
# Deploy the model to the endpoint and route all traffic to it.
model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=MODEL_DISPLAY_NAME,
    # Serve on the machine type configured for serving, not the training one.
    machine_type=MACHINE_TYPE_SERVING,
    traffic_percentage=100,
    # Minimum and maximum are equal, so the replica count stays at one.
    min_replica_count=1,
    max_replica_count=1,
    # No accelerators are requested.
    accelerator_type=None,
    accelerator_count=None,
)

In [None]:
# Request a test prediction from the endpoint.
# `instances` takes the list of instances directly; the SDK builds the
# request body itself, so the list must not be wrapped in an {"instances": ...} dict.
endpoint.predict(instances=["A news headline to be classified"])