# Vertex SDK Custom Training with Python Package for TF-Keras Text Classification GPU

## Setup

In [None]:
# Notebook-wide settings: the GCP project, the region used for the bucket and
# the SDK, and the Cloud Storage bucket URI used for staging and outputs.
PROJECT_ID = "aiplatform-dev"
REGION = "us-central1"
BUCKET_NAME = "gs://aiplatform-dev"

In [None]:
! gsutil mb -l $REGION $BUCKET_NAME

In [None]:
# List the current contents of the bucket.
! gsutil ls -al $BUCKET_NAME

In [None]:
# Short identifier for this tutorial; later cells reuse it as the GCS
# sub-folder name and as the training job's display name.
tutorial_name = "tf-keras-txt-cls-gpu"

## Local Training

In [None]:
# Show the files that make up the local `trainer` directory.
! ls trainer

In [None]:
# Print the dependency list that ships with the trainer code.
! cat trainer/requirements.txt

In [None]:
! pip install -r trainer/requirements.txt

In [None]:
# Print the source of the training entry point for inspection.
! cat trainer/task.py

In [None]:
# Run the training script locally from this notebook, passing `--epochs 5`
# (the same arguments later handed to the remote job via `container_args`).
%run trainer/task.py \
  --epochs 5

In [None]:
# List ./tmp -- presumably where the local run of task.py wrote its outputs;
# confirm against trainer/task.py.
! ls ./tmp

In [None]:
# Delete the ./tmp directory so nothing from the local run is left behind.
! rm -rf ./tmp

## Vertex SDK Custom Training using Python Package

### Build Python Package

In [None]:
# Print the packaging script used to build the trainer distribution.
! cat setup.py

In [None]:
# Build a source distribution of the trainer as a gzipped tarball.
%run setup.py sdist --formats=gztar

In [None]:
# Confirm that the expected archive exists in dist/.
! ls dist/trainer-0.1.tar.gz

### Upload the Package to GCS

In [None]:
# Module inside the package that the training job will execute.
python_module_name = "trainer.task"

# Destination of the packaged trainer in Cloud Storage:
# <bucket>/<tutorial name>/<archive file name>.
python_package_gcs_uri = f"{BUCKET_NAME}/{tutorial_name}/trainer-0.1.tar.gz"

In [None]:
# Upload the locally built archive to its Cloud Storage location.
! gsutil cp dist/trainer-0.1.tar.gz $python_package_gcs_uri

In [None]:
# Verify that the uploaded archive is present in Cloud Storage.
! gsutil ls $python_package_gcs_uri


In [None]:
# Remove the local build output (dist/ and the egg-info metadata directory).
! rm -rf dist
! rm -rf trainer.egg-info

### Pre-Built TensorFlow Container for Training

In [None]:
# Container image in which the training package will run.
# NOTE(review): the `latest` tag is not pinned, so the image contents may
# change between runs -- confirm this is intended.
prebuilt_container_image_uri = "us-docker.pkg.dev/vertex-ai/training/tf-gpu.2-4:latest"

### Configs

In [None]:
# Print the top-level requirements file -- presumably the notebook's own
# dependencies (e.g. the SDK imported below); confirm against the file.
!cat requirements.txt

In [None]:
!pip install -r requirements.txt

In [None]:
from google.cloud import aiplatform

# Set the SDK-wide defaults (project, region, staging bucket) once so that the
# job objects created below do not need them passed explicitly.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_NAME,
)

In [None]:
# Job identity and the Cloud Storage prefix handed to the job as its base
# output directory.
display_name = tutorial_name
gcs_output_uri_prefix = f"{BUCKET_NAME}/{display_name}"

# Compute resources: one replica of the machine type below with four GPUs.
machine_type = "n1-standard-32"
replica_count = 1
accelerator_type = "NVIDIA_TESLA_P100"
accelerator_count = 4

# Command-line arguments forwarded to the training module.
container_args = ["--epochs", "5"]

### Run a CustomJob

In [None]:
# Describe the training job: which container to use, which uploaded package to
# install in it, and which module of that package to execute.
custom_training_job = aiplatform.CustomPythonPackageTrainingJob(
    display_name=display_name,
    container_uri=prebuilt_container_image_uri,
    python_package_gcs_uri=python_package_gcs_uri,
    python_module_name=python_module_name,
)

In [None]:
# Launch the job with the configured hardware, script arguments and output
# location.
custom_training_job.run(
    args=container_args,
    base_output_dir=gcs_output_uri_prefix,
    machine_type=machine_type,
    replica_count=replica_count,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
)

In [None]:
# Report the job's resource name and the prefix under which its outputs live.
print(f"Custom Training Job Name: {custom_training_job.resource_name}")
print(f"GCS Output URI Prefix: {gcs_output_uri_prefix}")

### Training Artifact

In [None]:
# List what is stored under the job's output prefix.
! gsutil ls $gcs_output_uri_prefix

In [None]:
# Clean up Cloud Storage: delete the job's output prefix and the uploaded
# package archive.
# NOTE(review): both paths are built as BUCKET_NAME/tutorial_name/..., so the
# archive appears to live under the output prefix and may already be gone
# when the second command runs -- confirm.
! gsutil rm -rf $gcs_output_uri_prefix
! gsutil rm -rf $python_package_gcs_uri
