In [5]:
from google.cloud import aiplatform
import os
from datetime import datetime

In [6]:
# Google Cloud project and region used to initialise the Vertex AI SDK below.
PROJECT_ID = "mlops-explorations"
REGION = "us-central1"
# Root Cloud Storage bucket; the staging and model locations derive from it.
BUCKET_URI = "gs://mlops-fine-tuning-gemma"
# Service account handed to the training job (see `service_account=` in run()).
SERVICE_ACCOUNT = "939282436854-compute@developer.gserviceaccount.com"
# NOTE(review): not referenced by any cell visible here — presumably the
# archive the base model weights were originally copied from; confirm.
VERTEX_AI_MODEL_GARDEN_GEMMA = "https://storage.googleapis.com/vertex-ai/generative-ai/model-garden/gemma.tar.gz"
# ETL: https://colab.research.google.com/drive/1eBPABHwRo6vyAdUoBJ5ERVS8xRe-wwLv?usp=sharing
# GCS object names always use "/" as the separator, so the URIs are built with
# plain string formatting instead of os.path.join, whose separator depends on
# the operating system the notebook happens to run on.
STAGING_BUCKET = f"{BUCKET_URI}/temporal"
MODEL_BUCKET = f"{BUCKET_URI}/gemma"
# Container image that runs the fine-tuning job.
TRAIN_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-peft-train:20240220_0936_RC01"
# Prefix under which the base model directories are looked up.
base_model_path_prefix = MODEL_BUCKET
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)


In [7]:
# Prompt template file, forwarded to the training container as `--template`.
template = "gs://mlops-fine-tuning-gemma/templates/code-alpaca.json"

# Dataset identifier, forwarded as `--dataset_name`.
dataset_name = "HuggingFaceH4/CodeAlpaca_20K"

# Dataset column name, forwarded as `--instruct_column_in_dataset`.
instruct_column_in_dataset = "text"

In [8]:
def get_job_name_with_datetime(prefix: str) -> str:
    """Return ``prefix`` followed by a timestamp suffix.

    The suffix is the current local time (``datetime.now()``) rendered as
    ``YYYYMMDD_HHMMSS`` and joined to ``prefix`` with an underscore, so names
    generated in different seconds are distinct.

    Args:
        prefix: Leading part of the name, e.g. ``"gemma-lora-train"``.

    Returns:
        A string of the form ``<prefix>_<YYYYMMDD>_<HHMMSS>``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return "_".join((prefix, timestamp))

In [10]:
# The Gemma base model.
MODEL_ID = "gemma-2b" # ["gemma-2b", "gemma-2b-it", "gemma-7b", "gemma-7b-it"]
# The accelerator to use.
ACCELERATOR_TYPE = "NVIDIA_L4"  # ["NVIDIA_TESLA_V100", "NVIDIA_L4", "NVIDIA_TESLA_A100"]
# GCS location of the base model. Built with "/" directly because GCS URIs
# always use forward slashes, whereas os.path.join follows the host OS.
base_model_id = f"{base_model_path_prefix}/{MODEL_ID}"

# Batch size for finetuning.
per_device_train_batch_size = 1
# Number of training steps, forwarded to the container as `--max_steps`.
max_steps = 1000
# Precision mode for finetuning.
finetuning_precision_mode = "float16"  # ["4bit", "8bit", "float16"]
# Learning rate.
learning_rate = 2e-4
# LoRA parameters.
lora_rank = 16
lora_alpha = 64
lora_dropout = 0.1

# Worker pool spec.
# Gemma only supports single-GPU finetuning.
accelerator_count = 1
machine_type = None

# Reject model / accelerator / precision combinations that are known not to
# work before any job is submitted.
is_7b_model = "7b" in MODEL_ID
if is_7b_model:
    assert (
        ACCELERATOR_TYPE != "NVIDIA_TESLA_V100"
    ), "Finetuning Gemma 7B models is not supported on NVIDIA_TESLA_V100. Try a GPU with more VRAM."
if is_7b_model and ACCELERATOR_TYPE == "NVIDIA_L4":
    assert (
        finetuning_precision_mode == "4bit"
    ), f"{finetuning_precision_mode} finetuning of Gemma 7B models is not supported on NVIDIA_L4. Try a GPU with more VRAM or use a different precision mode."

# Machine type paired with each supported accelerator; an accelerator missing
# from this table leaves `machine_type` as None and trips the assertion below.
machine_type_by_accelerator = {
    "NVIDIA_TESLA_V100": "n1-standard-8",
    "NVIDIA_L4": "g2-standard-12",
    "NVIDIA_TESLA_A100": "a2-highgpu-1g",
}
machine_type = machine_type_by_accelerator.get(ACCELERATOR_TYPE)

assert (
    machine_type
), f"Cannot automatically determine machine type from {ACCELERATOR_TYPE}."

# A single worker replica is requested.
replica_count = 1

# Setup training job.
job_name = get_job_name_with_datetime("gemma-lora-train")

# Define the custom-container training job. Nothing is submitted here; the
# training arguments are passed when `train_job.run(...)` is called.
train_job = aiplatform.CustomContainerTrainingJob(
    display_name=job_name,
    container_uri=TRAIN_DOCKER_URI,
)

# GCS path (under the staging bucket) for the LoRA adapter. Only the path
# string is built here. GCS URIs always use "/", so string formatting is used
# instead of os.path.join, whose separator depends on the host OS.
lora_adapter_dir = get_job_name_with_datetime("gemma-lora-adapter")
lora_output_dir = f"{STAGING_BUCKET}/{lora_adapter_dir}"

# GCS path (under the staging bucket) for the merged model, i.e. the base
# model combined with the finetuned LoRA adapter.
merged_model_dir = get_job_name_with_datetime("gemma-merged-model")
merged_model_output_dir = f"{STAGING_BUCKET}/{merged_model_dir}"

# Launch the training job. Every hyperparameter configured above is forwarded
# to the training container as a `--flag=value` command-line argument.
train_job.run(
    args=[
        "--task=instruct-lora",
        f"--pretrained_model_id={base_model_id}",
        f"--dataset_name={dataset_name}",
        f"--instruct_column_in_dataset={instruct_column_in_dataset}",
        f"--output_dir={lora_output_dir}",
        f"--merge_base_and_lora_output_dir={merged_model_output_dir}",
        f"--per_device_train_batch_size={per_device_train_batch_size}",
        f"--lora_rank={lora_rank}",
        f"--lora_alpha={lora_alpha}",
        f"--lora_dropout={lora_dropout}",
        f"--max_steps={max_steps}",
        "--max_seq_length=512",
        f"--learning_rate={learning_rate}",
        f"--precision_mode={finetuning_precision_mode}",
        f"--template={template}"
    ],
    # Environment variable values must be strings (the SDK documents this
    # parameter as Dict[str, str]), so the flag is passed as "true" rather
    # than the Python bool True.
    environment_variables={"WANDB_DISABLED": "true"},
    replica_count=replica_count,
    machine_type=machine_type,
    accelerator_type=ACCELERATOR_TYPE,
    accelerator_count=accelerator_count,
    boot_disk_size_gb=500,
    service_account=SERVICE_ACCOUNT,
)

# Report the GCS locations that were passed to the job as output directories.
print("LoRA adapter was saved in: ", lora_output_dir)
print("Trained and merged models were saved in: ", merged_model_output_dir)


Training Output directory:
gs://mlops-fine-tuning-gemma/temporal/aiplatform-custom-training-2024-04-08-02:50:06.352 
View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/4304458314730700800?project=939282436854
