In [None]:
! pip install --upgrade --quiet gcsfs==2024.3.1
! pip install --upgrade --quiet accelerate==0.34.2
! pip install --upgrade --quiet transformers==4.47.1
! pip install --upgrade --quiet datasets==2.20.0

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.0/172.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m324.4/324.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:

BUCKET_URI = "gs://"
REGION = ""

# Import the necessary packages
! rm -rf vertex-ai-samples && git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git
! cd vertex-ai-samples && git reset --hard c45f6a4f4d32e31a050f0e4ba52824b0caf4eda3

import datetime
import importlib
import os
import uuid
from typing import Tuple

from google.cloud import aiplatform
from google.cloud.aiplatform.compat.types import \
    custom_job as gca_custom_job_compat

common_util = importlib.import_module(
    "vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util"
)

models, endpoints = {}, {}

# Get the default cloud project id.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
if not REGION:
    if not os.environ.get("GOOGLE_CLOUD_REGION"):
        raise ValueError(
            "REGION must be set. See"
            " https://cloud.google.com/vertex-ai/docs/general/locations for"
            " available cloud locations."
        )
    REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

# Cloud Storage bucket for storing the experiment artifacts.
# A unique GCS bucket will be created for the purpose of this notebook. If you
# prefer using your own GCS bucket, change the value yourself below.
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])

if BUCKET_URI is None or BUCKET_URI.strip() == "" or BUCKET_URI == "gs://":
    BUCKET_URI = f"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}"
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    ! gsutil mb -l {REGION} {BUCKET_URI}
else:
    assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."
    shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep "Location constraint:" | sed "s/Location constraint://"
    bucket_region = shell_output[0].strip().lower()
    if bucket_region != REGION:
        raise ValueError(
            "Bucket region %s is different from notebook region %s"
            % (bucket_region, REGION)
        )
print(f"Using this GCS Bucket: {BUCKET_URI}")

STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
MODEL_BUCKET = os.path.join(BUCKET_URI, "llama3_1")


# Initialize Vertex AI API.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

# Gets the default SERVICE_ACCOUNT.
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)


# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME

! gcloud config set project $PROJECT_ID
! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role="roles/storage.admin"
! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role="roles/aiplatform.user"

Cloning into 'vertex-ai-samples'...
remote: Enumerating objects: 44403, done.[K
remote: Counting objects: 100% (149/149), done.[K
remote: Compressing objects: 100% (113/113), done.[K
remote: Total 44403 (delta 80), reused 44 (delta 36), pack-reused 44254 (from 3)[K
Receiving objects: 100% (44403/44403), 99.36 MiB | 27.34 MiB/s, done.
Resolving deltas: 100% (34431/34431), done.
HEAD is now at c45f6a4f Support VPC-SC and workbench
Enabling Vertex AI API and Compute Engine API.
Operation "operations/acat.p2-878731303791-6f955d32-b1a3-415a-892c-2af8908ed2e3" finished successfully.
Creating gs://cs4296-tmp-20250421064552-f939/...
Using this GCS Bucket: gs://cs4296-tmp-20250421064552-f939
Initializing Vertex AI API.
Using this default Service Account: 878731303791-compute@developer.gserviceaccount.com
Updated property [core/project].


In [None]:
# Where to load the base model from: "Hugging Face" or "Google Cloud".
LOAD_MODEL_FROM = "Hugging Face"

# Never hard-code credentials in a notebook. The Hugging Face read token is
# taken from the HF_TOKEN environment variable of the kernel process.
# NOTE(review): the token that was previously committed in this cell must be
# treated as leaked and revoked on huggingface.co.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# GCS path of the Llama 3.1 artifacts from Vertex AI Model Garden; only
# required when LOAD_MODEL_FROM is not "Hugging Face".
VERTEX_AI_MODEL_GARDEN_LLAMA3_1 = ""
if LOAD_MODEL_FROM == "Hugging Face":
    assert (
        HF_TOKEN
    ), "Provide a read HF_TOKEN to load models from Hugging Face, or select a different model source."
else:
    assert (
        VERTEX_AI_MODEL_GARDEN_LLAMA3_1
    ), "Click the agreement of Llama3.1 in Vertex AI Model Garden, and get the GCS path of the model artifacts."
    # Point MODEL_BUCKET at the Model Garden artifacts only on this path, so
    # the Hugging Face path does not overwrite it with an empty string.
    MODEL_BUCKET = VERTEX_AI_MODEL_GARDEN_LLAMA3_1


In [None]:
# Dataset configuration. Training and evaluation read different splits of the
# same dataset.
train_dataset = eval_dataset = "timdettmers/openassistant-guanaco"
train_split, eval_split = "train", "test"

# Name of the prompt template used for dataset validation and passed to the
# training job.
template = "openassistant-guanaco"

# Dataset column that holds the training text.
train_column = "text"

# Sequence length limit used for dataset validation and training.
max_seq_length = 4096

In [None]:
# Hugging Face id of the base model to fine-tune.
base_model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# When loading from Google Cloud the weights live under MODEL_BUCKET in a
# folder named after the model (the part of the id after the last "/");
# otherwise the Hugging Face id is used as-is.
pretrained_model_id = (
    os.path.join(MODEL_BUCKET, base_model_id.split("/")[-1])
    if LOAD_MODEL_FROM == "Google Cloud"
    else base_model_id
)

In [None]:
dataset_validation_util = importlib.import_module(
    "vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.dataset_validation_util"
)

if dataset_validation_util.is_gcs_path(pretrained_model_id):
    # Download tokenizer.
    ! mkdir tokenizer
    ! gsutil cp {pretrained_model_id}/tokenizer.json ./tokenizer
    ! gsutil cp {pretrained_model_id}/config.json ./tokenizer
    tokenizer_path = "./tokenizer"
    access_token = ""
else:
    tokenizer_path = pretrained_model_id
    access_token = HF_TOKEN

tokenizer = dataset_validation_util.load_tokenizer(tokenizer_path, None, access_token)

# Validate the train dataset.
dataset_validation_util.validate_dataset_with_template(
    dataset_name=train_dataset,
    split=train_split,
    input_column=train_column,
    template=template,
    max_seq_length=max_seq_length,
    use_multiprocessing=False,
    tokenizer=tokenizer,
)

# Validate the eval dataset if it exists.
if eval_dataset:
    dataset_validation_util.validate_dataset_with_template(
        dataset_name=eval_dataset,
        split=eval_split,
        input_column=train_column,
        template=template,
        max_seq_length=max_seq_length,
        use_multiprocessing=False,
        tokenizer=tokenizer,
    )

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/395 [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


Downloading data:   0%|          | 0.00/20.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.11M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9846 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/518 [00:00<?, ? examples/s]

Map:   0%|          | 0/9846 [00:00<?, ? examples/s]

Filter:   0%|          | 0/9846 [00:00<?, ? examples/s]

Dataset openassistant-guanaco is compatible with the openassistant-guanaco template.


Repo card metadata block was not found. Setting CardData to empty.


Map:   0%|          | 0/518 [00:00<?, ? examples/s]

Filter:   0%|          | 0/518 [00:00<?, ? examples/s]

Dataset openassistant-guanaco is compatible with the openassistant-guanaco template.


In [None]:
# GPU type used for the training job.
training_accelerator_type = "NVIDIA_A100_80GB"

# The accelerator type decides which image repository is used and whether the
# job is scheduled through Dynamic Workload Scheduler.
if training_accelerator_type != "NVIDIA_A100_80GB":
    repo = "us-docker.pkg.dev/vertex-ai"
    is_restricted_image = False
    is_dynamic_workload_scheduler = True
    dws_kwargs = dict(
        max_wait_duration=1800,  # 30 minutes
        scheduling_strategy=gca_custom_job_compat.Scheduling.Strategy.FLEX_START,
    )
else:
    repo = "us-docker.pkg.dev/vertex-ai-restricted"
    is_restricted_image = True
    is_dynamic_workload_scheduler = False
    dws_kwargs = {}
    # The 405B variant is rejected on A100 GPUs.
    if "405b" in base_model_id.lower():
        raise ValueError(
            "405B model is not supported with Nvidia A100 GPUs. Use Nvidia H100 GPUs instead."
        )

# Training container image, pinned to a dated stable tag.
TRAIN_DOCKER_URI = f"{repo}/vertex-vision-model-garden-dockers/pytorch-peft-train:stable_20250409"

# Worker pool spec: machine type, GPUs per node and boot disk size for the
# selected accelerator. Only A100 80GB and H100 80GB have recommended settings.
boot_disk_size_gb = 500
if training_accelerator_type not in ("NVIDIA_A100_80GB", "NVIDIA_H100_80GB"):
    raise ValueError(
        f"Recommended machine settings not found for: {training_accelerator_type}. To use another accelerator type, edit this code block to pass in an appropriate `training_machine_type`, `training_accelerator_type`, and `per_node_accelerator_count` by clicking `Show Code` and then modifying the code."
    )

# Both supported machine shapes carry eight GPUs per node.
per_node_accelerator_count = 8
if training_accelerator_type == "NVIDIA_A100_80GB":
    training_machine_type = "a2-ultragpu-8g"
else:
    training_machine_type = "a3-highgpu-8g"
    # The 405B model gets a larger boot disk.
    if "405b" in base_model_id.lower():
        boot_disk_size_gb = 2000


# Number of training nodes (replicas).
replica_count = 1

# Set config file.
# A single node uses the 8-GPU FSDP config; two to four nodes use the HSDP
# config named after the total GPU count. Anything else, including zero or a
# negative count, has no recommended config and is rejected explicitly rather
# than producing a non-existent "llama_hsdp_0gpu.yaml".
if replica_count == 1:
    config_file = "vertex_vision_model_garden_peft/llama_fsdp_8gpu.yaml"
elif 1 < replica_count <= 4:
    config_file = (
        "vertex_vision_model_garden_peft/"
        f"llama_hsdp_{replica_count * per_node_accelerator_count}gpu.yaml"
    )
else:
    raise ValueError(
        f"Recommended config settings not found for replica_count: {replica_count}."
    )

# Training hyperparameters; each value is forwarded to the training container
# as a command-line argument.

# Batching and schedule length. max_steps is left at -1 and the run length is
# given by num_train_epochs.
per_device_train_batch_size = 1
gradient_accumulation_steps = 4
max_steps = -1
num_train_epochs = 1.0

# Precision and optimisation.
finetuning_precision_mode = "4bit"
learning_rate = 5e-5
lr_scheduler_type = "cosine"
optimizer = "adamw_torch"
# Kept numeric like the other hyperparameters; it renders as "0.01" in the
# command-line argument exactly as the former string value did.
warmup_ratio = 0.01

# LoRA adapter shape.
lora_rank = 16
lora_alpha = 32
lora_dropout = 0.05

# Memory / attention settings.
gradient_checkpointing = True
attn_implementation = "flash_attention_2"

# Reporting, checkpointing and evaluation cadence. Logging uses the same
# interval as checkpoint saving.
report_to = "tensorboard"
save_steps = 10
logging_steps = save_steps
eval_metric_name = "loss,perplexity,bleu"
metric_for_best_model = "perplexity"

# Check quota for the total number of GPUs across all replicas before
# launching anything.
common_util.check_quota(
    accelerator_type=training_accelerator_type,
    accelerator_count=per_node_accelerator_count * replica_count,
    project_id=PROJECT_ID,
    region=REGION,
    is_for_training=True,
    is_restricted_image=is_restricted_image,
    is_dynamic_workload_scheduler=is_dynamic_workload_scheduler,
)

job_name = common_util.get_job_name_with_datetime("llama3_1-lora-train")

# GCS locations for this job. These lines only build paths; nothing is
# created in the bucket here.
base_output_dir = os.path.join(STAGING_BUCKET, job_name)
# Path where the LoRA adapter is written.
lora_output_dir = os.path.join(base_output_dir, "adapter")
# Path of the final fine-tuned LoRA checkpoint.
final_checkpoint = os.path.join(
    base_output_dir, "adapter", "node-0", "checkpoint-final"
)

# Labels attached to the fine-tuning job.
labels = {
    "mg-source": "notebook",
    "mg-notebook-name": "model_garden_pytorch_llama3_1_finetuning",
    "mg-tune": "publishers-meta-models-llama3-1",
}
# Model name from the id, lower-cased with dots replaced by dashes.
versioned_model_id = base_model_id.split("/")[1].lower().replace(".", "-")
labels["versioned-mg-tune"] = labels["mg-tune"] + "-" + versioned_model_id

# Evaluation flags for the training container, rendered as "--name=value".
# Evaluation runs on the same step interval as checkpoint saving.
eval_args = [
    f"--{flag}={value}"
    for flag, value in {
        "eval_dataset": eval_dataset,
        "eval_column": train_column,
        "eval_template": template,
        "eval_split": eval_split,
        "eval_steps": save_steps,
        "eval_metric_name": eval_metric_name,
        "metric_for_best_model": metric_for_best_model,
    }.items()
]

# Full argument list: training flags in a fixed order, followed by the
# evaluation flags.
# NOTE(review): the Hugging Face token is passed as a plain command-line
# argument, so take care wherever this list is displayed or logged.
train_job_args = [
    f"--{flag}={value}"
    for flag, value in {
        "config_file": config_file,
        "task": "instruct-lora",
        "input_masking": "True",
        "pretrained_model_name_or_path": pretrained_model_id,
        "train_dataset": train_dataset,
        "train_split": train_split,
        "train_column": train_column,
        "output_dir": lora_output_dir,
        "per_device_train_batch_size": per_device_train_batch_size,
        "gradient_accumulation_steps": gradient_accumulation_steps,
        "lora_rank": lora_rank,
        "lora_alpha": lora_alpha,
        "lora_dropout": lora_dropout,
        "max_steps": max_steps,
        "max_seq_length": max_seq_length,
        "learning_rate": learning_rate,
        "lr_scheduler_type": lr_scheduler_type,
        "precision_mode": finetuning_precision_mode,
        "gradient_checkpointing": gradient_checkpointing,
        "num_train_epochs": num_train_epochs,
        "attn_implementation": attn_implementation,
        "optimizer": optimizer,
        "warmup_ratio": warmup_ratio,
        "report_to": report_to,
        "logging_output_dir": base_output_dir,
        "save_steps": save_steps,
        "logging_steps": logging_steps,
        "train_template": template,
        "huggingface_access_token": HF_TOKEN,
    }.items()
] + eval_args

# Pass training arguments and launch job.
train_job = aiplatform.CustomContainerTrainingJob(
    display_name=job_name,
    container_uri=TRAIN_DOCKER_URI,
    labels=labels,
)

# Echo the arguments for reference, but never print the Hugging Face token:
# cell output is saved with the notebook and would leak the credential.
print("Running training job with args:")
print(
    " \\\n".join(
        "--huggingface_access_token=<redacted>"
        if arg.startswith("--huggingface_access_token=")
        else arg
        for arg in train_job_args
    )
)
train_job.run(
    args=train_job_args,
    replica_count=replica_count,
    machine_type=training_machine_type,
    accelerator_type=training_accelerator_type,
    accelerator_count=per_node_accelerator_count,
    boot_disk_size_gb=boot_disk_size_gb,
    service_account=SERVICE_ACCOUNT,
    base_output_dir=base_output_dir,
    sync=False,  # Non-blocking call to run.
    **dws_kwargs,  # Scheduler options; empty for A100 jobs.
)

# Wait until resource has been created.
train_job.wait_for_resource_creation()

print("LoRA adapter will be saved in:", lora_output_dir)
print("Final checkpoint will be saved in:", final_checkpoint)


INFO:google.cloud.aiplatform.training_jobs:Training Output directory:
gs://cs4296-tmp-20250421064552-f939/temporal/llama3-1-lora-train-20250421-064937 


Running training job with args:
--config_file=vertex_vision_model_garden_peft/llama_fsdp_8gpu.yaml \
--task=instruct-lora \
--input_masking=True \
--pretrained_model_name_or_path=meta-llama/Meta-Llama-3.1-8B-Instruct \
--train_dataset=timdettmers/openassistant-guanaco \
--train_split=train \
--train_column=text \
--output_dir=gs://cs4296-tmp-20250421064552-f939/temporal/llama3-1-lora-train-20250421-064937/adapter \
--per_device_train_batch_size=1 \
--gradient_accumulation_steps=4 \
--lora_rank=16 \
--lora_alpha=32 \
--lora_dropout=0.05 \
--max_steps=-1 \
--max_seq_length=4096 \
--learning_rate=5e-05 \
--lr_scheduler_type=cosine \
--precision_mode=4bit \
--gradient_checkpointing=True \
--num_train_epochs=1.0 \
--attn_implementation=flash_attention_2 \
--optimizer=adamw_torch \
--warmup_ratio=0.01 \
--report_to=tensorboard \
--logging_output_dir=gs://cs4296-tmp-20250421064552-f939/temporal/llama3-1-lora-train-20250421-064937 \
--save_steps=10 \
--logging_steps=10 \
--train_template=opena

INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/2235997787189673984?project=878731303791


LoRA adapter will be saved in: gs://cs4296-tmp-20250421064552-f939/temporal/llama3-1-lora-train-20250421-064937/adapter
Final checkpoint will be saved in: gs://cs4296-tmp-20250421064552-f939/temporal/llama3-1-lora-train-20250421-064937/adapter/node-0/checkpoint-final
