In [None]:
# @title Copyright & License (click to expand)
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Tuning a PEFT model

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/tune_peft.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/tune_peft.ipynb">
        <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/generative_ai/tune_peft.ipynb">
      <img src="https://www.gstatic.com/cloud/images/navigation/vertex-ai.svg" alt="Vertex AI logo"> Open in Vertex AI Workbench
    </a>
  </td>
</table>

## Overview

This tutorial demonstrates how to use Vertex AI to tune a large language model (LLM) with parameter-efficient fine-tuning (PEFT) and make a prediction. This workflow improves a model's accuracy by fine-tuning a base model with a training dataset.

Learn more about [Tune text models by using supervised tuning](https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-text-models-supervised).

### Objective

In this tutorial, you learn to use `Vertex AI LLM` to tune and deploy a PEFT large language model.


This tutorial uses the following Google Cloud ML services:

- `Vertex AI LLM`
- `Vertex AI Model Garden`
- `Vertex AI Prediction`


The steps performed include:

- Get the Vertex AI LLM model.
- Tune the model.
  - This will automatically create a Vertex AI endpoint and deploy the model to it.
- Make a prediction using `Vertex AI LLM`.
- Make a prediction using `Vertex AI Prediction`.
- Run a post-tuning evaluation of the tuned model.

### Model

The pre-trained LLM is a BISON (decoder-only) model for text generation.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage
pricing](https://cloud.google.com/storage/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Installation

Install the following packages required to execute this notebook.

In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform  "shapely<2.0.0"

### Colab only: Uncomment the following cell to restart the kernel

In [None]:
# Automatically restart kernel after installs so that your environment can access the new packages
# import IPython

# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)

## Before you begin

### Set your project ID

**If you don't know your project ID**, try the following:
* Run `gcloud config list`.
* Run `gcloud projects list`.
* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)

In [None]:
# Replace the placeholder with your Google Cloud project ID.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Set this project as the active project in the gcloud configuration.
! gcloud config set project {PROJECT_ID}

#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Region passed to `aiplatform.init()` and used as the bucket location.
REGION = "us-central1"  # @param {type:"string"}

### Authenticate your Google Cloud account

Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below.

**1. Vertex AI Workbench**
* Do nothing as you are already authenticated.

**2. Local JupyterLab instance, uncomment and run:**

In [None]:
# ! gcloud auth login

**3. Colab, uncomment and run:**

In [None]:
# from google.colab import auth
# auth.authenticate_user()

**4. Service account or other**
* See how to grant Cloud Storage permissions to your service account at https://cloud.google.com/storage/docs/gsutil/commands/iam#ch-examples.

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [None]:
# Cloud Storage URI of the bucket that holds intermediate artifacts such as the
# datasets; replace the placeholder name with your own.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket in REGION under PROJECT_ID. Skip this cell if the bucket already exists.
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

#### Service Account

You use a service account to create Vertex AI Pipeline jobs.

In [None]:
# Leave empty to have the next cell look up a service account with gcloud.
SERVICE_ACCOUNT = ""  # @param {type:"string"}

In [None]:
import sys

IS_COLAB = "google.colab" in sys.modules
if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        shell_output = !gcloud auth list 2>/dev/null
        SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

    else:  # IS_COLAB:
        shell_output = ! gcloud projects describe $PROJECT_ID --format="value(projectNumber)"
        project_number = shell_output[0]
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)

#### Set service account access for Vertex AI Pipelines

Run the following commands to grant your service account access to read and write pipeline artifacts in the bucket that you created in the previous step. You only need to run this step once per service account.

In [None]:
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

### Import libraries and define constants

In [None]:
import google.cloud.aiplatform as aiplatform
from vertexai.preview.language_models import (TextGenerationModel,
                                              TuningEvaluationSpec)

## Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and corresponding bucket.

In [None]:
# Configure the SDK with the project, region, and staging bucket set earlier
# in the notebook.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
)

### Load pretrained model

Load the pretrained BISON model from Vertex AI LLM Model Garden.

In [None]:
# Load the base text-bison model; this `model` object is the one tuned below.
model = TextGenerationModel.from_pretrained("google/text-bison@001")

# Last expression in the cell, so its return value is shown as the cell output.
model.list_tuned_model_names()

### Tune the model

Next, you tune the model using the `tune_model()` method, with the following parameters:

`training_data`: A pandas DataFrame or Cloud Storage location of the training data for tuning the model.<br>
`learning_rate_multiplier`: A multiplier to apply to the recommended learning rate. To use the recommended learning rate, use 1.0. <br>
`train_steps`: The number of steps to run for model tuning. The batch size varies by tuning location:<br>
- us-central1 has a batch size of 8.
- europe-west4 has a batch size of 24.<br>

If there are 240 examples in a training dataset, in europe-west4, it takes 240 / 24 = 10 steps to process the entire dataset once. In us-central1, it takes 240 / 8 = 30 steps to process the entire dataset once. The default value is 300.<br>

`tuning_job_location`: The region where the tuning job should be run. Supported regions are: `us-central1` and `europe-west4`.<br>
`tuned_model_location`: The region where the tuned model should be deployed.

In [None]:
# Copy the sample evaluation and training datasets into your bucket.
! gsutil cp gs://cloud-samples-data/vertex-ai/model-evaluation/peft_eval_sample.jsonl {BUCKET_URI}/peft_eval_sample.jsonl
! gsutil cp gs://cloud-samples-data/vertex-ai/model-evaluation/peft_train_sample.jsonl {BUCKET_URI}/peft_train_sample.jsonl

# Evaluation settings used during tuning: the copied eval dataset, an
# evaluation interval of 20, and early stopping enabled.
tuning_evaluation_spec = TuningEvaluationSpec(
    evaluation_data=f"{BUCKET_URI}/peft_eval_sample.jsonl",
    evaluation_interval=20,
    enable_early_stopping=True,
)
model.tune_model(
    training_data=f"{BUCKET_URI}/peft_train_sample.jsonl",
    # For this sample training dataset, at least 100 steps are recommended.
    # Lower the value (for example, to 1) only for a fast demo iteration.
    train_steps=100,
    learning_rate_multiplier=1.0,
    tuning_job_location="europe-west4",
    tuned_model_location="us-central1",  # Only us-central1 is supported
    model_display_name="test_model",
    tuning_evaluation_spec=tuning_evaluation_spec,
)

### Make a prediction with Vertex AI LLM

Now, make a prediction using the `predict()` method from the Vertex AI LLM interface.

In [None]:
# Sample prompt: a medical procedure transcript followed by an empty `LABEL: ` field.
prompt = "TRANSCRIPT: \nPROCEDURE PERFORMED: , Umbilical hernia repair.,PROCEDURE:,  After informed consent was obtained, the patient was brought to the operative suite and placed supine on the operating table.  The patient was sedated, and an adequate local anesthetic was administered using 1% lidocaine without epinephrine.  The patient was prepped and draped in the usual sterile manner.,A standard curvilinear umbilical incision was made, and dissection was carried down to the hernia sac using a combination of Metzenbaum scissors and Bovie electrocautery.  The sac was cleared of overlying adherent tissue, and the fascial defect was delineated.  The fascia was cleared of any adherent tissue for a distance of 1.5 cm from the defect.  The sac was then placed into the abdominal cavity and the defect was closed primarily using simple interrupted 0 Vicryl sutures.  The umbilicus was then re-formed using 4-0 Vicryl to tack the umbilical skin to the fascia.,The wound was then irrigated using sterile saline, and hemostasis was obtained using Bovie electrocautery.  The skin was approximated with 4-0 Vicryl in a subcuticular fashion.  The skin was prepped with benzoin, and Steri-Strips were applied.  A dressing was then applied.  All surgical counts were reported as correct.,Having tolerated the procedure well, the patient was subsequently taken to the recovery room in good and stable condition.\n\n LABEL: "

In [None]:
# Send the prompt through the `TextGenerationModel` object and print the response.
print(model.predict(prompt))

### Get the deployed Vertex AI Endpoint resource

Next, get the Vertex AI Endpoint resource that the model was automatically deployed to.

In [None]:
# Build an `aiplatform.Endpoint` from the resource name held by the model object.
# NOTE(review): `_endpoint` is a private attribute of the SDK model object and may
# change between SDK versions — confirm whether a public accessor exists.
endpoint = aiplatform.Endpoint(model._endpoint.resource_name)
print(endpoint)

### Make a prediction using Vertex AI Prediction

Now, make a prediction using the `predict()` method from the Vertex AI Prediction interface, with the following parameters:

- `instances`: A list of one or more instances for prediction. Each instance has the format:
    - { "prompt": the_text_input }
- `parameters`: Parameters passed to the model's predict method. The values in the following example are the defaults.

In [None]:
# Call the endpoint directly with the same prompt; the call is the last
# expression in the cell, so its result is shown as the cell output.
endpoint.predict(
    instances=[{"prompt": prompt}],
    # Generation parameters sent along with the instances.
    parameters={
        "temperature": 0.0,
        "maxDecodeSteps": 128,
        "topP": 0.95,
        "topK": 40,
    },
)

### Run Post-Tuning Evaluation

Note that `ground_truth_data` must be a JSONL file in which each line is a JSON object in the following format:

```
{
  "prompt": "your input/prompt text",
  "ground_truth": "your ground truth output text"
}
```

* "prompt" corresponds to the "input_text" in the train dataset. This is needed for batch prediction.
* "ground_truth" corresponds to the "output_text" in the train dataset. This is needed for evaluation.


In [None]:
from vertexai.preview.language_models import EvaluationTextGenerationSpec

# Evaluate the same model object that was tuned earlier in the notebook.
tuned_model = model

# If the tuning session was interrupted, uncomment the following lines to load an
# existing tuned model instead (replace the model ID with your own).
# NOTE(review): `get_tuned_model()` presumably returns the tuned model rather than
# modifying the object in place, so its result is assigned here — confirm against the SDK.
# tuned_model = TextGenerationModel.from_pretrained("google/text-bison@001")
# tuned_model = tuned_model.get_tuned_model(f'projects/{PROJECT_ID}/locations/us-central1/models/3890975937629519872')

# Text generation example
evaluation_task_spec = EvaluationTextGenerationSpec(
    ground_truth_data=[
        "gs://cloud-samples-data/vertex-ai/model-evaluation/peft_test_sample.jsonl"
    ]
)

tuned_model.evaluate(task_spec=evaluation_task_spec)

## Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial.

In [None]:
import os

delete_bucket = False

endpoint.undeploy_all()
endpoint.delete()

if delete_bucket or os.getenv("IS_TESTING"):
    ! gsutil rm -rf {BUCKET_URI}