In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex Prompt Optimizer Notebook SDK (Preview)

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fprompts%2Fprompt_optimizer%2Fvertex_ai_prompt_optimizer_sdk.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb">
      <img width="32px" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>
    

| | | |
|-|-|-|
|Author | [Ivan Nardini](https://github.com/inardini)

##  I. Overview

In the context of developing Generative AI (Gen AI) applications, prompt engineering poses challenges due to its time-consuming and error-prone nature. You often dedicate significant effort to crafting and inputting prompts to achieve successful task completion. Additionally, with the frequent release of foundational models, you face the additional burden of migrating working prompts from one model version to another.

Vertex AI Prompt Optimizer aims to alleviate these challenges by providing you with an intelligent prompt optimization tool. With this tool, you can both refine the system instruction (and task) in your prompts and select the best demonstrations (few-shot examples) for prompt templates, empowering you to shape LLM responses from any source model to a target Google model.


### Objective

This notebook demonstrates how to leverage Vertex AI Prompt Optimizer (Preview) to optimize a simple prompt for a Gemini model using your own metrics. The goal is to use Vertex AI Prompt Optimizer (Preview) to find the new prompt template that generates the most correct and grounded responses.

This tutorial uses the following Google Cloud ML services and resources:

- Vertex Gen AI
- Vertex AI Prompt Optimizer (Preview)
- Vertex AI Model Eval
- Vertex AI Custom job

The steps performed include:

- Prepare the prompt-ground truth pairs optimized for another model
- Define the prompt template you want to optimize
- Set target model and evaluation metric
- Set optimization mode and steps
- Run the automatic prompt optimization job
- Collect the best prompt template and eval metric
- Validate the best prompt template

### Dataset

The dataset is a question-answering dataset generated by a simple AI cooking assistant that provides suggestions on how to cook healthier dishes.


### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage pricing](https://cloud.google.com/storage/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## II. Before you start

### Install Vertex AI SDK for Python and other required packages


In [None]:
%pip install --upgrade --quiet 'google-cloud-aiplatform[evaluation]'
%pip install --upgrade --quiet 'plotly'
%pip install --upgrade --quiet 'asyncio' 'tqdm' 'tenacity' 'etils' 'importlib_resources' 'fsspec' 'gcsfs' 'nbformat>=4.2.0'

In [None]:
! mkdir -p ./tutorial/utils && wget https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/utils/helpers.py -P ./tutorial/utils

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# The restart is only performed when the notebook runs inside Google Colab.
running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    import IPython

    # Shut the kernel down with restart=True so the new packages get loaded.
    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# Interactive user authentication is only triggered inside Google Colab.
if "google.colab" in sys.modules:
    from google.colab import auth as colab_auth

    colab_auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

#### Set your project ID and project number

In [None]:
# Google Cloud project used throughout the tutorial; replace the placeholder
# with your own project ID before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Make it the active project for the gcloud commands that follow.
! gcloud config set project {PROJECT_ID}

In [None]:
PROJECT_NUMBER = !gcloud projects describe {PROJECT_ID} --format="get(projectNumber)"[0]
PROJECT_NUMBER = PROJECT_NUMBER[0]

#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Location used when creating the bucket and when initializing the Vertex AI SDK.
REGION = "us-central1"  # @param {type: "string"}

#### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [None]:
# The project ID is embedded in the bucket name to help keep it unique.
# This must be an f-string: with a plain string, `{PROJECT_ID}` stays literal
# text and the resulting bucket name (containing braces) is invalid.
BUCKET_NAME = f"your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

# Full gs:// URI of the bucket, used for staging and for the tutorial workspace.
BUCKET_URI = f"gs://{BUCKET_NAME}"  # @param {type:"string"}

In [None]:
# Create the bucket in REGION under PROJECT_ID.
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

#### Service Account and permissions

Vertex AI Automated Prompt Design requires a service account with the following permissions:

-   `Vertex AI User` to call Vertex LLM API
-   `Storage Object Admin` to read and write to your GCS bucket.
-   `Artifact Registry Reader` to download the pipeline template from Artifact Registry.

[Check out the documentation](https://cloud.google.com/iam/docs/manage-access-service-accounts#iam-view-access-sa-gcloud) to know how to grant those permissions to a single service account. 

**Important**: If you are using Vertex AI Workbench, run the following commands directly in the terminal.

In [None]:
# Service account that runs the optimization job. The address follows the
# "<project number>-compute@developer.gserviceaccount.com" naming pattern.
SERVICE_ACCOUNT = f"{PROJECT_NUMBER}-compute@developer.gserviceaccount.com"

In [None]:
for role in ['aiplatform.user', 'storage.objectAdmin', 'artifactregistry.reader']:

    ! gcloud projects add-iam-policy-binding {PROJECT_ID} \
      --member=serviceAccount:{SERVICE_ACCOUNT} \
      --role=roles/{role} --condition=None

### Set tutorial folder and workspace

Set a folder to collect data and any tutorial artifacts.

In [None]:
from pathlib import Path as path

# Local folder layout, rooted at the current working directory.
ROOT_PATH = path.cwd()
TUTORIAL_PATH = ROOT_PATH / "tutorial"
CONFIG_PATH = TUTORIAL_PATH / "config"
TUNED_PROMPT_PATH = TUTORIAL_PATH / "tuned_prompts"

# Create every folder; exist_ok makes the cell safe to re-run.
for local_dir in (TUTORIAL_PATH, CONFIG_PATH, TUNED_PROMPT_PATH):
    local_dir.mkdir(parents=True, exist_ok=True)

Set the associated workspace on Cloud Storage bucket.

In [None]:
from etils import epath

# Workspace root inside the bucket, plus a "data" sub-folder for input datasets.
WORKSPACE_URI = epath.Path(BUCKET_URI) / "prompt_migration_gemini"
INPUT_DATA_URI = WORKSPACE_URI / "data"

# Create both locations; exist_ok makes the cell safe to re-run.
for remote_dir in (WORKSPACE_URI, INPUT_DATA_URI):
    remote_dir.mkdir(parents=True, exist_ok=True)

### Import libraries

In [None]:
# Tutorial
from argparse import Namespace
import json

# General
import logging
import warnings

from google.cloud import aiplatform
import pandas as pd
from tutorial.utils.helpers import (
    async_generate,
    display_eval_report,
    evaluate_task,
    get_id,
    get_optimization_result,
    get_results_file_uris,
    init_new_model,
    plot_eval_metrics,
    print_df_rows,
)

### Libraries settings

In [None]:
# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")
# Only let ERROR-level (and above) records through from urllib3's connection pool logger.
logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project.

In [None]:
# Bind the SDK to the project, region and staging bucket configured above.
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

### Define constants

In [None]:
# Sample question-answering dataset (JSONL) loaded at the start of the tutorial.
INPUT_DATA_FILE_URI = "gs://github-repo/prompts/prompt_optimizer/rag_qa_dataset.jsonl"

# Name passed as `experiment_name` to the evaluation helper.
EXPERIMENT_NAME = "qa-prompt-eval"
# NOTE(review): INPUT_TUNING_DATA_URI is not referenced by the cells visible in
# this notebook; the tuning file below is placed under INPUT_DATA_URI instead —
# confirm which folder is intended.
INPUT_TUNING_DATA_URI = epath.Path(WORKSPACE_URI) / "tuning_data"
# Destination of the prepared samples that are fed to the optimizer.
INPUT_TUNING_DATA_FILE_URI = str(INPUT_DATA_URI / "prompt_tuning.jsonl")
# Parent folder for optimization run outputs (one sub-folder per job).
OUTPUT_TUNING_DATA_URI = epath.Path(WORKSPACE_URI) / "tuned_prompt"
# Container image executed by the custom job.
APD_CONTAINER_URI = (
    "us-docker.pkg.dev/vertex-ai-restricted/builtin-algorithm/apd:preview_v1_0"
)
# Location of the JSON configuration passed to the container via --config.
CONFIG_FILE_URI = str(WORKSPACE_URI / "config" / "config.json")

## III. Automated prompt design with Vertex AI Prompt Optimizer (Preview)

### Load the dataset

Load the dataset from Cloud Storage bucket.

In [None]:
# lines=True: the file is JSON Lines, one record per line.
prompt_tuning_df = pd.read_json(INPUT_DATA_FILE_URI, lines=True)

In [None]:
# Preview the first rows of the dataset.
prompt_tuning_df.head()

In [None]:
# Helper from tutorial.utils.helpers; presumably prints `n` rows in full — see helpers.py.
print_df_rows(prompt_tuning_df, n=1)

### Evaluate the previous model version in question-answering task

Run an evaluation using Vertex AI Gen AI Evaluation Service to define a question-answering baseline.

In [None]:
# Score the existing answers (column "answer") against the references,
# using every row of the dataset as the evaluation sample.
baseline_eval_result = evaluate_task(
    df=prompt_tuning_df,
    prompt_col="prompt",
    reference_col="reference",
    response_col="answer",
    experiment_name=EXPERIMENT_NAME,
    eval_metrics=["question_answering_quality", "groundedness"],
    eval_sample_n=len(prompt_tuning_df),
)

# List of (label, evaluation result) pairs; further runs are appended later.
evaluation_qa_results = [("qa_eval_result_old_model", baseline_eval_result)]

Plot the evaluation metrics.

In [None]:
# Plot the metrics of every evaluation collected so far (only the baseline at this point).
plot_eval_metrics(evaluation_qa_results)

### Optimize the prompt template with Vertex AI Prompt Optimizer (Preview)


#### Prepare the prompt template you want to optimize

A prompt consists of two key parts:

* **System Instruction Template** which is a fixed part of the prompt shared across all queries for a given task.

* **Prompt Template** which is a dynamic part of the prompt that changes based on the task.

Vertex AI Prompt Optimizer enables the translation and optimization of the System Instruction Template, while the Prompt Template (the task/context part) remains essential for evaluating different instruction templates.

In this case, you want to enhance or optimize a simple prompt template.


In [None]:
# Fixed part of the prompt; passed to the optimizer as `system_instruction`.
SYSTEM_INSTRUCTION_TEMPLATE = """
Given a question with some context, provide the correct answer to the question.
"""

# Per-sample part of the prompt; passed to the optimizer as `prompt_template`.
# NOTE(review): the `{{question}}` / `{{target}}` placeholders presumably map to
# the "question" and "target" columns of the prepared samples — confirm the
# expected placeholder syntax against the Prompt Optimizer documentation.
PROMPT_TEMPLATE = """
Some examples of correct answer to a question with context are:
Question: {{question}}
Answer: {{target}}
"""

#### Prepare few samples

Vertex AI Prompt Optimizer requires a CSV or JSONL file containing labeled samples.

For **prompt optimization**:

* Focus on examples that specifically demonstrate the issues you want to address.
* Recommendation: Use 50-100 distinct samples for reliable results. However, the tool can still be effective with as few as 5 samples.

For **prompt translation**:

* Consider using the source model to label examples that the target model struggles with, helping to identify areas for improvement.

Learn more about setting up your CSV or JSONL file as input [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer).

In [None]:
# Work on a copy so the original dataset stays available for later evaluation.
prepared_prompt_tuning_df = prompt_tuning_df.copy()

# Prepare question and target columns.
# "question" = user question, a blank line, then the labelled context.
# (The separator previously read "\nnContext:\n", which put a stray "n"
# in front of "Context:" in every sample.)
prepared_prompt_tuning_df["question"] = (
    prepared_prompt_tuning_df["user_question"]
    + "\n\nContext:\n"
    + prepared_prompt_tuning_df["context"]
)
# The reference answer becomes the "target" column.
prepared_prompt_tuning_df = prepared_prompt_tuning_df.rename(
    columns={"reference": "target"}
)

# Remove unnecessary columns so only "question" and "target" (plus any other
# untouched columns) are kept in the samples.
prepared_prompt_tuning_df = prepared_prompt_tuning_df.drop(
    columns=["user_question", "context", "prompt", "answer"]
)

In [None]:
# Preview the prepared samples.
prepared_prompt_tuning_df.head()

#### Upload samples to bucket

Once you have prepared the samples, you can upload them to the Cloud Storage bucket.

In [None]:
# Write the samples as JSON Lines (one record per line) to the tuning-data URI.
prepared_prompt_tuning_df.to_json(
    INPUT_TUNING_DATA_FILE_URI, orient="records", lines=True
)

#### Configure optimization settings

Vertex AI Prompt Optimizer lets you optimize prompts by optimizing instructions only, demonstrations only, or both (`optimization_mode`). After you set the system instruction and prompt template to optimize (`system_instruction`, `prompt_template`) and the model you want to optimize for (`target_model`), you can condition the optimization process by setting the evaluation metrics, the number of iterations used to improve the prompt, and more.

The configuration below uses the most common and recommended default settings. If you want more control over the optimization process, Vertex AI Prompt Optimizer (Preview) also provides additional configurations. Refer to the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer) to learn more about the different parameter settings and how to best use them.


In [None]:
# Job name with a generated suffix from get_id(); also used as the name of the
# output sub-folder for this run.
PROMPT_OPTIMIZATION_JOB = "auto-prompt-design-job-" + get_id()
OUTPUT_TUNING_RUN_URI = str(OUTPUT_TUNING_DATA_URI / PROMPT_OPTIMIZATION_JOB)

# Optimizer configuration. It is later converted to a dict and serialized as
# JSON, so every value here must be JSON-serializable.
args = Namespace(
    # Basic configuration
    system_instruction=SYSTEM_INSTRUCTION_TEMPLATE,
    prompt_template=PROMPT_TEMPLATE,
    target_model="gemini-1.5-flash-001",  # Supported models: "gemini-1.0-pro-001", "gemini-1.0-pro-002", "gemini-1.5-flash-001", "gemini-1.5-pro-001", "gemini-1.0-ultra-001", "text-bison@001", "text-bison@002", "text-bison32k@002", "text-unicorn@001"
    optimization_mode="instruction",  # Supported modes: "instruction", "demonstration", "instruction_and_demo"
    num_steps=3,
    num_template_eval_per_step=2,
    num_demo_set_candidates=3,
    demo_set_size=2,
    input_data_path=INPUT_TUNING_DATA_FILE_URI,
    output_path=OUTPUT_TUNING_RUN_URI,
    project=PROJECT_ID,
    # Advanced configuration
    target_model_qps=1,
    target_model_location="us-central1",
    # Source-model settings are left empty in this tutorial.
    source_model="",
    source_model_qps="",
    source_model_location="",
    optimizer_model="gemini-1.5-pro-001",  # Supported models: "gemini-1.0-pro-001", "gemini-1.0-pro-002", "gemini-1.5-flash-001", "gemini-1.5-pro-001", "gemini-1.0-ultra-001", "text-bison@001", "text-bison@002", "text-bison32k@002", "text-unicorn@001"
    optimizer_model_qps=1,
    optimizer_model_location="us-central1",
    eval_model="gemini-1.5-pro-001",  # Supported models: "gemini-1.0-pro-001", "gemini-1.0-pro-002", "gemini-1.5-flash-001", "gemini-1.5-pro-001", "gemini-1.0-ultra-001", "text-bison@001", "text-bison@002", "text-bison32k@002", "text-unicorn@001"
    eval_qps=1,
    eval_model_location="us-central1",
    eval_metrics_types=[
        "question_answering_correctness",
        "groundedness",
    ],  # Supported metrics: "bleu", "coherence", "exact_match", "fluidity", "fulfillment", "groundedness", "rouge_1", "rouge_2", "rouge_l", "rouge_l_sum", "safety", "question_answering_correctness", "question_answering_helpfulness", "question_answering_quality", "question_answering_relevance", "summarization_helpfulness", "summarization_quality", "summarization_verbosity", "tool_name_match", "tool_parameter_key_match", "tool_parameter_kv_match"
    # Presumably one weight per entry of eval_metrics_types, in the same order — confirm in the docs.
    eval_metrics_weights=[0.9, 0.1],
    aggregation_type="weighted_sum",  # Supported aggregation types: "weighted_sum", "weighted_average"
    data_limit=50,
    response_mime_type="application/json",
    response_schema="",
    language="English",  # Supported languages: "English", "French", "German", "Hebrew", "Hindi", "Japanese", "Korean", "Portuguese", "Simplified Chinese", "Spanish", "Traditional Chinese"
    # Empty mapping: no additional placeholder content is supplied.
    placeholder_to_content=json.loads("{}"),
)

#### Upload Vertex AI Prompt Optimizer (Preview) config to Cloud Storage

After you define the Vertex AI Prompt Optimizer (Preview) configuration, you upload it to the Cloud Storage bucket.


Now you can save the config to the bucket.

In [None]:
# json.dump needs a plain dict, so expose the Namespace attributes via vars().
# The isinstance guard keeps the cell re-runnable: once `args` is already a
# dict, calling vars() on it again would raise TypeError.
if not isinstance(args, dict):
    args = vars(args)

# The `with` block closes the file on exit, so no explicit close() is needed.
with epath.Path(CONFIG_FILE_URI).open("w") as config_file:
    json.dump(args, config_file)

#### Run the automatic prompt optimization job

Now you are ready to run your first Vertex AI Prompt Optimizer (Preview) job using the Vertex AI SDK for Python.

**Important:** Be sure you have provisioned enough queries per minute (QPM) quota and that you use the recommended QPM for each model. If you configure Vertex AI Prompt Optimizer with a QPM that is higher than the QPM you have access to, the job will fail.

[Check out](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#before-you-begin) the documentation to know more. 


In [None]:
# The container receives the location of the JSON config through --config.
apd_container_spec = {
    "image_uri": APD_CONTAINER_URI,
    "args": ["--config=" + CONFIG_FILE_URI],
}

# A single-replica worker pool running the optimizer container.
WORKER_POOL_SPECS = [
    {
        "machine_spec": {"machine_type": "n1-standard-4"},
        "replica_count": 1,
        "container_spec": apd_container_spec,
    }
]

custom_job = aiplatform.CustomJob(
    worker_pool_specs=WORKER_POOL_SPECS,
    display_name=PROMPT_OPTIMIZATION_JOB,
)

# Run the job under the service account that was granted the roles above.
custom_job.run(service_account=SERVICE_ACCOUNT)

### Collect the optimization results

Vertex AI Prompt Optimizer returns both optimized templates and evaluation results as JSONL files in the Cloud Storage bucket. Depending on the optimization mode you define, those results cover the instruction, the demonstrations, or both. They help you understand the optimization process.

In this case, you want to collect the optimized templates and evaluation results for the instruction.

Below you use a helper function to read those results.


In [None]:
# Look up the result files of this run under its output folder.
# NOTE(review): the returned mapping is indexed later with the keys
# "instruction_templates" and "instruction_eval_results"; how those keys are
# derived from `required_files` is defined in helpers.py — confirm there.
apd_result_uris = get_results_file_uris(
    output_uri=OUTPUT_TUNING_RUN_URI,
    required_files=["eval_results.json", "templates.json"],
)

#### Get the best system instruction

Below you have the optimal system instruction template and the associated evaluation metrics.

In [None]:
# Locations of the instruction templates and their evaluation results.
instruction_templates_uri = apd_result_uris["instruction_templates"]
instruction_eval_results_uri = apd_result_uris["instruction_eval_results"]

best_prompt_df, prompt_summary_df, prompt_metrics_df = get_optimization_result(
    instruction_templates_uri,
    instruction_eval_results_uri,
)

# Render the report for the instruction component from the three result frames.
instruction_report = (best_prompt_df, prompt_summary_df, prompt_metrics_df)
display_eval_report(instruction_report, prompt_component="instruction")

### Validate and Evaluate the optimized template in question-answering task


#### Generate new responses using the optimized template

Finally, you generate the new responses with the optimized template. Below you can see an example of a generated response using the optimized system instructions template.

In [None]:
# Best instruction found by the optimizer (first row of the result frame).
best_system_instruction = best_prompt_df["prompt"].iloc[0]

# Only this fixed suffix is treated as a format template. The optimized
# instruction is model-generated text and may contain literal braces (e.g. JSON
# snippets); running str.format over it would raise KeyError/ValueError or
# alter the text, so it is prepended by plain concatenation instead.
question_context_suffix = "\nQuestion: \n{question}" + "\nContext: \n{context}"

# Kept so the full template remains available for inspection.
optimized_prompt_template = best_system_instruction + question_context_suffix

# One prompt per dataset row: instruction + that row's question and context.
optimized_prompts = [
    best_system_instruction + question_context_suffix.format(question=q, context=c)
    for q, c in zip(
        prompt_tuning_df["user_question"].to_list(),
        prompt_tuning_df["context"].to_list(),
    )
]

prompt_tuning_df["optimized_prompt_with_vapo"] = optimized_prompts

In [None]:
gemini_llm = init_new_model("gemini-1.5-flash-001")

gemini_predictions = [async_generate(p, model=gemini_llm) for p in optimized_prompts]

gemini_predictions_col = await tqdm_asyncio.gather(*gemini_predictions)

prompt_tuning_df["gemini_answer_with_vapo"] = gemini_predictions_col

In [None]:
# Show one row, now including the optimized prompt and the generated answer columns.
print_df_rows(prompt_tuning_df, n=1)

#### Evaluate new responses using Vertex AI Gen AI Evaluation

And you use the generated responses with the optimized prompt to run a new round of evaluation with Vertex AI Gen AI Evaluation.


In [None]:
# Score the answers generated with the optimized prompt, using the same
# metrics and sample size as the baseline evaluation.
optimized_eval_result = evaluate_task(
    df=prompt_tuning_df,
    prompt_col="optimized_prompt_with_vapo",
    reference_col="reference",
    response_col="gemini_answer_with_vapo",
    experiment_name=EXPERIMENT_NAME,
    eval_metrics=["question_answering_quality", "groundedness"],
    eval_sample_n=len(prompt_tuning_df),
)

# Add the new (label, result) pair after the baseline entry.
evaluation_qa_results.append(
    ("qa_eval_result_new_model_with_vapo", optimized_eval_result)
)

In [None]:
# Plot all collected evaluations: the baseline and the optimized-prompt run.
plot_eval_metrics(evaluation_qa_results)

## IV. Clean up

In [None]:
delete_bucket = False
delete_job = False
delete_experiment = False
delete_tutorial = False

if delete_bucket:
    ! gsutil rm -r $BUCKET_URI

if delete_job:
    custom_job.delete()

if delete_experiment:
    experiment = aiplatform.Experiment(experiment_name=EXPERIMENT_NAME)
    experiment.delete()

if delete_tutorial:
    import shutil

    shutil.rmtree(str(TUTORIAL_PATH))