In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get Started with Gemini Preference Optimization

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/tuning/dpo_gemini.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Ftuning%2Fdpo_gemini.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/tuning/dpo_gemini.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/tuning/dpo_gemini.ipynb">
      <img width="32px" src="https://storage.googleapis.com/github-repo/generative-ai/logos/GitHub_Invertocat_Dark.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

| Author(s) |
| --- |
| James Su, [Ivan Nardini](https://github.com/inardini) |

## Overview

This tutorial shows you how to teach Gemini to generate better responses by showing it examples of what humans prefer.

### What is Preference Optimization?

Instead of labeling responses as "correct" or "incorrect," preference optimization works with human preferences. You show the model pairs of responses to the same question—one that humans preferred and one they didn't—and the model learns to generate responses more like the preferred ones.

Think of it like this: rather than teaching a student the "right answer," you're showing them two essays and saying "this style is better than that style."

### What you'll learn

By the end of this tutorial, you will:
1. Load a dataset with human preference ratings
2. Train Gemini to align with those preferences
3. See a clear before-and-after comparison of model outputs

---

**Note**: This tutorial uses experimental features. They work great for learning and prototyping, but check the latest docs before using in production.

## Get started

### Prerequisites

- A Google Cloud project with billing enabled
- The Vertex AI API enabled ([enable it here](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com))
- No machine learning experience required!


### Install required packages

We'll install three packages:
- `google-genai`: The Gemini SDK for Python
- `google-cloud-aiplatform`: Vertex AI client library
- `datasets`: HuggingFace library to easily load training data

**⚠️ Expected Behavior**: After running this cell, Colab will ask you to **restart the runtime**. This is normal and necessary—click the "Restart Runtime" button when you see it.

In [None]:
%pip install --upgrade --quiet google-genai google-cloud-aiplatform datasets

### Authenticate (Colab only)

If you're running this in Google Colab, you need to authenticate so the notebook can access your Google Cloud project. This cell will prompt you to sign in.

In [None]:
import sys

# Colab-only sign-in. The branch runs only when the `google.colab` module is
# already loaded in this interpreter; in any other environment this cell does
# nothing and prints nothing.
if "google.colab" in sys.modules:
    # Imported lazily: the package is only needed on this branch.
    from google.colab import auth

    # Prompts the user to sign in so later cells can reach the Google Cloud project.
    auth.authenticate_user()
    print("✅ Authentication successful!")

### Set up your Google Cloud project

Replace `[your-project-id]` below with your actual Google Cloud project ID. You can find this in the [Google Cloud Console](https://console.cloud.google.com/).

In [None]:
import os
from google import genai

# TODO: Replace with your actual project ID
# fmt: off
PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
# fmt: on

# Auto-detect from environment if not set
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = str(os.environ.get("GOOGLE_CLOUD_PROJECT", ""))

if not PROJECT_ID:
    raise ValueError("Please set your PROJECT_ID above")

# Region where the model will be tuned (us-central1 has the best availability)
LOCATION = "us-central1"

# Create a unique bucket name using your project ID
# Bucket names must be globally unique across all of Google Cloud
BUCKET_NAME = f"{PROJECT_ID}-preference-tuning"
BUCKET_URI = f"gs://{BUCKET_NAME}"

print(f"📦 Creating GCS bucket: {BUCKET_NAME}...")

# Create the bucket (the 2>/dev/null hides the error if it already exists)
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI} 2>/dev/null || echo "(Bucket already exists, continuing...)"

# Initialize the Gemini client with your project
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

print(f"✅ Using project: {PROJECT_ID}")
print(f"✅ Using region: {LOCATION}")
print(f"✅ Bucket: {BUCKET_URI}")
print(print("✅ Client initialized")

### Import libraries

Now we'll import the remaining Python libraries we need. (The Gemini client was already created in the previous cell.)

In [None]:
import json
import subprocess

import requests
from IPython.display import Markdown, display
from datasets import load_dataset

print("✅ Libraries imported")

---

## Step 1: Load the training dataset

We'll use **[UltraFeedback](https://huggingface.co/datasets/zhengr/ultrafeedback_binarized)**, a dataset containing thousands of prompts with multiple AI-generated responses rated by humans. For each prompt, humans indicated which response they preferred.

This is perfect for teaching Gemini what kind of responses humans find more helpful.

**What's in this dataset?**
- `prompt`: The user's question
- `chosen`: The response humans preferred (scored higher)
- `rejected`: The response humans didn't prefer (scored lower)
- `score_chosen` and `score_rejected`: The numerical ratings

In [None]:
# Tell the reader what is about to happen before the (potentially slow) download.
print(
    "📥 Loading UltraFeedback dataset from HuggingFace...",
    "(This may take 1-2 minutes on first load)\n",
    sep="\n",
)

# Fetch the pre-binarized dataset: every row already carries a preferred
# ("chosen") and a non-preferred ("rejected") conversation.
dataset = load_dataset("zhengr/ultrafeedback_binarized")

print("✅ Dataset loaded!\n")

# Report the size of the two preference splits this notebook mentions.
print(
    "\n".join(
        f"{label} examples: {len(dataset[split_name]):,}"
        for label, split_name in (("Training", "train_prefs"), ("Test", "test_prefs"))
    )
)

Let's look at one example to understand the structure:

In [None]:
# Take the very first training row as a representative record.
sample = dataset["train_prefs"][0]

# Emit the summary in a single call; `sep="\n"` reproduces one line per item,
# and the trailing "\n" inside some items yields the blank separator lines.
print(
    "📋 Example from the dataset:\n",
    "Prompt: {}\n".format(sample["prompt"]),
    "Chosen score: {}".format(sample.get("score_chosen", "N/A")),
    "Rejected score: {}\n".format(sample.get("score_rejected", "N/A")),
    "Full structure:",
    # Only the first 1200 characters of the pretty-printed record are shown.
    json.dumps(sample, indent=2)[:1200] + "\n...\n",
    sep="\n",
)

---

## Step 2: Transform the data for Gemini

The dataset is in a generic format, but Gemini's tuning API expects a specific structure. We need to transform each example.

**From UltraFeedback format:**
```json
{
  "chosen": [{"role": "user", "content": "..."},
             {"role": "assistant", "content": "..."}],
  "rejected": [{"role": "user", "content": "..."},
               {"role": "assistant", "content": "..."}]
}
```

**To Gemini format:**
```json
{
  "contents": [{"role": "user", "parts": [{"text": "..."}]}],
  "completions": [
    {"score": 1.0, "completion": {"role": "model", "parts": [{"text": "..."}]}},
    {"score": 0.0, "completion": {"role": "model", "parts": [{"text": "..."}]}}
  ]
}
```

We'll write a helper function to do this transformation:

In [None]:
def _message_text(message):
    """Return the stripped ``content`` of one chat message, or ``""`` if unusable.

    A message is unusable when it has no ``.get`` (not dict-like) or when its
    ``content`` is not a string (for example ``None``).
    """
    try:
        return message.get("content", "").strip()
    except AttributeError:
        return ""


def transform_to_gemini_format(example):
    """Convert one UltraFeedback example to Gemini's preference tuning format.

    Only the first two messages of each conversation are used: message 0 of
    ``chosen`` is taken as the user prompt and message 1 of ``chosen`` /
    ``rejected`` as the preferred / non-preferred model response. All three
    texts are whitespace-stripped.

    Args:
        example: A dict with 'chosen' and 'rejected' fields, each a list of
            ``{"role": ..., "content": ...}`` messages.

    Returns:
        A dict in Gemini's format with 'contents' and 'completions'
        (score 1.0 for the chosen response, 0.0 for the rejected one), or
        None if the example is invalid (missing fields, fewer than two
        messages, non-text or empty content).
    """
    # Malformed rows are reported as None. Unlike a blanket `except Exception`,
    # only the lookup failures a bad row can cause are absorbed, so genuine
    # programming errors still surface instead of silently dropping every row.
    try:
        chosen = example["chosen"]
        rejected = example["rejected"]
    except (KeyError, IndexError, TypeError):
        return None

    # Validate: need at least 2 messages (prompt + response) in each conversation
    for conversation in (chosen, rejected):
        if not isinstance(conversation, list) or len(conversation) < 2:
            return None

    # First message of the chosen conversation is the user prompt;
    # second message of each conversation is the model response.
    user_prompt = _message_text(chosen[0])
    chosen_response = _message_text(chosen[1])
    rejected_response = _message_text(rejected[1])

    # Validate: all must be non-empty
    if not (user_prompt and chosen_response and rejected_response):
        return None

    # Build the Gemini format
    return {
        "contents": [{"role": "user", "parts": [{"text": user_prompt}]}],
        "completions": [
            {
                "score": 1.0,
                "completion": {
                    "role": "model",
                    "parts": [{"text": chosen_response}],
                },
            },
            {
                "score": 0.0,
                "completion": {
                    "role": "model",
                    "parts": [{"text": rejected_response}],
                },
            },
        ],
    }


# Quick sanity check: convert the first training row and preview the result.
transformed = transform_to_gemini_format(dataset["train_prefs"][0])

print("✅ Transformation function ready\n")
print(
    "Example transformed data:",
    # Preview is capped at the first 800 characters of the pretty-printed JSON.
    json.dumps(transformed, indent=2)[:800] + "\n...",
    sep="\n",
)

Now transform the full dataset and save to files:

**Why we're doing this:** Vertex AI reads training data from files, not from Python variables. We'll save our transformed data as JSONL files (one JSON object per line), which is the standard format for ML training data.

In [None]:
# For this tutorial, we'll use a subset to make training faster
# For production, you'd use more data (10k-100k examples)
NUM_TRAIN = 1000
NUM_VAL = 100


def collect_valid_examples(rows, target, start=0):
    """Transform rows one by one until ``target`` valid examples are collected.

    Args:
        rows: An indexable, sized collection of UltraFeedback rows.
        target: Number of valid transformed examples wanted.
        start: Index of the first row to look at.

    Returns:
        A tuple ``(examples, skipped, next_index)``: the transformed examples
        (at most ``target``), how many rows were skipped because
        ``transform_to_gemini_format`` returned None, and the index of the
        first row that was NOT consumed.
    """
    examples = []
    skipped_count = 0
    position = start
    total_rows = len(rows)
    while len(examples) < target and position < total_rows:
        converted = transform_to_gemini_format(rows[position])
        if converted is None:
            skipped_count += 1
        else:
            examples.append(converted)
        position += 1
    return examples, skipped_count, position


def write_jsonl(path, records):
    """Write ``records`` to ``path`` as JSONL (one JSON object per line)."""
    with open(path, "w", encoding="utf-8") as handle:
        for record in records:
            handle.write(json.dumps(record) + "\n")


# Index the split directly instead of copying every row into a Python list:
# only the first ~NUM_TRAIN + NUM_VAL rows are ever read.
train_data = dataset["train_prefs"]

# Transform training examples - keep going until we have NUM_TRAIN valid examples
print(f"📝 Transforming training examples (target: {NUM_TRAIN})...")
train_transformed, skipped, index = collect_valid_examples(train_data, NUM_TRAIN)

print(
    f"✅ Successfully transformed {len(train_transformed)} training examples (skipped {skipped} invalid)"
)
if len(train_transformed) < NUM_TRAIN:
    print(f"⚠️ Only {len(train_transformed)} of {NUM_TRAIN} requested training examples were available")

# Transform validation examples - keep going until we have NUM_VAL valid examples.
# Scanning starts where training left off, so the two sets never overlap.
print(f"\n📝 Transforming validation examples (target: {NUM_VAL})...")
val_transformed, val_skipped, val_index = collect_valid_examples(
    train_data, NUM_VAL, start=index
)

print(
    f"✅ Successfully transformed {len(val_transformed)} validation examples (skipped {val_skipped} invalid)"
)
if len(val_transformed) < NUM_VAL:
    print(f"⚠️ Only {len(val_transformed)} of {NUM_VAL} requested validation examples were available")

# Write to JSONL files (one JSON object per line)
write_jsonl("train_data.jsonl", train_transformed)
write_jsonl("val_data.jsonl", val_transformed)

print("\n✅ Created training data files:")
print(f"   - train_data.jsonl ({len(train_transformed)} examples)")
print(f"   - val_data.jsonl ({len(val_transformed)} examples)")

---

## Step 3: Upload data to Google Cloud Storage

Vertex AI runs in the cloud, so it needs our training files to be in cloud storage (Google Cloud Storage, or GCS).

**What this cell does:**
1. Uses the GCS bucket that was created during project setup (think of it like a cloud folder)
2. Uploads our JSONL files to that bucket

**About `gsutil`:** This is Google's command-line tool for working with cloud storage. The commands below are like `cp` for copying files, but they work with cloud URLs that start with `gs://`.

In [None]:
print("\n📤 Uploading training files to cloud storage...")

# Upload our JSONL files to the bucket
! gsutil cp train_data.jsonl {BUCKET_URI}/data/train_data.jsonl
! gsutil cp val_data.jsonl {BUCKET_URI}/data/val_data.jsonl

# Store the cloud paths for use in the next step
TRAIN_URI = f"{BUCKET_URI}/data/train_data.jsonl"
VAL_URI = f"{BUCKET_URI}/data/val_data.jsonl"

print("\n✅ Data uploaded successfully!")
print(f"   Training data: {TRAIN_URI}")
print(f"   Validation data: {VAL_URI}")

---

## Step 4: Configure tuning hyperparameters

Before we start training, we need to set some configuration options (called "hyperparameters"). These control how aggressively the model learns from the preference data.

**Here's what each parameter does:**

| Parameter | What it controls | Good range | Our default |
|-----------|------------------|------------|-------------|
| **beta** | How much to change the model's behavior. Lower = more aggressive changes. | 0.01 - 0.5 | 0.1 |
| **learning_rate_multiplier** | How fast the model learns. Higher = faster but riskier. | 0.5 - 2.0 | 1.0 |
| **adapter_size** | How many parameters to tune. Bigger = more expressive but slower. | ONE, TWO, FOUR, EIGHT, SIXTEEN | ONE |
| **epochs** | How many times to go through the full dataset. | 1 - 3 | 1 |

**For this tutorial, we're using these recommended defaults.** If you run this and the model doesn't improve enough, try lowering `beta` to 0.05. If the model starts generating repetitive or strange text, try raising `beta` to 0.2.

⚠️ **Important:** Setting `beta=0` will completely prevent learning. Don't do that!

In [None]:
# Tuning knobs for this run; the job request built in Step 5 reads all four.

BETA = 0.1  # Lower beta = more aggressive alignment with preferences
LEARNING_RATE = 1.0  # Sent to the API as the learning-rate multiplier
ADAPTER_SIZE = "ADAPTER_SIZE_ONE"
EPOCHS = 1

# Echo the configuration so the values used for the run are visible in the output.
print(
    "⚙️ Hyperparameters configured:",
    f"   Epochs: {EPOCHS}",
    f"   Adapter size: {ADAPTER_SIZE}",
    f"   Learning rate multiplier: {LEARNING_RATE}",
    f"   Beta: {BETA}",
    sep="\n",
)

---

## Step 5: Submit the tuning job

Now we're ready to start training! This cell will submit a tuning job to Vertex AI.

**What happens when you run this:**
1. We send a request to Vertex AI with our data locations and hyperparameters
2. Vertex AI queues the job (it might wait a few minutes for resources)
3. Training starts and runs for 30-60 minutes
4. When done, Vertex AI deploys your tuned model to an endpoint

**This is asynchronous:** The job runs in the cloud. You'll get a job ID immediately, but the training happens in the background. We'll check the status in the next step.

**Expected output:** You should see `✅ Tuning job submitted successfully!` followed by a job ID. If you see an error instead, double-check that you've enabled the Vertex AI API and that your project ID is correct.

In [None]:
# Request body for the tuning job: which base model to tune, where the
# preference data lives (URIs from Step 3) and which hyperparameters to use
# (constants from Step 4).
tuning_job_spec = dict(
    description="Preference tuning Gemini 2.5 Flash with UltraFeedback",
    tunedModelDisplayName="gemini-2.5-flash-preference-tuned",
    baseModel="gemini-2.5-flash",
    preferenceOptimizationSpec=dict(
        trainingDatasetUri=TRAIN_URI,
        validationDatasetUri=VAL_URI,
        hyperParameters=dict(
            epochCount=str(EPOCHS),  # sent as a string
            adapterSize=ADAPTER_SIZE,
            learningRateMultiplier=LEARNING_RATE,
            beta=BETA,
        ),
    ),
)

# Keep a pretty-printed copy of the request on disk (handy for inspection or
# for replaying the request with curl).
with open("tuning_job.json", "w") as f:
    f.write(json.dumps(tuning_job_spec, indent=2))


# Get access token
def get_access_token():
    """Get the Google Cloud access token from the gcloud CLI.

    Runs ``gcloud auth print-access-token`` and returns its stdout with
    surrounding whitespace removed.

    Returns:
        The access token as a non-empty string.

    Raises:
        RuntimeError: If gcloud exits with a non-zero status or prints no
            token. Previously this case returned an empty string, which only
            showed up later as an opaque authorization failure.
        FileNotFoundError: If the ``gcloud`` executable is not on PATH
            (raised by ``subprocess.run`` itself).
    """
    result = subprocess.run(
        ["gcloud", "auth", "print-access-token"],
        check=False,  # the exit status is inspected below to include stderr in the error
        capture_output=True,
        text=True,
    )
    token = result.stdout.strip()
    if result.returncode != 0 or not token:
        details = (result.stderr or "").strip() or "gcloud produced no output"
        raise RuntimeError(
            f"Could not obtain an access token from gcloud "
            f"(exit code {result.returncode}): {details}"
        )
    return token


# Construct the API endpoint URL (tuning jobs are created by POSTing to this collection)
API_ENDPOINT = f"https://{LOCATION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/tuningJobs"

print("🚀 Submitting tuning job to Vertex AI...\n")

# Any failure (token retrieval, network, JSON decoding) is reported instead of
# raising, so the notebook keeps running and the reader sees a readable message.
try:
    access_token = get_access_token()

    # Make the POST request
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {access_token}",
    }

    # An explicit timeout keeps the cell from hanging indefinitely if the
    # service never answers (requests applies no timeout by default).
    response = requests.post(
        API_ENDPOINT, headers=headers, json=tuning_job_spec, timeout=60
    )

    # Parse the response
    if response.status_code == 200:
        response_data = response.json()

        if "name" in response_data:
            # The last path segment of the returned resource name is used as the job ID.
            job_resource_name = response_data["name"]
            job_id = job_resource_name.split("/")[-1]

            print("✅ Tuning job submitted successfully!\n")
            print(f"Job ID: {job_id}\n")
            print("Monitor progress in the console:")
            print(
                f"https://console.cloud.google.com/vertex-ai/tuning/locations/{LOCATION}/tuningJob/{job_id}/monitor?project={PROJECT_ID}"
            )
            print("\n⏱️ Training will take approximately 30-60 minutes.")
            print("   You can run the next cell to check the status.")
        else:
            print("❌ Unexpected response from API:")
            print(response_data)
    else:
        print(f"❌ Failed to submit job. Status code: {response.status_code}")
        print(f"Response: {response.text}")

except Exception as e:
    print(f"❌ Error submitting job: {e}")

---

## Step 6: Check the job status

The tuning job is running in the background. This cell lets you check on its progress.

**Job states:**
- `PENDING`: Waiting for resources (can take 2-5 minutes)
- `RUNNING`: Training is in progress (30-60 minutes)
- `SUCCEEDED`: Done! Your model is ready to use
- `FAILED`: Something went wrong (check the error message)

**What you need to do:**
1. Copy the Job ID from the cell above
2. Paste it into the `JOB_ID` field below
3. Run this cell

You can run this cell multiple times to keep checking the status. When it says `SUCCEEDED`, copy the Endpoint ID—you'll need it in Step 8.

In [None]:
# TODO: Paste your job ID from Step 5 here.
# Leave it empty to reuse the `job_id` captured by Step 5 in this session.
# (The previous default was a leftover ID from someone else's run, which made
# the "please set JOB_ID" guard below unreachable.)
JOB_ID = ""  # @param {type:"string"}

# Fall back to the job submitted in Step 5, if that cell ran in this kernel.
if not JOB_ID:
    JOB_ID = globals().get("job_id", "")

if not JOB_ID:
    print("⚠️ Please set JOB_ID above with the value from Step 5")
else:
    # Construct the API URL for checking this specific job
    JOB_STATUS_URL = f"https://{LOCATION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/tuningJobs/{JOB_ID}"

    # Errors are printed rather than raised so this cell can simply be re-run.
    try:
        access_token = get_access_token()

        # Query the job status
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
        }

        # Explicit timeout: requests would otherwise wait indefinitely.
        response = requests.get(JOB_STATUS_URL, headers=headers, timeout=60)

        if response.status_code == 200:
            job_info = response.json()
            state = job_info.get("state", "UNKNOWN")

            print(f"📊 Current Status: {state}\n")

            if state == "JOB_STATE_PENDING":
                print("⏳ Job is queued, waiting for compute resources...")
                print(
                    "   This can take 2-5 minutes. Run this cell again in a few minutes."
                )

            elif state == "JOB_STATE_RUNNING":
                print("🔄 Training is in progress!")
                print("   This typically takes 30-60 minutes for 1000 examples.")
                print("   Feel free to take a break and come back to check.")

            elif state == "JOB_STATE_SUCCEEDED":
                # Extract the endpoint where the tuned model was deployed;
                # the last path segment of the resource name is the endpoint ID.
                tuned_model = job_info.get("tunedModel", {})
                endpoint_resource = tuned_model.get("endpoint", "")
                endpoint_id = (
                    endpoint_resource.split("/")[-1]
                    if endpoint_resource
                    else "NOT_FOUND"
                )
                print("✅ Training completed successfully!\n")
                print(f"🎯 Tuned model endpoint ID: {endpoint_id}\n")
                print(
                    "📝 Copy this endpoint ID and paste it into Step 8 to test your tuned model."
                )

            elif state == "JOB_STATE_FAILED":
                print("❌ Training failed.\n")
                error_info = job_info.get("error", {})
                print("Error details:")
                print(json.dumps(error_info, indent=2))

            else:
                # Any state not handled above lands here.
                print(f"❓ Unknown state: {state}")
        else:
            print(f"❌ Failed to get job status. Status code: {response.status_code}")
            print(f"Response: {response.text}")

    except Exception as e:
        print(f"❌ Error checking job status: {e}")

---

## Step 7: Test the base model (before tuning)

While we wait for tuning to complete (or after it's done), let's test the **original** Gemini model to see how it responds.

We'll use a carefully selected test prompt from our validation set—one where human preferences were clear. This will let us see the "before" so we can compare it to the "after."

**Why this matters:** The whole point of tuning is to improve the model. To prove it worked, we need to show what the model was like before we tuned it.

In [None]:
# Select a prompt from our validation set (data the model hasn't seen during training).
# We take it from `val_transformed` (built in Step 2) rather than indexing the raw
# split at NUM_TRAIN + 50: when rows were skipped as invalid, that raw index can
# point at a training example or at a row that was itself skipped.
# Example #50 is used because it tends to show clear improvement after tuning;
# `min` keeps the index valid if fewer validation examples are available.
test_example = val_transformed[min(50, len(val_transformed) - 1)]
test_prompt = test_example["contents"][0]["parts"][0]["text"]

print("📝 Test Prompt:")
print(f"   {test_prompt}")
print("\n" + "=" * 80)

print("\n🤖 Generating response from BASE model (untuned Gemini 2.5 Flash)...\n")

# Call the base Gemini model. Failures are printed, and `base_text` is always
# set so the comparison in Step 9 has something to show.
try:
    base_response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=test_prompt,
        config={
            "max_output_tokens": 1024,  # Ensure we get complete responses
            "temperature": 1.0,
        },
    )

    base_text = base_response.text
    if not base_text:
        # NOTE(review): `.text` appears to be None when the response carries no
        # text parts — confirm against the google-genai docs. Use a readable
        # placeholder so Step 9 does not render "None".
        base_text = "[No text returned by the base model]"

    print("=" * 80)
    print("BASE MODEL RESPONSE")
    print("=" * 80)
    print(base_text)
    print("=" * 80)

    print("\n✅ Base model response generated")
    print("   Keep this in mind for comparison with the tuned model!")

except Exception as e:
    print(f"❌ Error generating base model response: {e}")
    base_text = "[Error: Could not generate base response]"

---

## Step 8: Test the tuned model (after training)

Now for the moment of truth! Let's test your newly tuned model with the **exact same prompt** from Step 7.

**What to expect:** The tuned model should generate a response that better matches the style and quality of the "chosen" responses in your training data. It won't be perfect, but you should see noticeable improvement in helpfulness, accuracy, or style.

**Important note:** We set `thinking_budget=0` because preference-tuned models learn to generate good responses directly, without needing to "think" first. Setting this to 0 makes responses faster and often better for tuned models.

**What you need to do:**
1. Make sure Step 6 showed `SUCCEEDED`
2. Copy the Endpoint ID from Step 6
3. Paste it into `TUNED_ENDPOINT_ID` below
4. Run this cell

In [None]:
# TODO: Paste your endpoint ID from Step 6 here.
# Leave it empty to reuse the `endpoint_id` found by Step 6 in this session.
# (The previous default was a leftover ID from someone else's run, which made
# the "please set TUNED_ENDPOINT_ID" guard below unreachable.)
TUNED_ENDPOINT_ID = ""  # @param {type:"string"}

# Fall back to the endpoint reported by Step 6, if that cell saw a finished job.
if not TUNED_ENDPOINT_ID:
    TUNED_ENDPOINT_ID = globals().get("endpoint_id", "")
    # Step 6 stores "NOT_FOUND" when the job response had no endpoint.
    if TUNED_ENDPOINT_ID == "NOT_FOUND":
        TUNED_ENDPOINT_ID = ""

if not TUNED_ENDPOINT_ID:
    print("⚠️ Please set TUNED_ENDPOINT_ID above with the endpoint from Step 6")
    print("   (Make sure training completed successfully first!)")
else:
    # Build the generation request
    # We're using the same prompt from Step 7 for a fair comparison
    generation_request = {
        "contents": {
            "role": "user",
            "parts": [{"text": test_prompt}],  # Defined in Step 7
        },
        "generation_config": {
            "thinking_config": {
                "thinking_budget": 0
            },  # No thinking needed for tuned models
            "max_output_tokens": 1024,  # Match the base model setting
            "temperature": 1.0,
        },
    }

    # Construct endpoint URL
    GENERATION_URL = f"https://{LOCATION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{TUNED_ENDPOINT_ID}:generateContent"

    print("🤖 Generating response from TUNED model...\n")

    # Every failure path sets `tuned_text` so Step 9 always has a value to render.
    try:
        # Get access token
        access_token = get_access_token()

        # Call the tuned model endpoint
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {access_token}",
        }

        # Explicit timeout: requests would otherwise wait indefinitely.
        response = requests.post(
            GENERATION_URL, headers=headers, json=generation_request, timeout=120
        )

        if response.status_code == 200:
            response_data = response.json()
            # First text part of the first candidate; a differently shaped
            # response raises KeyError/IndexError, handled below.
            tuned_text = response_data["candidates"][0]["content"]["parts"][0]["text"]

            print("=" * 80)
            print("TUNED MODEL RESPONSE")
            print("=" * 80)
            print(tuned_text)
            print("=" * 80)

            print("\n✅ Tuned model response generated successfully!")
            print("   Continue to Step 9 to see a side-by-side comparison.")
        else:
            print(f"❌ Error generating response. Status code: {response.status_code}")
            print(f"Response: {response.text}")
            tuned_text = "[Error: Could not generate tuned response]"

    except (KeyError, IndexError) as e:
        print(f"❌ Error parsing response: {e}")
        print(
            f"\nFull response: {response.text if 'response' in locals() else 'No response'}"
        )
        tuned_text = "[Error: Could not generate tuned response]"
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        tuned_text = "[Error: Could not generate tuned response]"

---

## Step 9: Side-by-side comparison 🎯

This is the **"wow moment"** of the tutorial. Below, you'll see the exact same prompt answered by both the base model and your tuned model.

**What to look for:**
- Is the tuned response more helpful or detailed?
- Does it avoid patterns from "rejected" responses in the training data?
- Does it better match the style humans preferred?

The improvement might be subtle or dramatic depending on the prompt. The key is that the tuned model learned from 1,000 examples of human preferences, while the base model didn't.

In [None]:
# Assemble the side-by-side report. Each slot falls back to a "run Step N first"
# hint when the corresponding variable has not been created in this kernel yet.
comparison_md = """
# 🎯 Before and After Comparison

## Prompt

> {prompt}

---

## Base Model Response (Untuned)

{base}

---

## Tuned Model Response (After Preference Optimization)

{tuned}

---

## 📊 Analysis

Compare the two responses above. Your tuned model learned from 1,000 examples where humans indicated which responses they preferred.

**Key improvements to notice:**
- **Helpfulness**: Is the tuned response more complete or actionable?
- **Style**: Does it match the tone humans preferred in the training data?
- **Accuracy**: Does it avoid common mistakes seen in rejected responses?

**Try it yourself:** You can replace the test prompt in Step 7 with any question and see how your tuned model compares to the base model!
""".format(
    prompt=globals().get("test_prompt", "Run Step 7 first"),
    base=globals().get("base_text", "*Run Step 7 first*"),
    tuned=globals().get("tuned_text", "*Run Step 8 first*"),
)

# Render the report as rich Markdown instead of plain text.
display(Markdown(comparison_md))

---

## 🎉 Congratulations!

You've successfully:
1. ✅ Loaded a dataset with human preference ratings
2. ✅ Transformed it into Gemini's format
3. ✅ Tuned Gemini to align with those preferences
4. ✅ Compared the base and tuned models

### Next steps

- **Experiment with hyperparameters**: Try lowering `beta` to 0.05 or increasing training data to 5,000 examples
- **Use your own data**: Replace the UltraFeedback dataset with your own preference data
- **Combine with SFT**: For best results, first do supervised fine-tuning on preferred responses, *then* apply preference optimization

### Learn more

- [Vertex AI Tuning Documentation](https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models)
- [DPO Paper (Direct Preference Optimization)](https://arxiv.org/abs/2305.18290)

---

## Cleaning up

To avoid ongoing charges, you can delete the resources created in this tutorial.

**What costs money:**
- ✅ **Tuned model endpoint** (costs ~$0.10/hour while deployed)
- ✅ **GCS bucket** (costs ~$0.02/GB/month for storage)

**What's free:**
- ❌ **Tuning job history** (Vertex AI keeps the job record for free)

**To delete resources**, uncomment and run the cells below:

In [None]:
# # Delete the tuned model endpoint
# # This stops billing for the deployed model
#
# if 'TUNED_ENDPOINT_ID' in locals() and TUNED_ENDPOINT_ID:
#     DELETE_ENDPOINT_URL = f"https://{LOCATION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{TUNED_ENDPOINT_ID}"
#
#     try:
#         # Get access token
#         access_token = get_access_token()
#
#         # Delete the endpoint
#         headers = {
#             "Authorization": f"Bearer {access_token}"
#         }
#
#         response = requests.delete(DELETE_ENDPOINT_URL, headers=headers, timeout=30)
#
#         if response.status_code in [200, 202, 204]:
#             print("✅ Endpoint deletion initiated successfully")
#             print("   (Deletion may take a few minutes to complete)")
#         else:
#             print(f"⚠️ Could not delete endpoint. Status code: {response.status_code}")
#             print(f"   Response: {response.text}")
#             print("   You can manually delete it in the Vertex AI console")
#
#     except Exception as e:
#         print(f"⚠️ Could not delete endpoint: {e}")
#         print("   You can manually delete it in the Vertex AI console")

In [None]:
# # Delete the GCS bucket and all training data
# # Warning: This permanently deletes all files in the bucket
#
# if 'BUCKET_URI' in locals():
#     try:
#         ! gsutil -m rm -r {BUCKET_URI}
#         print(f"✅ Deleted bucket: {BUCKET_URI}")
#     except Exception as e:
#         print(f"⚠️ Could not delete bucket: {e}")
#         print("   You can manually delete it in the Cloud Storage console")