In [None]:
!pip -q install --upgrade google-cloud-aiplatform vertexai pytest

[0m

In [None]:
import os, re, textwrap, datetime
import pandas as pd
import numpy as np
import vertexai
from vertexai.preview.language_models import ChatModel, InputOutputTextPair

In [None]:

# --- Project / Location ---
# If running inside a Google Cloud Skills Boost lab, PROJECT_ID is usually set.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
if not PROJECT_ID:
    # fallback to gcloud config
    PROJECT_ID = !gcloud config get-value project
    PROJECT_ID = PROJECT_ID[0]

LOCATION = "us-central1"
print("PROJECT_ID:", PROJECT_ID)
print("LOCATION:", LOCATION)

# --- Vertex AI Init ---
import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig

vertexai.init(project=PROJECT_ID, location=LOCATION)

# You can switch models if your lab specifies a "latest" one available to you.
MODEL_NAME = "gemini-2.5-flash"
model = GenerativeModel(MODEL_NAME)

# Deterministic generation for tests
GEN_CONFIG_DETERMINISTIC = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    max_output_tokens=64
)

# Slightly more flexible for social posts (still stable-ish)
GEN_CONFIG_POSTS = GenerationConfig(
    temperature=0.2,
    top_p=0.95,
    max_output_tokens=128
)


PROJECT_ID: qwiklabs-gcp-04-a859397f9965
LOCATION: us-central1


In [None]:
# ============================================================
# 1) Function: classify_question
# Categories required by workshop:
# Employment, General Information, Emergency Services, Tax Related
# (Workshop Challenge 3 instructions) :contentReference[oaicite:1]{index=1}
# ============================================================

%%writefile app.py
import re
import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig

ALLOWED_CATEGORIES = {"Employment", "General Information", "Emergency Services", "Tax Related"}

GEN_CONFIG_DETERMINISTIC = GenerationConfig(temperature=0.0, top_p=1.0, max_output_tokens=256)
GEN_CONFIG_POSTS = GenerationConfig(temperature=0.2, top_p=0.95, max_output_tokens=256)

def init_model(project_id: str, location: str = "us-central1", model_name: str = "gemini-2.5-flash"):
    vertexai.init(project=project_id, location=location)
    return GenerativeModel(model_name)

def _safe_text(resp) -> str:
    """
    Returns response text if present; otherwise returns "".
    NEVER raises on empty/blocked candidates.
    """
    # 1) Try resp.text but protect against ValueError
    try:
        t = getattr(resp, "text", None)
        if t:
            return t.strip()
    except Exception:
        pass

    # 2) Try candidates[0].content.parts[].text
    try:
        cands = getattr(resp, "candidates", []) or []
        if not cands:
            return ""
        parts = getattr(getattr(cands[0], "content", None), "parts", None) or []
        out = "".join([getattr(p, "text", "") for p in parts]).strip()
        return out
    except Exception:
        return ""

def _generate_text_with_retry(model, prompt: str, generation_config: GenerationConfig) -> str:
    """
    Calls model.generate_content and returns safe text.
    Retries once with softened wording if first return is empty.
    """
    resp = model.generate_content(prompt, generation_config=generation_config)
    out = _safe_text(resp)
    if out:
        return out

    # Retry once with slightly safer phrasing
    softened = (
        prompt.replace("Emergency alert", "Public notice")
              .replace("boil-water", "water advisory")
              .replace("gas", "odor")
              .replace("fire", "urgent situation")
    )
    resp2 = model.generate_content(softened, generation_config=generation_config)
    return _safe_text(resp2)

def classify_question(model, question: str) -> str:
    prompt = f"""
Classify the question into EXACTLY one of these labels:
Employment
General Information
Emergency Services
Tax Related

Rules:
- Output ONLY the exact label text above.
- No extra words, punctuation, or explanation.

Question: {question}
Output:
""".strip()

    out = _generate_text_with_retry(model, prompt, GEN_CONFIG_DETERMINISTIC)
    out = out.replace(".", "").strip()

    # normalize common shortenings
    out = {"Emergency": "Emergency Services", "Tax": "Tax Related", "Taxes": "Tax Related"}.get(out, out)

    if out not in ALLOWED_CATEGORIES:
        raise ValueError(f"Unexpected category: {out!r}")
    return out

def generate_announcement(model, topic: str) -> str:
    prompt = f"""
Write ONE professional government social media post.

Rules:
- Max 200 characters
- MUST include the exact phrase: "Check for updates"
- Output ONLY the post text

Topic: {topic}
Post:
""".strip()

    post = _generate_text_with_retry(model, prompt, GEN_CONFIG_POSTS)
    post = re.sub(r"\s+", " ", post).strip()

    # Hard enforcement for deterministic tests
    if "check for updates" not in post.lower():
        if post and not post.endswith((".", "!", "?")):
            post += "."
        post = f"{post} Check for updates."

    return post[:200]

Overwriting app.py


In [None]:
import sys
if "app" in sys.modules:
    del sys.modules["app"]

In [None]:
!pytest -q


[33m[33mno tests ran[0m[33m in 0.00s[0m[0m


In [None]:
# ============================================================
# 2) Function: generate_announcement
# Requirement: social media posts for gov announcements:
# weather emergencies, holidays, school closings, etc. :contentReference[oaicite:2]{index=2}
# ============================================================

def generate_announcement(topic: str) -> str:
    prompt = f"""
You are a government communications officer.
Write ONE short social media post for the public.

Constraints:
- Professional, clear, calm tone.
- Max 200 characters.
- Must include a call-to-action or next step (e.g., "Visit...", "Call...", "Check...").
- Do NOT include hashtags unless the topic explicitly asks for them.
- Output ONLY the post text.

Topic: {topic}
Post:
""".strip()

    resp = model.generate_content(
        prompt,
        generation_config=GEN_CONFIG_POSTS
    )
    post = resp.text.strip()

    # basic enforcement: normalize whitespace
    post = re.sub(r"\s+", " ", post).strip()

    # Hard enforcement: ensure call-to-action and length
    call_to_action_keywords = ["visit", "check", "call", "follow", "more info", "updates"]
    has_call_to_action = any(k in post.lower() for k in call_to_action_keywords)

    if not has_call_to_action:
        suffix = " Check for updates."
        # If the post doesn't end with punctuation, add a period before the suffix
        if post and not post.endswith((".", "!", "?")):
            post += "."

        # Truncate if necessary to make space for the suffix, respecting the 200 char limit
        if len(post) + len(suffix) > 200:
            post = post[:(200 - len(suffix))].rstrip()
            if not post.endswith((".", "!", "?")):
                post += "."

        post += suffix

    # Final length check and truncation
    if len(post) > 200:
        post = post[:197].rstrip() + "..." # Truncate to 197 and add "..."

    return post

print(generate_announcement("Severe winter storm warning tonight. Roads may be unsafe. Include where to check updates."))



In [None]:
# ============================================================
# 3) Unit tests using pytest (required) :contentReference[oaicite:3]{index=3}
# We'll create a test file and run pytest.
# ============================================================

import os

# ensure env var exists (only needed if it's missing)
if not os.environ.get("GOOGLE_CLOUD_PROJECT"):
    pid = !gcloud config get-value project
    os.environ["GOOGLE_CLOUD_PROJECT"] = pid[0]

print("GOOGLE_CLOUD_PROJECT =", os.environ["GOOGLE_CLOUD_PROJECT"])

GOOGLE_CLOUD_PROJECT = qwiklabs-gcp-04-a859397f9965


In [None]:
TEST_FILE = "challenge3.py"

test_code = r'''
import re
import pytest
import app # Import the app module

# Initialize the model once for all tests in app.py
import os
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
if not PROJECT_ID:
    raise RuntimeError("GOOGLE_CLOUD_PROJECT env var not set. Run the setup cell before pytest.")
model_instance = app.init_model(PROJECT_ID, "us-central1") # Initialize model once

def test_classify_employment():
    assert app.classify_question(model_instance, "How do I apply for a job with the city?") == "Employment"

def test_classify_general_info():
    assert app.classify_question(model_instance, "What are the library hours on Saturday?") == "General Information"

def test_classify_emergency_services():
    assert app.classify_question(model_instance, "I smell gas in my apartmentâ€”who should I contact immediately?") == "Emergency Services"

def test_classify_tax_related():
    assert app.classify_question(model_instance, "Where do I pay my property taxes?") == "Tax Related"

def test_generate_announcement_constraints():
    post = app.generate_announcement(model_instance, "School closing tomorrow due to severe snow. Include next steps.")
    assert isinstance(post, str)
    assert len(post) <= 200
    # must have a "next step" hint
    assert any(k in post.lower() for k in ["visit", "check", "call", "follow", "more info", "updates"])
    # professional: avoid excessive punctuation like "!!!"
    assert "!!!" not in post

def test_generate_announcement_bad_example_fails_rule_style():
    # Example of a post that violates our constraints (no call-to-action)
    bad_post = "City offices are closed tomorrow."
    assert not any(k in bad_post.lower() for k in ["visit", "check", "call", "follow", "updates"])
'''

with open(TEST_FILE, "w") as f:
    f.write(test_code)


In [None]:
!pytest -q


[33m[33mno tests ran[0m[33m in 0.00s[0m[0m


In [None]:
!rm -f challenge3.py

In [None]:
# ============================================================
# 4) Evaluation API (Vertex AI GenAI Evaluation Service)
# Requirement: evaluate and compare responses from different prompts :contentReference[oaicite:4]{index=4}
# We'll create a small dataset and run pointwise evaluation.
# ============================================================

# We'll build multiple prompt variants for the announcement generator
prompt_variants = [
    "Public meeting announcement: The City Council will vote on the new zoning proposal tonight. Explain how residents can participate.",
    "Community event notice: The annual spring festival has been rescheduled. Share the new date and where to find details.",
    "Utility update: Scheduled power maintenance will affect several neighborhoods this weekend. Inform residents what to expect.",
    "Transportation advisory: Downtown parking restrictions begin tomorrow. Explain the rules and where to view the affected areas.",
]
generated_posts = [generate_announcement(p) for p in prompt_variants]

eval_df = pd.DataFrame({
    "instruction": prompt_variants,
    "response": generated_posts,
})

eval_df

Unnamed: 0,instruction,response
0,Public meeting announcement: The City Council ...,City Council votes. Check for updates.
1,Community event notice: The annual spring fest...,Our Annual Spring. Check for updates.
2,Utility update: Scheduled power maintenance wi...,Scheduled power maintenance will affect some. ...
3,Transportation advisory: Downtown parking rest...,Downtown parking restrictions. Check for updates.


In [None]:
# dataframe: eval_df
# uuid: B132E6D9-DE2D-4423-951C-11ABC17CA981
# output_variable:
# config_str: Cv4LeyJjaGFydENvbmZpZyI6eyJkYXRhc291cmNlSWQiOiJfX1ZJWl9EQVRBU09VUkNFX18iLCJwcm9wZXJ0eUNvbmZpZyI6eyJjb21wb25lbnRQcm9wZXJ0eSI6eyJzb3J0IjpbeyJzb3J0RGlyIjoxLCJzb3J0Q29sdW1uIjoicXRfamJ6NG83cTUwZCJ9XSwiYnJlYWtkb3duQ29uZmlnIjpbXSwiZmlsdGVycyI6W10sImluaGVyaXRGaWx0ZXJzIjp0cnVlLCJkc1JlcXVpcmVkRmlsdGVycyI6W10sImRhdGFzZXQiOnsiZGF0YXNldFR5cGUiOjEsImRhdGFzZXRJZCI6Il9fVklaX0RBVEFTT1VSQ0VfXyJ9LCJyb3ciOjEwMCwiZGltZW5zaW9ucyI6eyJsYWJlbGVkQ29uY2VwdHMiOlt7ImtleSI6InByaW1hcnkiLCJ2YWx1ZSI6eyJjb25jZXB0TmFtZXMiOlsicXRfZDN6NG83cTUwZCIsInF0X2UzejRvN3E1MGQiXX19XX0sIm1ldHJpY3MiOnsibGFiZWxlZENvbmNlcHRzIjpbeyJrZXkiOiJwcmltYXJ5IiwidmFsdWUiOnsiY29uY2VwdE5hbWVzIjpbXX19XX0sInRhYmxlUHJvcGVydHkiOnsiaG9yaXpvbnRhbFNjcm9sbGluZyI6dHJ1ZSwiY29sdW1uc1dpZHRoIjpbMjguMzQ5NjA5Mzc1LDU3NC43NiwzMTcuNzZdLCJyb3dzSGVpZ2h0IjpbMzEsMzEsMzFdLCJ0YWJsZURpbWVuc2lvblByb3BlcnR5IjpbXSwidGFibGVNZXRyaWNQcm9wZXJ0eSI6W10sImJhY2tncm91bmRBbmRCb3JkZXJQcm9wZXJ0eSI6eyJib3JkZXIiOnsib3BhY2l0eSI6MCwic2l6ZSI6MCwicmFkaXVzIjowfX19LCJjb21wb25lbnRQcm9wZXJ0eU1pZ3JhdGlvblN0YXR1cyI6Mn19LCJjb25jZXB0RGVmcyI6W3siaWQiOiJ0MC5xdF9qYno0bzdxNTBkIiwibmFtZSI6InF0X2piejRvN3E1MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJyZXNwb25zZSIsImFnZ3JlZ2F0aW9uIjozfX19LHsiaWQiOiJ0MC5xdF9kM3o0bzdxNTBkIiwibmFtZSI6InF0X2QzejRvN3E1MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJpbnN0cnVjdGlvbiJ9fX0seyJpZCI6InQwLnF0X2UzejRvN3E1MGQiLCJuYW1lIjoicXRfZTN6NG83cTUwZCIsIm5hbWVzcGFjZSI6InQwIiwicXVlcnlUaW1lVHJhbnNmb3JtYXRpb24iOnsiZGF0YVRyYW5zZm9ybWF0aW9uIjp7InNvdXJjZUZpZWxkTmFtZSI6InJlc3BvbnNlIn19fV0sImF0dHJpYnV0ZUNvbmZpZyI6eyJjb21wb25lbnRBdHRyaWJ1dGUiOnsiZGlzcGxheUNvbmZpZ1ZlcnNpb24iOjAsImRhdGFzb3VyY2VDb25maWdWZXJzaW9uIjoyLCJ0b3AiOjAsImxlZnQiOjAsIndpZHRoIjo2NTcsImhlaWdodCI6NTcxfX0sImNvbXBvbmVudElkIjoiX19WSVpfQ0hBUlRfSURfXyIsInR5cGUiOiJzaW1wbGUtdGFibGUiLCJwcmVzZXQiOiJkZWZhdWx0IiwiYmVoYXZpb3IiOnsibWFwVmFsdWUiOnsiZW50cnkiOltdfX19LCJmaWx0ZXJzIjpbXSwiY2hhcnRJbnRlcmFjdGlvbnMiOltdLCJ2ZXJzaW9uIjoxfRoPCgtpbnN0cnVjdGlvbhABGgwKCHJlc3BvbnNlEAE=

import google.colabsqlviz.explore_dataframe as _vizcell
_vizcell.explore_dataframe(df_or_df_name='eval_df', uuid='B132E6D9-DE2D-4423-951C-11ABC17CA981', config_str='Cv4LeyJjaGFydENvbmZpZyI6eyJkYXRhc291cmNlSWQiOiJfX1ZJWl9EQVRBU09VUkNFX18iLCJwcm9wZXJ0eUNvbmZpZyI6eyJjb21wb25lbnRQcm9wZXJ0eSI6eyJzb3J0IjpbeyJzb3J0RGlyIjoxLCJzb3J0Q29sdW1uIjoicXRfamJ6NG83cTUwZCJ9XSwiYnJlYWtkb3duQ29uZmlnIjpbXSwiZmlsdGVycyI6W10sImluaGVyaXRGaWx0ZXJzIjp0cnVlLCJkc1JlcXVpcmVkRmlsdGVycyI6W10sImRhdGFzZXQiOnsiZGF0YXNldFR5cGUiOjEsImRhdGFzZXRJZCI6Il9fVklaX0RBVEFTT1VSQ0VfXyJ9LCJyb3ciOjEwMCwiZGltZW5zaW9ucyI6eyJsYWJlbGVkQ29uY2VwdHMiOlt7ImtleSI6InByaW1hcnkiLCJ2YWx1ZSI6eyJjb25jZXB0TmFtZXMiOlsicXRfZDN6NG83cTUwZCIsInF0X2UzejRvN3E1MGQiXX19XX0sIm1ldHJpY3MiOnsibGFiZWxlZENvbmNlcHRzIjpbeyJrZXkiOiJwcmltYXJ5IiwidmFsdWUiOnsiY29uY2VwdE5hbWVzIjpbXX19XX0sInRhYmxlUHJvcGVydHkiOnsiaG9yaXpvbnRhbFNjcm9sbGluZyI6dHJ1ZSwiY29sdW1uc1dpZHRoIjpbMjguMzQ5NjA5Mzc1LDU3NC43NiwzMTcuNzZdLCJyb3dzSGVpZ2h0IjpbMzEsMzEsMzFdLCJ0YWJsZURpbWVuc2lvblByb3BlcnR5IjpbXSwidGFibGVNZXRyaWNQcm9wZXJ0eSI6W10sImJhY2tncm91bmRBbmRCb3JkZXJQcm9wZXJ0eSI6eyJib3JkZXIiOnsib3BhY2l0eSI6MCwic2l6ZSI6MCwicmFkaXVzIjowfX19LCJjb21wb25lbnRQcm9wZXJ0eU1pZ3JhdGlvblN0YXR1cyI6Mn19LCJjb25jZXB0RGVmcyI6W3siaWQiOiJ0MC5xdF9qYno0bzdxNTBkIiwibmFtZSI6InF0X2piejRvN3E1MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJyZXNwb25zZSIsImFnZ3JlZ2F0aW9uIjozfX19LHsiaWQiOiJ0MC5xdF9kM3o0bzdxNTBkIiwibmFtZSI6InF0X2QzejRvN3E1MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJpbnN0cnVjdGlvbiJ9fX0seyJpZCI6InQwLnF0X2UzejRvN3E1MGQiLCJuYW1lIjoicXRfZTN6NG83cTUwZCIsIm5hbWVzcGFjZSI6InQwIiwicXVlcnlUaW1lVHJhbnNmb3JtYXRpb24iOnsiZGF0YVRyYW5zZm9ybWF0aW9uIjp7InNvdXJjZUZpZWxkTmFtZSI6InJlc3BvbnNlIn19fV0sImF0dHJpYnV0ZUNvbmZpZyI6eyJjb21wb25lbnRBdHRyaWJ1dGUiOnsiZGlzcGxheUNvbmZpZ1ZlcnNpb24iOjAsImRhdGFzb3VyY2VDb25maWdWZXJzaW9uIjoyLCJ0b3AiOjAsImxlZnQiOjAsIndpZHRoIjo2NTcsImhlaWdodCI6NTcxfX0sImNvbXBvbmVudElkIjoiX19WSVpfQ0hBUlRfSURfXyIsInR5cGUiOiJzaW1wbGUtdGFibGUiLCJwcmVzZXQiOiJkZWZhdWx0IiwiYmVoYXZpb3IiOnsibWFwVmFsdWUiOnsiZW50cnkiOltdfX19LCJmaWx0ZXJzIjpbXSwiY2hhcnRJbnRlcmFjdGlvbnMiOltdLCJ2ZXJzaW9uIjoxfRoPCgtpbnN0cnVjdGlvbhABGgwKCHJlc3BvbnNlEAE=')

<IPython.core.display.Javascript object>

In [None]:
# dataframe: eval_df
# uuid: B14B46CD-3E24-4A7F-9A2A-159720D55D68
# output_variable:
# config_str: Cv4LeyJjaGFydENvbmZpZyI6eyJkYXRhc291cmNlSWQiOiJfX1ZJWl9EQVRBU09VUkNFX18iLCJwcm9wZXJ0eUNvbmZpZyI6eyJjb21wb25lbnRQcm9wZXJ0eSI6eyJzb3J0IjpbeyJzb3J0RGlyIjoxLCJzb3J0Q29sdW1uIjoicXRfMHg0NjhybjQwZCJ9XSwiYnJlYWtkb3duQ29uZmlnIjpbXSwiZmlsdGVycyI6W10sImluaGVyaXRGaWx0ZXJzIjp0cnVlLCJkc1JlcXVpcmVkRmlsdGVycyI6W10sImRhdGFzZXQiOnsiZGF0YXNldFR5cGUiOjEsImRhdGFzZXRJZCI6Il9fVklaX0RBVEFTT1VSQ0VfXyJ9LCJyb3ciOjEwMCwiZGltZW5zaW9ucyI6eyJsYWJlbGVkQ29uY2VwdHMiOlt7ImtleSI6InByaW1hcnkiLCJ2YWx1ZSI6eyJjb25jZXB0TmFtZXMiOlsicXRfbWg2NjhybjQwZCIsInF0X25oNjY4cm40MGQiXX19XX0sIm1ldHJpY3MiOnsibGFiZWxlZENvbmNlcHRzIjpbeyJrZXkiOiJwcmltYXJ5IiwidmFsdWUiOnsiY29uY2VwdE5hbWVzIjpbXX19XX0sInRhYmxlUHJvcGVydHkiOnsiaG9yaXpvbnRhbFNjcm9sbGluZyI6dHJ1ZSwiY29sdW1uc1dpZHRoIjpbMjguMzQ5NjA5Mzc1LDU3NC43NiwzMTcuNzZdLCJyb3dzSGVpZ2h0IjpbMzEsMzEsMzFdLCJ0YWJsZURpbWVuc2lvblByb3BlcnR5IjpbXSwidGFibGVNZXRyaWNQcm9wZXJ0eSI6W10sImJhY2tncm91bmRBbmRCb3JkZXJQcm9wZXJ0eSI6eyJib3JkZXIiOnsib3BhY2l0eSI6MCwic2l6ZSI6MCwicmFkaXVzIjowfX19LCJjb21wb25lbnRQcm9wZXJ0eU1pZ3JhdGlvblN0YXR1cyI6Mn19LCJjb25jZXB0RGVmcyI6W3siaWQiOiJ0MC5xdF8weDQ2OHJuNDBkIiwibmFtZSI6InF0XzB4NDY4cm40MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJyZXNwb25zZSIsImFnZ3JlZ2F0aW9uIjozfX19LHsiaWQiOiJ0MC5xdF9taDY2OHJuNDBkIiwibmFtZSI6InF0X21oNjY4cm40MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJpbnN0cnVjdGlvbiJ9fX0seyJpZCI6InQwLnF0X25oNjY4cm40MGQiLCJuYW1lIjoicXRfbmg2NjhybjQwZCIsIm5hbWVzcGFjZSI6InQwIiwicXVlcnlUaW1lVHJhbnNmb3JtYXRpb24iOnsiZGF0YVRyYW5zZm9ybWF0aW9uIjp7InNvdXJjZUZpZWxkTmFtZSI6InJlc3BvbnNlIn19fV0sImF0dHJpYnV0ZUNvbmZpZyI6eyJjb21wb25lbnRBdHRyaWJ1dGUiOnsiZGlzcGxheUNvbmZpZ1ZlcnNpb24iOjAsImRhdGFzb3VyY2VDb25maWdWZXJzaW9uIjoyLCJ0b3AiOjAsImxlZnQiOjAsIndpZHRoIjo2NTcsImhlaWdodCI6NTcxfX0sImNvbXBvbmVudElkIjoiX19WSVpfQ0hBUlRfSURfXyIsInR5cGUiOiJzaW1wbGUtdGFibGUiLCJwcmVzZXQiOiJkZWZhdWx0IiwiYmVoYXZpb3IiOnsibWFwVmFsdWUiOnsiZW50cnkiOltdfX19LCJmaWx0ZXJzIjpbXSwiY2hhcnRJbnRlcmFjdGlvbnMiOltdLCJ2ZXJzaW9uIjoxfRoPCgtpbnN0cnVjdGlvbhABGgwKCHJlc3BvbnNlEAE=

import google.colabsqlviz.explore_dataframe as _vizcell
_vizcell.explore_dataframe(df_or_df_name='eval_df', uuid='B14B46CD-3E24-4A7F-9A2A-159720D55D68', config_str='Cv4LeyJjaGFydENvbmZpZyI6eyJkYXRhc291cmNlSWQiOiJfX1ZJWl9EQVRBU09VUkNFX18iLCJwcm9wZXJ0eUNvbmZpZyI6eyJjb21wb25lbnRQcm9wZXJ0eSI6eyJzb3J0IjpbeyJzb3J0RGlyIjoxLCJzb3J0Q29sdW1uIjoicXRfMHg0NjhybjQwZCJ9XSwiYnJlYWtkb3duQ29uZmlnIjpbXSwiZmlsdGVycyI6W10sImluaGVyaXRGaWx0ZXJzIjp0cnVlLCJkc1JlcXVpcmVkRmlsdGVycyI6W10sImRhdGFzZXQiOnsiZGF0YXNldFR5cGUiOjEsImRhdGFzZXRJZCI6Il9fVklaX0RBVEFTT1VSQ0VfXyJ9LCJyb3ciOjEwMCwiZGltZW5zaW9ucyI6eyJsYWJlbGVkQ29uY2VwdHMiOlt7ImtleSI6InByaW1hcnkiLCJ2YWx1ZSI6eyJjb25jZXB0TmFtZXMiOlsicXRfbWg2NjhybjQwZCIsInF0X25oNjY4cm40MGQiXX19XX0sIm1ldHJpY3MiOnsibGFiZWxlZENvbmNlcHRzIjpbeyJrZXkiOiJwcmltYXJ5IiwidmFsdWUiOnsiY29uY2VwdE5hbWVzIjpbXX19XX0sInRhYmxlUHJvcGVydHkiOnsiaG9yaXpvbnRhbFNjcm9sbGluZyI6dHJ1ZSwiY29sdW1uc1dpZHRoIjpbMjguMzQ5NjA5Mzc1LDU3NC43NiwzMTcuNzZdLCJyb3dzSGVpZ2h0IjpbMzEsMzEsMzFdLCJ0YWJsZURpbWVuc2lvblByb3BlcnR5IjpbXSwidGFibGVNZXRyaWNQcm9wZXJ0eSI6W10sImJhY2tncm91bmRBbmRCb3JkZXJQcm9wZXJ0eSI6eyJib3JkZXIiOnsib3BhY2l0eSI6MCwic2l6ZSI6MCwicmFkaXVzIjowfX19LCJjb21wb25lbnRQcm9wZXJ0eU1pZ3JhdGlvblN0YXR1cyI6Mn19LCJjb25jZXB0RGVmcyI6W3siaWQiOiJ0MC5xdF8weDQ2OHJuNDBkIiwibmFtZSI6InF0XzB4NDY4cm40MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJyZXNwb25zZSIsImFnZ3JlZ2F0aW9uIjozfX19LHsiaWQiOiJ0MC5xdF9taDY2OHJuNDBkIiwibmFtZSI6InF0X21oNjY4cm40MGQiLCJuYW1lc3BhY2UiOiJ0MCIsInF1ZXJ5VGltZVRyYW5zZm9ybWF0aW9uIjp7ImRhdGFUcmFuc2Zvcm1hdGlvbiI6eyJzb3VyY2VGaWVsZE5hbWUiOiJpbnN0cnVjdGlvbiJ9fX0seyJpZCI6InQwLnF0X25oNjY4cm40MGQiLCJuYW1lIjoicXRfbmg2NjhybjQwZCIsIm5hbWVzcGFjZSI6InQwIiwicXVlcnlUaW1lVHJhbnNmb3JtYXRpb24iOnsiZGF0YVRyYW5zZm9ybWF0aW9uIjp7InNvdXJjZUZpZWxkTmFtZSI6InJlc3BvbnNlIn19fV0sImF0dHJpYnV0ZUNvbmZpZyI6eyJjb21wb25lbnRBdHRyaWJ1dGUiOnsiZGlzcGxheUNvbmZpZ1ZlcnNpb24iOjAsImRhdGFzb3VyY2VDb25maWdWZXJzaW9uIjoyLCJ0b3AiOjAsImxlZnQiOjAsIndpZHRoIjo2NTcsImhlaWdodCI6NTcxfX0sImNvbXBvbmVudElkIjoiX19WSVpfQ0hBUlRfSURfXyIsInR5cGUiOiJzaW1wbGUtdGFibGUiLCJwcmVzZXQiOiJkZWZhdWx0IiwiYmVoYXZpb3IiOnsibWFwVmFsdWUiOnsiZW50cnkiOltdfX19LCJmaWx0ZXJzIjpbXSwiY2hhcnRJbnRlcmFjdGlvbnMiOltdLCJ2ZXJzaW9uIjoxfRoPCgtpbnN0cnVjdGlvbhABGgwKCHJlc3BvbnNlEAE=')

<IPython.core.display.Javascript object>

In [None]:
from vertexai.preview.evaluation import EvalTask

print("Using eval import from: vertexai.preview.evaluation (EvalTask only)")

# Dataset in the format EvalTask expects
dataset = pd.DataFrame({
    "instruction": eval_df["instruction"].tolist(),
    "response": eval_df["response"].tolist(),
})

# Metrics (keep these; if one errors weâ€™ll remove/replace)
metrics = ["coherence", "safety"]

qa_eval_task = EvalTask(
    dataset=dataset,
    metrics=metrics,
    experiment="challenge3-gov-announcements",
    metric_column_mapping={"prompt": "instruction"} # Map 'prompt' to 'instruction' column
)

run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
result = qa_eval_task.evaluate(
    experiment_run_name=f"challenge3-run-{run_ts}"
)

print("Summary metrics:")
print(result.summary_metrics)

print("\nMetrics table (head):")
display(result.metrics_table.head())

Using eval import from: vertexai.preview.evaluation (EvalTask only)


INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 8 Vertex Gen AI Evaluation Service API requests.
100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:14<00:00,  1.77s/it]
INFO:vertexai.evaluation._evaluation:All 8 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:14.151163301001361 seconds


Summary metrics:
{'row_count': 4, 'coherence/mean': np.float64(1.0), 'coherence/std': 0.0, 'safety/mean': np.float64(1.0), 'safety/std': 0.0}

Metrics table (head):


Unnamed: 0,instruction,response,coherence/explanation,coherence/score,safety/explanation,safety/score
0,Public meeting announcement: The City Council ...,City Council votes. Check for updates.,The response is highly illogical and incoheren...,1.0,"The response is brief and unhelpful, but it do...",1.0
1,Community event notice: The annual spring fest...,Our Annual Spring. Check for updates.,The response is highly illogical and incoheren...,1.0,"The response is free from any hate speech, har...",1.0
2,Utility update: Scheduled power maintenance wi...,Scheduled power maintenance will affect some. ...,The response is highly illogical and lacks any...,1.0,"The response is brief and unhelpful, but it do...",1.0
3,Transportation advisory: Downtown parking rest...,Downtown parking restrictions. Check for updates.,The AI response is highly illogical and incohe...,1.0,The response is brief but does not contain any...,1.0
