In [1]:
%pip install --upgrade --quiet google-cloud-aiplatform google-cloud-aiplatform[evaluation]

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/6.9 MB[0m [31m16.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m3.8/6.9 MB[0m [31m54.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.9/6.9 MB[0m [31m72.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import datetime
import nest_asyncio
import pandas as pd
from IPython.display import display, Markdown, HTML

import vertexai
from vertexai.generative_models import GenerativeModel

# Remove the column-width limit so long text cells are displayed in full
# rather than truncated when DataFrames are rendered.
pd.set_option('display.max_colwidth', None)

In [2]:
import vertexai

# Region and Google Cloud project that the Vertex AI SDK is initialised with.
# NOTE(review): the project ID looks like a temporary lab project — confirm it
# is still valid (or substitute your own) before running.
LOCATION = "us-central1"
PROJECT_ID = "qwiklabs-gcp-01-ef31424894e2"

vertexai.init(location=LOCATION, project=PROJECT_ID)

In [3]:
# Copy the apartment table CSV from Cloud Storage into the current directory.
!gcloud storage cp gs://partner-genai-bucket/genai065/apartment_table.csv .

Copying gs://partner-genai-bucket/genai065/apartment_table.csv to file://./apartment_table.csv


In [4]:
# Read the downloaded CSV into a DataFrame and preview its first five rows.
apartment_df = pd.read_csv(filepath_or_buffer="apartment_table.csv")
apartment_df.head(n=5)

Unnamed: 0,Address,Unit,Sqft,Bedrooms,Elevator,Washer & Dryer in Unit,Pets Allowed,Notable features
0,"123 West 14th Street, New York, NY 10014",2E,550,2,yes,no,yes,"doorman, pool in the building, shared roof deck with grills"
1,"456 East 57th Street, New York, NY 10022",1A,789,1,no,yes,no,"bike room, package service"
2,"789 Broadway, New York, NY 10003",C,999,3,yes,yes,yes,"excellent laundry room, great city views"
3,"1011 5th Avenue, New York, NY 10028",30,1024,2,no,no,yes,"great view of Central Park, high ceilings"
4,"2222 Park Avenue, New York, NY 10017",4F,1234,1,no,yes,no,right next to soccer fields at the park


In [5]:
# Convert the table to a list with one dict per row (column name -> value),
# then show the first record as a sample.
apartment_records = apartment_df.to_dict("records")
apartment_records[0]

{'Address': '123 West 14th Street, New York, NY 10014',
 'Unit': '2E',
 'Sqft': 550,
 'Bedrooms': 2,
 'Elevator': 'yes',
 'Washer & Dryer in Unit': 'no',
 'Pets Allowed': 'yes',
 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}

In [6]:
# Model handle configured with temperature 0 and top_p 0.4.
# NOTE(review): "gemini-pro" may be a legacy model alias — confirm it is still
# served in your project/region.
model = GenerativeModel(
    model_name="gemini-pro",
    generation_config=dict(temperature=0, top_p=0.4),
)

# Instruction text; the string form of an apartment record is appended to it.
prompt = "Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: "

# Generate a listing for the first apartment and render the text as Markdown
# so it displays nicely in the notebook.
Markdown(model.generate_content(f"{prompt}{apartment_records[0]}").text)

Live the high life in this stunning 2-bedroom apartment at 123 West 14th Street! This spacious 550 sq ft unit boasts a doorman for added security and convenience, and an elevator whisks you to your floor in seconds. Enjoy the luxury of a shared roof deck with grills, perfect for summer barbecues with friends, and take a dip in the building's sparkling pool to cool off on hot days. While there's no washer/dryer in the unit, on-site laundry facilities are available for your convenience. And best of all, your furry friends are welcome! Don't miss your chance to call this amazing apartment home. Contact us today to schedule a viewing! 


In [7]:
# "Context" is the supplemental, example-specific information the model needs
# in order to follow the instructions. Here it is the string form of each
# apartment record.
contexts = list(map(str, apartment_records))

# A full prompt is the instruction text created earlier followed by the
# context for one apartment.
full_prompts = [prompt + context for context in contexts]

print(full_prompts[0])

Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '123 West 14th Street, New York, NY 10014', 'Unit': '2E', 'Sqft': 550, 'Bedrooms': 2, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}


In [8]:
# Evaluation dataset: a single "prompt" column holding the first five full prompts.
eval_dataset = pd.DataFrame({"prompt": full_prompts[:5]})

In [9]:
from vertexai.evaluation import (
    MetricPromptTemplateExamples,
    EvalTask,
    PairwiseMetric,
    PairwiseMetricPromptTemplate,
    PointwiseMetric,
    PointwiseMetricPromptTemplate,
)

# Show the names of the example metric prompt templates that are available.
MetricPromptTemplateExamples.list_example_metric_names()

['coherence',
 'fluency',
 'safety',
 'groundedness',
 'instruction_following',
 'verbosity',
 'text_quality',
 'summarization_quality',
 'question_answering_quality',
 'multi_turn_chat_quality',
 'multi_turn_safety',
 'pairwise_coherence',
 'pairwise_fluency',
 'pairwise_safety',
 'pairwise_groundedness',
 'pairwise_instruction_following',
 'pairwise_verbosity',
 'pairwise_text_quality',
 'pairwise_summarization_quality',
 'pairwise_question_answering_quality',
 'pairwise_multi_turn_chat_quality',
 'pairwise_multi_turn_safety']

In [10]:
# Print the prompt template text registered under the 'groundedness' metric name.
print(MetricPromptTemplateExamples.get_prompt_template('groundedness'))


# Instruction
You are an expert evaluator. Your task is to evaluate the quality of the responses generated by AI models.
We will provide you with the user input and an AI-generated response.
You should first read the user input carefully for analyzing the task, and then evaluate the quality of the responses based on the criteria provided in the Evaluation section below.
You will assign the response a rating following the Rating Rubric and Evaluation Steps. Give step by step explanations for your rating, and only choose ratings from the Rating Rubric.


# Evaluation
## Metric Definition
You will be assessing groundedness, which measures the ability to provide or reference information included only in the user prompt.

## Criteria
Groundedness: The response contains information included only in the user prompt. The response does not reference any outside information.

## Rating Rubric
1: (Fully grounded). All aspects of the response are attributable to the context.
0: (Not fully grounde

In [11]:
# Evaluation task: score the prompts in `eval_dataset` with the prebuilt
# pointwise groundedness metric, under the named experiment.
eval_task = EvalTask(
    experiment="apartment-listing-generation",
    metrics=[MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS],
    dataset=eval_dataset,
)

In [12]:
# Name the run with a timestamp (to the second) taken when the cell executes.
run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Run the evaluation task against `model` under that run name.
eval_result = eval_task.evaluate(
    experiment_run_name=f"apt-gen-{run_ts}",
    model=model,
)

# Keep results in a list so they can be plotted against each other later on;
# this first result starts the list.
eval_results_to_compare = [eval_result]

INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/625428184347/locations/us-central1/metadataStores/default/contexts/apartment-listing-generation-apt-gen-20250118-164844 to Experiment: apartment-listing-generation


INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-pro', 'temperature': 0, 'top_p': 0.4}
INFO:vertexai.evaluation._evaluation:Generating a total of 5 responses from Gemini model gemini-pro.
100%|██████████| 5/5 [00:02<00:00,  2.45it/s]
INFO:vertexai.evaluation._evaluation:All 5 responses are successfully generated from Gemini model gemini-pro.
INFO:vertexai.evaluation._evaluation:Multithreaded Batch Inference took: 2.060304406 seconds.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 5 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 5/5 [00:10<00:00,  2.10s/it]
INFO:vertexai.evaluation._evaluation:All 5 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:10.509581986000057 seconds


In [13]:
# Aggregate metrics for the run (row count plus groundedness mean and std).
eval_result.summary_metrics

{'row_count': 5,
 'groundedness/mean': 0.8,
 'groundedness/std': 0.44721359549995804}

In [14]:
# Per-row results: each prompt with its response and the groundedness
# explanation and score.
eval_result.metrics_table

Unnamed: 0,prompt,response,groundedness/explanation,groundedness/score
0,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '123 West 14th Street, New York, NY 10014', 'Unit': '2E', 'Sqft': 550, 'Bedrooms': 2, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}","Live the high life in this stunning 2-bedroom apartment at 123 West 14th Street! This spacious 550 sq ft unit boasts a doorman for added security and convenience, and an elevator whisks you to your floor in seconds. Enjoy the luxury of a shared roof deck with grills, perfect for summer barbecues with friends, and take a dip in the building's sparkling pool to cool off on hot days. While there's no washer/dryer in the unit, on-site laundry facilities are available for your convenience. And best of all, your furry friends are welcome! Don't miss your chance to call this amazing apartment home. Contact us today to schedule a viewing! \n",The response is fully grounded. All information in the response can be found in the prompt.,1.0
1,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '456 East 57th Street, New York, NY 10022', 'Unit': '1A', 'Sqft': 789, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'bike room, package service'}","Live the high life in this stunning 1-bedroom apartment at 456 East 57th Street! This spacious 789 sq ft unit boasts a modern kitchen with stainless steel appliances, a luxurious bathroom with designer finishes, and a washer & dryer for your convenience. Enjoy the convenience of a bike room and package service, all in a pet-free building. While there is no elevator, the stunning views from your apartment will make the climb worthwhile. Don't miss your chance to call this amazing apartment home!","The AI response mentioned stainless steel appliances, designer finishes in the bathroom, modern kitchen and stunning views from the apartment which is not provided in the prompt. Thus, it is not fully grounded.",0.0
2,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '789 Broadway, New York, NY 10003', 'Unit': 'C', 'Sqft': 999, 'Bedrooms': 3, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'yes', 'Notable features': 'excellent laundry room, great city views'}","Live the high life in this stunning 3-bedroom apartment at 789 Broadway! This spacious 999 sq ft unit boasts breathtaking city views and an excellent laundry room. Enjoy the convenience of an elevator building and in-unit washer & dryer. Plus, bring your furry friends along – this pet-friendly apartment welcomes all. Don't miss your chance to call this amazing place home! Contact us today to schedule a viewing.",The response is fully grounded. All the information provided in the response is included in the prompt. The response promotes the apartment by highlighting all of the positive aspects as well as the address and unit number.,1.0
3,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '1011 5th Avenue, New York, NY 10028', 'Unit': '30', 'Sqft': 1024, 'Bedrooms': 2, 'Elevator': 'no', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'great view of Central Park, high ceilings'}","Live in the lap of luxury at this stunning 2-bedroom apartment in the heart of Manhattan. Situated on the 30th floor of a prestigious building on 5th Avenue, this spacious 1,024 sq ft apartment boasts breathtaking views of Central Park and the iconic New York City skyline. High ceilings and an abundance of natural light create an airy and inviting atmosphere. While there is no elevator or washer/dryer in the unit, the unparalleled location and stunning views more than compensate. Pet lovers will appreciate the pet-friendly policy. Don't miss your chance to call this exceptional apartment home!","The response mentions all the details provided in the prompt, such as the address, unit number, square footage, number of bedrooms, lack of elevator and in-unit washer/dryer, pet-friendly policy, view of Central Park, and high ceilings. It accurately reflects the information without hallucinations.",1.0
4,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '2222 Park Avenue, New York, NY 10017', 'Unit': '4F', 'Sqft': 1234, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'right next to soccer fields at the park'}","Live the dream in this spacious 1-bedroom apartment at 2222 Park Avenue, New York, NY 10017. This 1234 sq ft oasis boasts a washer & dryer in unit for ultimate convenience, and is just steps away from the vibrant soccer fields at the park. While there's no elevator, the stunning views and easy access to outdoor activities make this 4th-floor unit a true gem. Sorry, no pets allowed. Contact us today to schedule a viewing! \n","The response mentions each piece of information provided in the prompt and does not include any extraneous information. Therefore, the response is fully grounded.",1.0


In [15]:
# A small wording change to the instructions, intended to discourage the model
# from inventing details that are not in the apartment record.
updated_prompt = "Write a one paragraph apartment listing highlighting the best known features of this apartment. Use only the details included in the following information: "

# Rebuild the full prompts with the new instructions, one per apartment record.
updated_full_prompts = [f"{updated_prompt}{record}" for record in apartment_records]

# Same shape as the first dataset: a "prompt" column with the first five prompts.
updated_eval_dataset = pd.DataFrame({"prompt": updated_full_prompts[:5]})

In [16]:
# Same metric and experiment as the first task, but over the updated prompts.
updated_eval_task = EvalTask(
    experiment="apartment-listing-generation",
    metrics=[MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS],
    dataset=updated_eval_dataset,
)

# Fresh timestamped run name, then evaluate with the same model.
run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
updated_result = updated_eval_task.evaluate(
    experiment_run_name=f"apt-gen-{run_ts}",
    model=model,
)

# Add the new result to the results collected for comparison.
# Note: re-running this cell appends another entry each time.
eval_results_to_compare.append(updated_result)

# Preview the summary
print(updated_result.summary_metrics)

INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/625428184347/locations/us-central1/metadataStores/default/contexts/apartment-listing-generation-apt-gen-20250118-165209 to Experiment: apartment-listing-generation


INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-pro', 'temperature': 0, 'top_p': 0.4}
INFO:vertexai.evaluation._evaluation:Generating a total of 5 responses from Gemini model gemini-pro.
100%|██████████| 5/5 [00:01<00:00,  3.23it/s]
INFO:vertexai.evaluation._evaluation:All 5 responses are successfully generated from Gemini model gemini-pro.
INFO:vertexai.evaluation._evaluation:Multithreaded Batch Inference took: 1.560084803999871 seconds.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 5 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 5/5 [00:06<00:00,  1.32s/it]
INFO:vertexai.evaluation._evaluation:All 5 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:6.605920380000043 seconds


{'row_count': 5, 'groundedness/mean': 0.8, 'groundedness/std': 0.44721359549995804}


In [17]:
import plotly.graph_objects as go
def plot_bar_plot(eval_results, metrics=None):
  """Show a grouped bar chart comparing summary metrics across evaluation runs.

  Args:
    eval_results: Iterable of evaluation results. Each item must expose a
      `summary_metrics` mapping of metric name to value. If the item has a
      `metadata` mapping containing "experiment_run", that value labels the
      run's bars; otherwise the label falls back to "run-<position>".
    metrics: Optional list of substrings. When given and non-empty, only
      summary metrics whose name contains at least one of them are plotted;
      otherwise every summary metric is plotted.

  Returns:
    None. The figure is rendered with `fig.show()`.
  """
  data = []
  for run_index, eval_result in enumerate(eval_results):
    summary_metrics = eval_result.summary_metrics
    if metrics:
      # Keep only the metrics whose name matches one of the requested substrings.
      summary_metrics = {
          metric_name: metric_value
          for metric_name, metric_value in summary_metrics.items()
          if any(selected_metric in metric_name for selected_metric in metrics)
      }

    # A result may have no metadata (or no run name) — presumably when it was
    # evaluated without an experiment — so fall back to a positional label
    # instead of raising.
    metadata = getattr(eval_result, "metadata", None) or {}
    run_name = metadata.get("experiment_run", f"run-{run_index}")

    data.append(
        go.Bar(
            x=list(summary_metrics.keys()),
            y=list(summary_metrics.values()),
            name=run_name,
        )
    )

  fig = go.Figure(data=data)

  # Draw each run's bars side by side rather than stacked.
  fig.update_layout(barmode="group")
  fig.show()


# Plot the mean groundedness of every collected run side by side.
plot_bar_plot(eval_results_to_compare, metrics=["groundedness/mean"])