# Upgrade the Vertex AI SDK & Restart the Kernel

In [None]:
!pip install --quiet --upgrade google-cloud-logging google_cloud_firestore google_cloud_aiplatform langchain langchain-google-vertexai langchain_community langchain_experimental pymupdf requests==2.32.3 protobuf==5.29.5

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.3/45.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m319.9/319.9 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m229.5/229.5 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m79.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.9/104.9 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m91.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# Import libraries

In [2]:
import vertexai
import logging
import google.cloud.logging
from vertexai.language_models import TextEmbeddingModel
from vertexai.generative_models import GenerativeModel

import pickle
from IPython.display import display, Markdown

from langchain_google_vertexai import VertexAIEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_experimental.text_splitter import SemanticChunker

from google.cloud import firestore
from google.cloud.firestore_v1.vector import Vector
from google.cloud.firestore_v1.base_vector_query import DistanceMeasure

In [3]:
# Project-wide configuration for the Vertex AI SDK.
# NOTE(review): PROJECT_ID looks like a temporary lab project id — replace it when running elsewhere.
PROJECT_ID = "qwiklabs-gcp-03-6697f608da39"
LOCATION = "us-central1"

# `vertexai` is already imported in the imports cell, so it is not re-imported here.
vertexai.init(project=PROJECT_ID, location=LOCATION)

In [4]:
# Single embedding model instance, reused by the chunker, the document
# embedding step and the query embedding step further down.
EMBEDDING_MODEL_NAME = "text-embedding-005"
embedding_model = VertexAIEmbeddings(model_name=EMBEDDING_MODEL_NAME)



# Task 2. Download, process and chunk data semantically


In [5]:
# Copy the food safety manual PDF from the lab's Cloud Storage bucket into the
# current working directory, where the loader cell reads it by file name.
!gcloud storage cp gs://partner-genai-bucket/genai109/nyc_food_safety_manual.pdf .

Copying gs://partner-genai-bucket/genai109/nyc_food_safety_manual.pdf to file://./nyc_food_safety_manual.pdf

Average throughput: 67.6MiB/s


In [6]:
# Load the downloaded manual with PyMuPDF. `data` holds the loaded documents;
# their `page_content` is cleaned in the next step.
PDF_PATH = "nyc_food_safety_manual.pdf"
loader = PyMuPDFLoader(PDF_PATH)
data = loader.load()

In [7]:
def clean_page(page):
    """Return ``page.page_content`` with layout artifacts removed.

    The literal substitutions below are applied one after another, in order:
    hyphenated line breaks are joined, remaining newlines become spaces, the
    ``\\x02`` / ``\\x03`` control characters are dropped, and the two running
    header strings of the manual are removed.

    Order matters: the header strings are matched only after newlines have
    been turned into spaces.
    """
    substitutions = (
        ("-\n", ""),
        ("\n", " "),
        ("\x02", ""),
        ("\x03", ""),
        ("fo d P R O T E C T I O N  T R A I N I N G  M A N U A L", ""),
        ("N E W  Y O R K  C I T Y  D E P A R T M E N T  O F  H E A L T H  &  M E N T A L  H Y G I E N E", ""),
    )
    text = page.page_content
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
# Clean every loaded page, preserving the original page order.
cleaned_pages = list(map(clean_page, data))

In [8]:
# Chunk only the first five cleaned pages with the SemanticChunker, which is
# driven by the shared embedding model.
text_splitter = SemanticChunker(embedding_model)
chunk_documents = text_splitter.create_documents(cleaned_pages[:5])

# Keep just the text of each chunk and drop chunks whose text is empty.
chunked_content = [document.page_content for document in chunk_documents if document.page_content]

In [9]:
# Preview the first five chunks rather than the whole list.
display(chunked_content[:5])

 'Registration is done on-line. The link is: nyc.gov/foodprotectioncourse Register for Health Academy Classes On-Line You may now register and pay online for courses offered at the Department of Health and Mental Hygiene’s Health Academy, including the Food Protection Course for restaurants. This new service allows you to avoid going to the Citywide Licensing Center to register for a course. You may also use the on-line service to pay for and request an appointment to replace your Food Protection Certificate. How does it work? Go to the registration web page, nyc.gov/healthacademy, select a course and date, pay the appropriate fee and receive confirmation. You will be asked to provide some personal information before registering. In most cases, you will be able to select from a list of course dates.',
 'If you don’t see a date that is convenient, check back as new course dates are added frequently. 1   INTRODUCTION T he New York City Department of Health and Mental Hygiene has the juri

In [10]:
# Embed every chunk with the shared embedding model. The result is indexed in
# parallel with `chunked_content` (the two lists are zipped together later).
chunked_embeddings = embedding_model.embed_documents(chunked_content)

In [11]:
# Show only the first embedding vector as a sanity check.
display(chunked_embeddings[:1])

[[-0.04205901920795441,
  -0.018308516591787338,
  -0.01328700315207243,
  -0.05176236853003502,
  0.016785096377134323,
  0.034466180950403214,
  0.013948620297014713,
  0.02817848138511181,
  0.02424997091293335,
  0.009253749623894691,
  -0.05151556432247162,
  -0.0885138213634491,
  -0.00404269527643919,
  -0.10059984028339386,
  0.019663695245981216,
  -0.039846841245889664,
  0.0046494207344949245,
  -0.04914997145533562,
  -0.007427764590829611,
  0.03015778958797455,
  -0.008879957720637321,
  -0.062279995530843735,
  0.0011091545457020402,
  -0.011204574257135391,
  -0.03136909008026123,
  0.010546463541686535,
  -0.003176371566951275,
  -0.0329093374311924,
  -0.028883982449769974,
  -0.024877125397324562,
  0.024422328919172287,
  0.05332880839705467,
  0.0789521336555481,
  -0.03262040391564369,
  0.01957697980105877,
  -0.026079880073666573,
  0.036542996764183044,
  0.007938552647829056,
  0.04648442193865776,
  0.010396458208560944,
  0.03674811124801636,
  0.00399580504

In [14]:
# Length of one embedding vector; the Firestore vector index created later
# is configured with this same dimension.
len(chunked_embeddings[0])

768

# Task 3. Load Document Chunks and Embeddings


In [15]:
# Download the pre-computed chunk texts and embeddings (pickle files) from the
# lab's Cloud Storage bucket into the current working directory.
!gcloud storage cp gs://partner-genai-bucket/genai109/chunked_content.pkl .
!gcloud storage cp gs://partner-genai-bucket/genai109/chunked_embeddings.pkl .

Copying gs://partner-genai-bucket/genai109/chunked_content.pkl to file://./chunked_content.pkl
Copying gs://partner-genai-bucket/genai109/chunked_embeddings.pkl to file://./chunked_embeddings.pkl

Average throughput: 146.7MiB/s


In [16]:
# Load the pre-computed chunk texts and embeddings downloaded in the previous cell.
# These rebind `chunked_content` / `chunked_embeddings`, replacing the values
# computed earlier from the first five pages.
# NOTE: unpickling can execute arbitrary code — only load pickle files from a trusted source.
# Context managers ensure both file handles are closed after loading.
with open("chunked_content.pkl", "rb") as content_file:
    chunked_content = pickle.load(content_file)
with open("chunked_embeddings.pkl", "rb") as embeddings_file:
    chunked_embeddings = pickle.load(embeddings_file)

# Task 4. Query Firestore and Generate Answer


In [17]:
# Initialize Firestore client
db = firestore.Client(project=PROJECT_ID)
collection = db.collection("food_safety_chunks")

# zip() silently truncates to the shorter input, so fail loudly if the chunk
# texts and their embeddings are out of step.
if len(chunked_content) != len(chunked_embeddings):
    raise ValueError(
        f"Got {len(chunked_content)} chunks but {len(chunked_embeddings)} embeddings; "
        "they must be the same length."
    )

# Store each chunk together with its embedding. Deterministic document IDs make
# this cell idempotent: re-running it overwrites the same documents instead of
# adding a second copy of every chunk (which would surface as duplicate
# neighbors in the vector search).
for index, (content, embedding) in enumerate(zip(chunked_content, chunked_embeddings)):
    doc = {
        "chunk": content,
        "embedding": Vector(embedding)
    }
    collection.document(f"chunk-{index:05d}").set(doc)

In [18]:
# Create the vector index on the `embedding` field of the food_safety_chunks
# collection. The configured dimension (768) equals the embedding length
# reported by len(chunked_embeddings[0]) earlier in the notebook.
! gcloud firestore indexes composite create \
--collection-group=food_safety_chunks \
--query-scope=COLLECTION \
--field-config=field-path=embedding,vector-config='{"dimension":768,"flat": "{}"}'

Create request issued
Created index [CICAgOjXh4EK].


In [19]:
# Embed a user query and retrieve the most relevant chunks from Firestore
def query_firestore(query: str, k: int = 5) -> str:
    """
    Performs a vector search in Firestore to find the k most relevant chunks.

    Args:
        query: Natural-language question to search for.
        k: Number of nearest neighbors to retrieve; must be at least 1.

    Returns:
        The "chunk" text of every retrieved document, joined by blank lines
        (an empty string when no documents are returned).

    Raises:
        ValueError: If k is less than 1.
    """
    # Reject a meaningless limit before spending an embedding call on it.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # 1. Generate the embedding for the query.
    query_embedding = embedding_model.embed_query(query)

    # 2. Find the k nearest neighbors by cosine distance.
    neighbors = collection.find_nearest(
        vector_field="embedding",
        query_vector=Vector(query_embedding),
        distance_measure=DistanceMeasure.COSINE,
        limit=k
    ).get()

    # 3. Extract the 'chunk' field from each document and join into one context string.
    return "\n\n".join(neighbor.to_dict()["chunk"] for neighbor in neighbors)


In [20]:
# retrieve the most relevant content chunks from Firestore, use those chunks as context to generate an answer using the Gemini model

%%capture --no-stdout

query = "What should you do if food is left out overnight?"
relevant_text = query_firestore(query)
print(f"### Related Content:\n\n{relevant_text}\n\n")

chat_model = GenerativeModel("gemini-2.5-flash")  # Instantiate GenerativeModel directly
chat = chat_model.start_chat()

response = chat.send_message(
    f"Use the following to answer the question:\n\n{relevant_text}\n\nQuestion: {query}"
)
print(f"### Model Response:\n\n{response.text}")

### Related Content:

Food temp. 140–70°F (Left overs) growth of microorganisms within 2 hours; Below 41°F Keep foods uncovered  more than 2 hours, discard within additional 4 hours during cooling 70–41°F  >4 hours, discard Small portions of meat  and shallow containers Reheating Improper equipment Yes Reheat to 165°F  Check food temperature If less than 165°F Slow reheating within 2 hours every hour continue reheating Food in temperature danger zone Use stove or oven Process Hazards CCP Criteria for control Monitoring procedure Action when criteria not met Receiving Unapproved source No Inspect source Check cans for defects Return to supplier or discard (Cans of tuna and Defective cans No home canned products jars of mayonnaise) Home prepared Storage Under sewer lines No In storage area away Observe storage Discard from sewer lines Preparation Not using pre-chilled ingredients Yes No bare hand contact Maintain food ≤41°F Discard food if: Mixing with bare hands Ill workers not working 

In [None]:
# retrieve the most relevant content chunks from Firestore, use those chunks as context to generate an answer using the Gemini model

%%capture --no-stdout

query = "What are some safe ways to cook thawed chicken dumplings?"
relevant_text = query_firestore(query)
print(f"### Related Content:\n\n{relevant_text}\n\n")

chat_model = GenerativeModel("gemini-2.5-flash")  # Instantiate GenerativeModel directly
chat = chat_model.start_chat()

response = chat.send_message(
    f"Use the following to answer the question:\n\n{relevant_text}\n\nQuestion: {query}"
)
print(f"### Model Response:\n\n{response.text}")

### Related Content:

Food temp. 140–70°F (Left overs) growth of microorganisms within 2 hours; Below 41°F Keep foods uncovered  more than 2 hours, discard within additional 4 hours during cooling 70–41°F  >4 hours, discard Small portions of meat  and shallow containers Reheating Improper equipment Yes Reheat to 165°F  Check food temperature If less than 165°F Slow reheating within 2 hours every hour continue reheating Food in temperature danger zone Use stove or oven Process Hazards CCP Criteria for control Monitoring procedure Action when criteria not met Receiving Unapproved source No Inspect source Check cans for defects Return to supplier or discard (Cans of tuna and Defective cans No home canned products jars of mayonnaise) Home prepared Storage Under sewer lines No In storage area away Observe storage Discard from sewer lines Preparation Not using pre-chilled ingredients Yes No bare hand contact Maintain food ≤41°F Discard food if: Mixing with bare hands Ill workers not working 

In [23]:
# retrieve the most relevant content chunks from Firestore, use those chunks as context to generate an answer using the Gemini model

%%capture --no-stdout

query = "What are some safe ways to cook thawed chicken dumplings in the fridge, using only freshly boiled water from a kettle (that stops boiling after reaching 100 degree celsius) and microwave, and a microwaveable container? If there are temperatures, pls think step by step what is the temperature stated, then convert to degree-celsius."
relevant_text = query_firestore(query)
print(f"### Related Content:\n\n{relevant_text}\n\n")

chat_model = GenerativeModel("gemini-2.5-flash")  # Instantiate GenerativeModel directly
chat = chat_model.start_chat()

response = chat.send_message(
    f"Use the following to answer the question:\n\n{relevant_text}\n\nQuestion: {query}"
)
print(f"### Model Response:\n\n{response.text}")

### Related Content:

Microwave Safe 2 4 140° 70° HOURS 41° fo d P R O T E C T I O N  T R A I N I N G  M A N U A L Established Methods to Assist Rapid Cooling:   Always leave food uncovered during cooling; this will hasten the cooling process, replace the cover or lid only after the food has cooled down to 41°F.   Foods must be cooled in a refrigerator or in an ice bath. They should never be allowed to cool while stored at room temperature, for example, being left out on top of a counter.   Cut large pieces of meat into smaller pieces, 6 pounds or less. Such small portions will cool rapidly. Larger pieces take longer to cool.  Do not cool foods in large deep pots. Pour out the contents of large containers into much smaller ones or into shallow pans 4 inches deep with the product depth of 1 to 2 inches.  Use an ice water bath. An ice water bath can be made up by filling a sink or other large container with ice and water. Place the container of hot food into the ice water bath. Whil