Installing libraries for Vertex AI SDK and pandas

In [None]:
!pip install -U google-cloud-aiplatform pandas --quiet

Import statements for the generative model classes, Vertex AI initialization, and pandas

In [26]:
from vertexai.preview.generative_models import GenerativeModel
from vertexai.preview.generative_models import SafetySetting, HarmCategory
from vertexai import init
import pandas as pd

Declare Constants

In [27]:
# --- Google Cloud project and region (passed to the Vertex AI init call) ---
PROJECT_ID = "qwiklabs-gcp-00-1c0ebb19fb7c"
LOCATION = "us-central1"

# --- BigQuery dataset and table names ---
DATASET = "AlaskaDeptOfSnow"
TABLE = "alaska_dept"
TABLE_EMBEDDED = "alaska_dept_embedded"

# --- Model name (presumably passed to GenerativeModel later; verify) ---
MODEL_NAME = "gemini-2.0-flash"

# --- Cloud Storage URI of the FAQ CSV ---
DATASET_URI = "gs://labs.roitraining.com/alaska-dept-of-snow/alaska-dept-of-snow-faqs.csv"

Initializing Vertex AI

In [28]:
# Point the Vertex AI SDK at the project and region declared in the constants cell.
init(location=LOCATION, project=PROJECT_ID)


# **Source File Information**

Import the Cloud Storage client library to inspect the Alaska Department of Snow source files

In [29]:
from google.cloud import storage

In [30]:
# Bucket name and object-name prefix of the source files.
bucket_name, prefix = "labs.roitraining.com", "alaska-dept-of-snow"

Bucket info: list the objects under the prefix to see what types of files the source bucket contains

In [31]:
# Create a Cloud Storage client and a handle to the source bucket.
client = storage.Client()
bucket = client.bucket(bucket_name)  # NOTE(review): not used in this cell — confirm whether a later cell needs it

# Request the objects in the bucket whose names start with `prefix`.
blobs = client.list_blobs(bucket_name, prefix=prefix)

# Print one object name per line.
print("Files in GCS path:")
for blob in blobs:
    print(blob.name)

Files in GCS path:
alaska-dept-of-snow/.DS_Store
alaska-dept-of-snow/alaska-dept-of-snow-faqs.csv
alaska-dept-of-snow/faq-01.txt
alaska-dept-of-snow/faq-02.txt
alaska-dept-of-snow/faq-03.txt
alaska-dept-of-snow/faq-04.txt
alaska-dept-of-snow/faq-05.txt
alaska-dept-of-snow/faq-06.txt
alaska-dept-of-snow/faq-07.txt
alaska-dept-of-snow/faq-08.txt
alaska-dept-of-snow/faq-09.txt
alaska-dept-of-snow/faq-10.txt
alaska-dept-of-snow/faq-11.txt
alaska-dept-of-snow/faq-12.txt
alaska-dept-of-snow/faq-13.txt
alaska-dept-of-snow/faq-14.txt
alaska-dept-of-snow/faq-15.txt
alaska-dept-of-snow/faq-16.txt
alaska-dept-of-snow/faq-17.txt
alaska-dept-of-snow/faq-18.txt
alaska-dept-of-snow/faq-19.txt
alaska-dept-of-snow/faq-20.txt
alaska-dept-of-snow/faq-21.txt
alaska-dept-of-snow/faq-22.txt
alaska-dept-of-snow/faq-23.txt
alaska-dept-of-snow/faq-24.txt
alaska-dept-of-snow/faq-25.txt
alaska-dept-of-snow/faq-26.txt
alaska-dept-of-snow/faq-27.txt
alaska-dept-of-snow/faq-28.txt
alaska-dept-of-snow/faq-29.txt
ala

Read the FAQ CSV file from the storage bucket to inspect its contents and row count

In [32]:
# Read the FAQ CSV straight from Cloud Storage, reusing the DATASET_URI constant
# from the constants cell instead of repeating the same literal path.
gcs_uri = DATASET_URI
df = pd.read_csv(gcs_uri)

# A notebook cell only displays its LAST expression, so a bare `df.head()` in
# the middle of the cell is silently discarded. Print the row count explicitly
# and leave the preview as the final expression so both are shown.
print(len(df))
df.head()

50

In [33]:
# Peek at one of the individual FAQ text files.
# Note: this rebinds gcs_uri, which previously pointed at the FAQ CSV.
gcs_uri = "gs://labs.roitraining.com/alaska-dept-of-snow/faq-04.txt"
# Read with a tab separator and no header row.
# NOTE(review): presumably chosen so each text line lands in a single column — this assumes the file contains no tab characters; verify.
data = pd.read_csv(gcs_uri, sep="\t", header=None)

# Display the parsed frame.
data

Unnamed: 0,0
0,Who do I contact to report an unplowed road?
1,Contact your local ADS regional office. Each r...


In [34]:
# Column dtypes of df (the frame read from the FAQ CSV; `data` holds the text file).
df.dtypes

Unnamed: 0,0
question,object
answer,object


# **RAG Process**

Importing the BigQuery client library

In [35]:
from google.cloud import bigquery

Initializing the BigQuery client

In [36]:
# Reuse PROJECT_ID from the constants cell rather than repeating the literal,
# so BigQuery and Vertex AI always target the same project.
bq_client = bigquery.Client(project=PROJECT_ID)

1. Creating Embedding model
2. Loading data to bigquery table
3. Generate embeddings for the loaded data

In [37]:
# Multi-statement BigQuery script that builds the retrieval table in three steps.
# Dataset, table and source-URI names come from the constants cell (DATASET,
# TABLE, TABLE_EMBEDDED, DATASET_URI) so they are defined in exactly one place;
# with the current constant values the rendered SQL is identical to the
# previously hard-coded script.
embedding_and_dataloading_pipeline_sql = f"""
-- 1. Create Embedding Model
CREATE OR REPLACE MODEL `{DATASET}.Embeddings`
REMOTE WITH CONNECTION `us.embedding_conn_us`
OPTIONS (ENDPOINT = 'text-embedding-005');

-- 2. Load CSV from GCS
LOAD DATA OVERWRITE {DATASET}.{TABLE} (
    question STRING,
    answer STRING
)
FROM FILES (
    format = 'CSV',
    uris = ['{DATASET_URI}'],
    skip_leading_rows = 1
);

-- 3. Generate Embeddings
CREATE OR REPLACE TABLE `{DATASET}.{TABLE_EMBEDDED}` AS
SELECT *
FROM ML.GENERATE_EMBEDDING(
    MODEL `{DATASET}.Embeddings`,
    (
      SELECT
        CONCAT("Q: ", question, " A: ", answer) AS content,
        question,
        answer
      FROM
        `{DATASET}.{TABLE}`
    )
);
"""

# Run the SQL and block until the whole script has finished.
# The trailing semicolon stops the notebook from echoing the empty row iterator repr.
bq_client.query(embedding_and_dataloading_pipeline_sql).result();

<google.cloud.bigquery.table._EmptyRowIterator at 0x7f722811ed50>

In [40]:
!pip install --upgrade pyarrow --quiet

To check whether the data was loaded

In [39]:
# Spot-check the loaded table, addressing it through the shared DATASET/TABLE
# constants instead of a second hard-coded copy of the name.
query = f"SELECT * FROM `{DATASET}.{TABLE}` LIMIT 5"
# Note: this rebinds df (previously the frame read from the CSV) to the query result.
df = bq_client.query(query).to_dataframe()

# Print the shape, then leave the frame as the last expression so the notebook
# renders it as a rich table instead of truncated plain text.
print(df.shape)
df.head()


(5, 2)
                                            question  \
0  When was the Alaska Department of Snow establi...   
1  What is the mission of the Alaska Department o...   
2  How does ADS coordinate plowing across differe...   
3       Who do I contact to report an unplowed road?   
4         Does ADS oversee school closure decisions?   

                                              answer  
0  The Alaska Department of Snow (ADS) was establ...  
1  Our mission is to ensure safe, efficient trave...  
2  ADS works with local municipalities and region...  
3  Contact your local ADS regional office. Each r...  
4  While ADS provides data on snow conditions, fi...  
