In [79]:
import os
from google.cloud import storage
from google.cloud import aiplatform
from llama_index.llms.vertex import Vertex
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.embeddings.vertex import VertexTextEmbedding
from google.oauth2 import service_account
import logging
import sys

In [62]:
# Google Service Account credentials
credentials = service_account.Credentials.from_service_account_file(
    "credential/ai-sandbox-company-73-2659f4150720.json"
)

# Ensure OpenAI is not being referenced
os.environ["OPENAI_API_KEY"] = ""


embedding = VertexTextEmbedding(
    model_name="textembedding-gecko@003",
    project="ai-sandbox-company-73",
    location="asia-southeast1",
    credentials=credentials
)

Settings.embed_model = embedding

Getting Data

In [35]:
def ensure_directory_exists(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)
        print(f"Directory {directory} created.")
    else:
        print(f"Directory {directory} already exists.")

def download_files(bucket_name, source_prefix, destination_folder):
    # Create a client
    client = storage.Client()

    # Get the bucket
    bucket = client.bucket(bucket_name)

    # List all blobs in the bucket with the given prefix
    blobs = bucket.list_blobs(prefix=source_prefix)

    # Ensure the destination directory exists
    ensure_directory_exists(destination_folder)

    # Download each matching blob
    for blob in blobs:
        if blob.name.endswith('.pdf'):
            destination_path = os.path.join(destination_folder, os.path.basename(blob.name))
            blob.download_to_filename(destination_path)
            print(f"Downloaded: {blob.name} to {destination_path}")

bucket_name = "jacobs_codes-standards"
source_blob_name = "trial/"
destination_file_name = "./content/"
download_files(bucket_name, source_blob_name, destination_file_name)

Directory ./content/ already exists.
Downloaded: trial/CP 83-1-2004 (2015) CoP for construction CAD - Organisation n naming of CAD layers.pdf to ./content/CP 83-1-2004 (2015) CoP for construction CAD - Organisation n naming of CAD layers.pdf
Downloaded: trial/GFA Handbook_August 2019.pdf to ./content/GFA Handbook_August 2019.pdf
Downloaded: trial/LTA_CIVIL DESIGN CRITERIA.pdf to ./content/LTA_CIVIL DESIGN CRITERIA.pdf
Downloaded: trial/accessibilitycode2019.pdf to ./content/accessibilitycode2019.pdf


Loading Data

In [44]:
documents = SimpleDirectoryReader("./content/").load_data()

In [63]:
# Create the index with Vertex AI LLM explicitly
index = VectorStoreIndex.from_documents(documents)


In [80]:


logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

query_engine = index.as_query_engine()

In [81]:
response = query_engine.query("Intermediate landing requirement for ramps")
print(response) 

DEBUG:llama_index.core.indices.utils:> Top 2 nodes:
> [Node baed03d8-d67a-4c86-8ef5-1ab97cd3451b] [Similarity score:             0.772752] CODE  ON ACCESSIBILITY 2019  
 
64 C h a p t e r  4 
 4.6.5.2  Landings:  
 
(a) must  have a lev...
> [Node 2b0d1b41-9881-4dd6-a6f9-fb30ed70da80] [Similarity score:             0.754826] CODE  ON ACCESSIBILITY 2019  
 
62 C h a p t e r  4 
 4.6.1.2  Where the horizontal run of an app...
> Top 2 nodes:
> [Node baed03d8-d67a-4c86-8ef5-1ab97cd3451b] [Similarity score:             0.772752] CODE  ON ACCESSIBILITY 2019  
 
64 C h a p t e r  4 
 4.6.5.2  Landings:  
 
(a) must  have a lev...
> [Node 2b0d1b41-9881-4dd6-a6f9-fb30ed70da80] [Similarity score:             0.754826] CODE  ON ACCESSIBILITY 2019  
 
62 C h a p t e r  4 
 4.6.1.2  Where the horizontal run of an app...
Landings are required at intervals not exceeding the maximum length specified for various gradients.  A 1:12 gradient requires landings every 6 meters, a 1:14 gradient every 9 meter

In [82]:
index.storage_context.persist()

DEBUG:fsspec.local:open file: /Users/dylim/Documents/projects/AITrailblazers/storage/docstore.json
open file: /Users/dylim/Documents/projects/AITrailblazers/storage/docstore.json
DEBUG:fsspec.local:open file: /Users/dylim/Documents/projects/AITrailblazers/storage/index_store.json
open file: /Users/dylim/Documents/projects/AITrailblazers/storage/index_store.json
DEBUG:fsspec.local:open file: /Users/dylim/Documents/projects/AITrailblazers/storage/graph_store.json
open file: /Users/dylim/Documents/projects/AITrailblazers/storage/graph_store.json
DEBUG:fsspec.local:open file: /Users/dylim/Documents/projects/AITrailblazers/storage/default__vector_store.json
open file: /Users/dylim/Documents/projects/AITrailblazers/storage/default__vector_store.json
DEBUG:fsspec.local:open file: /Users/dylim/Documents/projects/AITrailblazers/storage/image__vector_store.json
open file: /Users/dylim/Documents/projects/AITrailblazers/storage/image__vector_store.json
