In [None]:
# Copyright 2023 Nils Knieling
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Building AI-powered applications using LangChain, Google Vertex AI and Matching Engine

[![Open in Colab](https://img.shields.io/badge/Open%20in%20Colab-%23F9AB00.svg?logo=googlecolab&logoColor=white)](https://colab.research.google.com/github/Cyclenerd/toolbox/blob/master/notebooks/LangChain_VertexAI_Matching_Engine.ipynb)
[![Open in Vertex AI Workbench](https://img.shields.io/badge/Open%20in%20Vertex%20AI%20Workbench-%234285F4.svg?logo=googlecloud&logoColor=white)](https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/Cyclenerd/toolbox/master/notebooks/LangChain_VertexAI_Matching_Engine.ipynb)
[![View on GitHub](https://img.shields.io/badge/View%20on%20GitHub-181717.svg?logo=github&logoColor=white)](https://github.com/Cyclenerd/toolbox/blob/master/notebooks/LangChain_VertexAI_Matching_Engine.ipynb)

## Install required packages

>⚠️ You may receive a warning to "Restart Runtime" after the packages are installed. Don't worry, the subsequent cells will help you restart the runtime.

In [None]:
#@markdown ### Install dependencies

!sudo apt install jq
!pip install langchain==0.0.244
!pip install -U google-cloud-aiplatform==1.28.1 "shapely < 2.0.0"
!pip install google-cloud-storage==2.8.0
!pip install unstructured==0.8.4

print("☑️ Done")

In [None]:
#@markdown ### Restart runtime

# Automatically restart kernel after installs so that your environment
# can access the new packages.
# All Python variables defined so far are lost by the restart, so the
# configuration cells below have to be run afterwards.
import IPython

# Fetch the running IPython application and ask its kernel to shut down;
# `True` is passed as the restart flag.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

print("☑️ Done")

## Setup Google Cloud environment

>

In [None]:
# @markdown ✏️ Replace the placeholder text below:

# Please fill in these values.
# These names are notebook-wide settings: later cells interpolate them into
# `!gcloud` commands as "{name}" and pass them to the Python SDK calls.
# Both bucket values are plain bucket names; the cells add the `gs://` prefix.
project_id = "test-nils-ai"  # @param {type:"string"}
region = "us-central1"  # @param {type:"string"}
data_bucket = "test-nils-data-man"  # @param {type:"string"}
staging_bucket = "test-nils-staging-man"  # @param {type:"string"}
index_display_name = "test-nils-index-man"  # @param {type:"string"}
vpc_network_name = "vpc-test-nils-matching-engine" # @param {type:"string"}
llm_model = "text-bison@001" # @param {type:"string"}

# Quick input validations.
# Each assert only checks that the value is non-empty; it does not verify that
# the resource exists.
assert project_id, "⚠️ Please provide a Google Cloud project ID"
assert region, "⚠️ Please provide a Google Cloud region"
assert data_bucket, "⚠️ Please provide a Google Cloud storage bucket with unstructured data"
assert staging_bucket, "⚠️ Please provide a Google Cloud storage bucket for AI staging"
assert index_display_name, "⚠️ Please provide a index name"
assert vpc_network_name, "⚠️ Please provide a VPC network name"
assert llm_model, "⚠️ Please provide a pretrained LLM"

# Configure gcloud.
# Sets the default project and enables parallel composite uploads for the
# `gcloud storage` commands used later.
!gcloud config set project "{project_id}"
!gcloud config set storage/parallel_composite_upload_enabled "True"

print("☑️ Done")

In [6]:
#@markdown ### (Colab only!) Authenticate your Google Cloud Account

# Authenticate gcloud.
# `google.colab` is imported here, so this cell is meant to be run in Colab
# only (see the cell title); skip it in other environments.
from google.colab import auth
auth.authenticate_user()

In [None]:
#@markdown ###  Check authenticated user
# The shell capture returns the command output as a list of lines; with the
# filter and format below each line is the account name of an active login.
current_user = !gcloud auth list \
  --filter="status:ACTIVE" \
  --format="value(account)" \
  --quiet

# Rebind the name to the first output line. This raises an IndexError when the
# command printed nothing, i.e. when no account is active.
current_user = current_user[0]
print(f"Current user: {current_user}")

In [None]:
#@markdown ### Enable APIs

# Enable APIs
# Service names of the Google Cloud APIs that are enabled for the project.
my_google_apis = [
    "storage.googleapis.com",
    "compute.googleapis.com",
    "notebooks.googleapis.com",
    "aiplatform.googleapis.com",
    "servicenetworking.googleapis.com",
    "vpcaccess.googleapis.com"
]

# One `gcloud services enable` call per API; `{api}` and `{project_id}` are
# interpolated from the Python variables before the shell command runs.
for api in my_google_apis :
  print(f"Enable API: {api}")
  !gcloud services enable "{api}" \
    --project="{project_id}" \
    --quiet

print("☑️ OK")

In [None]:
#@markdown ### Get project number

project_number = !gcloud projects list \
  --filter="{project_id}" \
  --format="value(PROJECT_NUMBER)" \
  --quiet

project_number = project_number[0]

print(f"Project number: {project_number}")

### Create Storage

In [None]:
#@markdown ### Create storage bucket for data

#@markdown > Only necessary if the bucket does not already exist!
# Creates the bucket that holds the source documents, in the configured region
# and with uniform bucket-level access.
!gcloud storage buckets create 'gs://{data_bucket}' \
  --location='{region}' \
  --uniform-bucket-level-access \
  --quiet

print("☑️ Done")
# Console link for browsing the bucket.
print(f"Open in console: https://console.cloud.google.com/storage/browser/{data_bucket}")

In [None]:
#@markdown ### Create storage bucket for staging (index)

# Creates the bucket that later receives the `documents/` and `embeddings/`
# folders used by the index.
!gcloud storage buckets create 'gs://{staging_bucket}' \
  --location='{region}' \
  --uniform-bucket-level-access \
  --quiet

print("☑️ Done")

### Create VPC Network

In [None]:
#@markdown #### Create a regional VPC network

# Custom subnet mode: the subnet is created explicitly in the next cell.
# Regional BGP routing mode is requested for the network.
!gcloud compute networks create "{vpc_network_name}" \
  --subnet-mode="custom" \
  --bgp-routing-mode="regional" \
  --project="{project_id}" \
  --quiet

print("☑️ Done")

In [None]:
#@markdown #### Create a subnet in the region

# The subnet is named "<network>-<region>"; the Workbench instance and the
# clean-up cells refer to it by this name.
# It uses the range 10.128.1.0/24 and has Private Google Access enabled.
!gcloud compute networks subnets create "{vpc_network_name}-{region}" \
  --network="{vpc_network_name}" \
  --region="{region}" \
  --range="10.128.1.0/24" \
  --enable-private-ip-google-access \
  --project="{project_id}" \
  --quiet

print("☑️ Done")

In [None]:
#@markdown #### Create firewall rules

# Rule 1: allow SSH (tcp:22), RDP (tcp:3389) and ICMP into the network.
# NOTE(review): no --source-ranges is given for this rule; confirm which
# sources gcloud applies by default and restrict them if the rule must not be
# open to the internet.
!gcloud compute firewall-rules create "{vpc_network_name}-allow-default" \
  --allow="tcp:22,tcp:3389,icmp" \
  --network="{vpc_network_name}" \
  --project="{project_id}" \
  --quiet
# Rule 2: allow all protocols for traffic coming from 10.128.0.0/9, which
# contains the subnet range 10.128.1.0/24 created above.
!gcloud compute firewall-rules create "{vpc_network_name}-allow-internal" \
  --allow="all" \
  --source-ranges="10.128.0.0/9" \
  --network="{vpc_network_name}" \
  --project="{project_id}" \
  --quiet

print("☑️ Done")

In [None]:
#@markdown #### Create router in region

# Cloud Router named "router-<network>-<region>"; the NAT configuration in the
# next cell is attached to it.
!gcloud compute routers create "router-{vpc_network_name}-{region}" \
  --network="{vpc_network_name}" \
  --region="{region}" \
  --project="{project_id}" \
  --quiet

print("☑️ Done")

In [5]:
#@markdown #### Add NAT to router in region

# Adds a NAT configuration named "nat-<network>-<region>" to the router
# created in the previous cell. External IPs are allocated automatically, all
# subnet IP ranges are covered, and only errors are logged.
!gcloud compute routers nats create "nat-{vpc_network_name}-{region}" \
  --router="router-{vpc_network_name}-{region}"  \
  --auto-allocate-nat-external-ips \
  --nat-all-subnet-ip-ranges \
  --enable-logging \
  --log-filter=ERRORS_ONLY \
  --min-ports-per-vm=256 \
  --region="{region}" \
  --project="{project_id}" \
  --quiet

print("☑️ Done")

In [None]:
#@markdown #### Reserve IP addresses for VPC peering in the region

# Reserves a global /16 address range with purpose VPC_PEERING under the name
# "peering-<network>"; the VPC peering cell below passes this name as --ranges.
!gcloud compute addresses create "peering-{vpc_network_name}" \
  --global \
  --prefix-length=16 \
  --network="{vpc_network_name}" \
  --purpose="VPC_PEERING" \
  --description="Peering range for Matchin Engine" \
  --project="{project_id}" \
  --quiet

print("☑️ Done")

In [None]:
#@markdown #### Connect to `servicenetworking.googleapis.com` via VPC peering

# Peers the network with the service networking API, using the address range
# "peering-<network>" reserved in the previous cell.
!gcloud services vpc-peerings connect \
  --service="servicenetworking.googleapis.com" \
  --network="{vpc_network_name}" \
  --ranges="peering-{vpc_network_name}" \
  --project="{project_id}" \
  --quiet

print("☑️ Done")

## LangChain & Vertex AI

In [None]:
#@markdown #### Import and print versions

import sys
print(f"☑️ Python: {sys.version}")

# Langchain
import langchain

print(f"☑️ LangChain version: {langchain.__version__}")

# Vertex AI
# https://python.langchain.com/docs/integrations/llms/google_vertex_ai_palm
# NOTE(review): `aiplatform_v1beta1` and `VertexAI` are imported here but no
# cell shown in this notebook calls them; `aiplatform_v1beta1` is only
# mentioned in the comment about public endpoints.
from google.cloud import aiplatform, aiplatform_v1beta1
from langchain.llms import VertexAI

# Set project, region and staging bucket as defaults for the SDK calls below.
# NOTE(review): `staging_bucket` is passed as a bare bucket name without a
# `gs://` prefix; confirm that the SDK accepts this form.
aiplatform.init(
    project=project_id,
    location=region,
    staging_bucket=staging_bucket
)

print(f"☑️ Vertex AI SDK version: {aiplatform.__version__}")

## Staging

### Directory Loader

In [None]:
# Load documents from bucket.
# This code snippet may run for a few (5-35) minutes.

# https://python.langchain.com/docs/integrations/document_loaders/google_cloud_storage_directory
from langchain.document_loaders import GCSDirectoryLoader

# Read the contents of the data bucket into `documents`, which is the input
# of the text splitter cell below.
loader = GCSDirectoryLoader(project_name=f"{project_id}", bucket=f"{data_bucket}")
documents = loader.load()

print(f"☑️ You have {len(documents)} documents.")

### Text Splitter



In [None]:
# Split long text descriptions into smaller chunks that can fit into
# the API request size limit, as expected by the LLM providers.

# https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/character_text_splitter
from langchain.text_splitter import CharacterTextSplitter

# Split on newlines into chunks of `chunk_size` 1000 with an overlap of 100.
text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 1000,
    chunk_overlap  = 100)

document_chunks = text_splitter.split_documents(documents)

print(f"Number documents {len(documents)}")
print(f"Number chunks {len(document_chunks)}")

# From here on `document_chunks` is a list of plain strings (no longer chunk
# objects): each string carries the chunk text and its source, so that the
# source can be shown together with the answer later.
document_chunks=[f"Context: {chunk.page_content} Source: {chunk.metadata['source']}" for chunk in document_chunks]

print("☑️ Done")

### Embeddings

While the embeddings are stored in the Matching Engine, the embedded documents are stored in Cloud Storage (GCS). They must be located in a folder named `documents`.

In [None]:
# Create empty local documents folder
# Any existing ./documents folder is removed first, so files of a previous
# run do not get uploaded again.
!rm -rf ./documents
!mkdir -p ./documents

print("☑️ Done")

In [None]:
# Generate the vector embeddings for each chunk of text.
# This code snippet may run for several (5-30) minutes.

# Utils
import time
import pandas as pd
import json

# https://python.langchain.com/docs/integrations/text_embedding/google_vertex_ai_palm
from langchain.embeddings import VertexAIEmbeddings


def handle_quota_errors(func, *args, retry_delay=5, backoff_factor=2, max_retries=3, **kwargs):
    """Call ``func(*args, **kwargs)`` and retry with exponential backoff on errors.

    Despite the name, every ``Exception`` raised by ``func`` is treated as
    retryable, not only quota errors.

    Args:
        func: Callable to invoke.
        *args: Positional arguments passed on to ``func``.
        retry_delay: Base delay in seconds.
        backoff_factor: Multiplier of the delay; the wait before retry ``n``
            (starting at 1) is ``retry_delay * backoff_factor ** n``.
        max_retries: Number of additional attempts after the first failure.
            With ``0`` the function is called exactly once. This keyword is
            consumed here and is not passed on to ``func``.
        **kwargs: Keyword arguments passed on to ``func``.

    Returns:
        The return value of ``func``, or ``None`` when all attempts failed.
        Callers have to check for ``None``.
    """
    for attempt in range(max_retries + 1):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"error: {e}")
            # Do not sleep after the last attempt; nothing follows it.
            if attempt == max_retries:
                break
            wait = retry_delay * (backoff_factor ** (attempt + 1))
            # Announce the wait before sleeping so the user sees why the
            # cell is idle.
            print(f"wait for {wait} seconds")
            time.sleep(wait)

    return None


embeddings = VertexAIEmbeddings()

# One row per text chunk; the row index becomes the document ID.
df = pd.DataFrame(document_chunks, columns =['text'])

# JSON lines (one object per embedded chunk) for the index input file.
index_embeddings = []

for index, doc in df.iterrows():
  print(f"Get embedding and write document for document {index} of {len(df)-1}")
  embedding = handle_quota_errors(embeddings.embed_query, doc['text'])

  # `handle_quota_errors` returns None when the call failed. Such chunks are
  # skipped, so they appear neither in the index nor in the documents folder.
  if embedding is not None:

    # The same ID is used as index entry ID and as file name, so that a search
    # hit can be mapped back to its text in the `documents` folder.
    doc_id = f"{index}.txt"
    embedding_dict = {
              "id": doc_id,
              "embedding": [str(value) for value in embedding],
    }
    index_embeddings.append(json.dumps(embedding_dict) + "\n")

    # Write with an explicit encoding: chunk text may contain non-ASCII
    # characters and the platform default encoding is not always UTF-8.
    with open(f"documents/{doc_id}", "w", encoding="utf-8") as document:
      document.write(doc['text'])


with open("embeddings.json", "w", encoding="utf-8") as f:
    f.writelines(index_embeddings)

print("☑️ Done")

In [None]:
# Test JSON
# It has already happened that the JSON was invalid. If it happens just create it again.
# The file holds one JSON object per line. jq parses all of them; its output is
# discarded and "OK" is only echoed when jq exits successfully.
!jq -er '.' < "embeddings.json" > /dev/null && echo "☑️ OK"

In [None]:
# Copy data to staging bucket
# The chunk texts go to the `documents/` folder and the embeddings file to the
# `embeddings/` folder; the index is later created from `embeddings/`.
# NOTE(review): `documents/*` is expanded by the shell; with a very large
# number of chunk files this may run into the argument length limit.

!gcloud storage cp --gzip-in-flight-all documents/* gs://{staging_bucket}/documents/
!gcloud storage cp embeddings.json gs://{staging_bucket}/embeddings/embeddings.json

print("☑️ Done")

### Create ANN Index (for Production Usage)


In [None]:
# Create the index configuration
# This code snippet may run for several (>30) minutes.

# ANN = Approximate Nearest Neighbors algorithm

# The index is built from the files below `embeddings/` in the staging bucket
# and gets the display name "<index_display_name>-index".
# NOTE(review): `dimensions=768` has to match the length of the vectors
# returned by the embedding model used above - confirm when changing the model.
my_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
    display_name=f"{index_display_name}-index",
    contents_delta_uri=f"gs://{staging_bucket}/embeddings/",
    dimensions=768,
    approximate_neighbors_count=150,
    distance_measure_type="DOT_PRODUCT_DISTANCE",
    description=f"Index {index_display_name}",
)

print("☑️ Done")

In [None]:
# Create endpoint
# The endpoint gets the display name "<index_display_name>-endpoint" and is
# attached to the VPC network. The network is referenced by its full resource
# name, which is built from the project number (not the project ID).

my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
    display_name=f"{index_display_name}-endpoint",
    description=f"Endpoint for index {index_display_name}",
    # Deploy to VPC network.
    network=f"projects/{project_number}/global/networks/{vpc_network_name}"
    # Or use public endpoint.
    # IMPORTANT: Public endpoint is beta.
    # You need to use aiplatform_v1beta1 when _query_ or inserting vectors.
    # https://cloud.google.com/vertex-ai/docs/matching-engine/deploy-index-public
    # public_endpoint_enabled=True
)

print("☑️ Done")

In [None]:
# Deploy index
# This code snippet may run for several (15-50) minutes.
# In the background everything is deployed for you (2x n1-standard-16).

# The deployed index ID is the index display name with every "-" replaced by
# "_". The undeploy cell in the clean-up section derives the ID the same way.
my_deployed_index = my_index_endpoint.deploy_index(
    index=my_index,
    deployed_index_id=index_display_name.replace('-','_')
)

print("☑️ Done")

In [None]:
# Get the index ID
# Prints the resource names of all indexes in the region whose display name
# matches `index_display_name`. The ID has to be entered in the next cell.
print("Index ID:")
!gcloud ai indexes list \
  --format='value(name)' \
  --project="{project_id}" \
  --region="{region}" \
  --filter="displayName ~ {index_display_name}" \
  --quiet

# Get the index endpoints ID
# Same lookup for the index endpoints.
print("\nEndpoint ID:")
!gcloud ai index-endpoints list \
  --format='value(name)' \
  --filter="displayName ~ {index_display_name}" \
  --project="{project_id}" \
  --region="{region}" \
  --quiet

In [None]:
# @markdown ✏️ Enter and store index and endpoint ID for later use:
# The IDs are kept as strings. The query function and the clean-up cells read
# `my_index_id` and `my_endpoint_id`.
my_index_id    = "6751098151567884288" # @param {type:"string"}
my_endpoint_id = "748925748189855744" # @param {type:"string"}

# Quick input validations.
# Only checks that the values are non-empty.
assert my_index_id, "⚠️ Please input a valid index ID"
assert my_endpoint_id, "⚠️ Please input a valid end ID"

print("☑️ OK")

## Vertex Workbench

Deploy a Vertex user-managed notebooks instance in the VPC network.

In [None]:
# Create user-managed instance for workbench/notebooks
# This code snippet may run for a few (>5) minutes.

print("Please wait...")

# OS images: https://gcloud-compute.com/images.html
# The instance is named "workbench-<network>" and is placed in the subnet
# created above, without a public IP, in zone "<region>-b". The list and
# delete cells further down use the same name and location.
!gcloud notebooks instances create "workbench-{vpc_network_name}" \
  --machine-type="n1-standard-2" \
  --vm-image-project="deeplearning-platform-release"\
  --vm-image-family="tf-latest-gpu-ubuntu-2004-py310" \
  --boot-disk-size="75" \
  --boot-disk-type="PD_SSD" \
  --data-disk-size="150"\
  --data-disk-type="PD_SSD" \
  --network="{vpc_network_name}" \
  --subnet="{vpc_network_name}-{region}" \
  --subnet-region="{region}" \
  --no-public-ip \
  --project="{project_id}" \
  --location="{region}-b" \
  --quiet

print("☑️ Done")

### Query

⚠️ Only works within the same VPC network. It will not work from your local environment or from notebooks (like Colab) outside of the network!

Use a Vertex user-managed notebooks instance in the same VPC network.

In [119]:
# Vertex AI Matching Engine as Vector Store
# https://cloud.google.com/vertex-ai/docs/matching-engine/overview
# https://python.langchain.com/docs/integrations/vectorstores/matchingengine
from langchain.vectorstores.matching_engine import MatchingEngine
from langchain.agents import Tool

from langchain.embeddings import VertexAIEmbeddings
embeddings = VertexAIEmbeddings()

from vertexai.preview.language_models import TextGenerationModel
model = TextGenerationModel.from_pretrained(llm_model)

def matching_engine_search(question, k=8, max_context_chars=6000):
    """Return the text of the documents most similar to ``question``.

    Reads the notebook-level settings ``my_index_id``, ``my_endpoint_id``,
    ``region``, ``project_id``, ``staging_bucket`` and ``embeddings``. The
    vector store is created anew on every call.

    Args:
        question: Search text in natural language.
        k: Number of documents requested from the similarity search.
        max_context_chars: Upper bound, in characters, of the returned text.
            The joined text is cut off at this length, so the last document
            may be truncated.

    Returns:
        The page contents of the hits joined by newlines, as one string.
    """
    vector_store = MatchingEngine.from_components(
        index_id=my_index_id,
        region=region,
        embedding=embeddings,
        project_id=project_id,
        endpoint_id=my_endpoint_id,
        gcs_bucket_name=staging_bucket,
    )

    relevant_documentation = vector_store.similarity_search(question, k=k)
    context = "\n".join(doc.page_content for doc in relevant_documentation)
    return context[:max_context_chars]

print("☑️ OK")

In [None]:
# @markdown Enter search query in a simple English text.
user_query = "What is the default shell and the command to show the uptime?"  # @param {type:"string"}

# Quick input validations.
assert user_query, "⚠️ Please input a valid input search text"

# Step 1: fetch the text of the most similar documents from the vector store.
matching_engine_response=matching_engine_search(user_query)
print(f"Contex:\n---schnipp---\n{matching_engine_response}\n---schnapp---")

# Step 2: build the prompt. The retrieved text is embedded as context and the
# model is told to answer from this context only and to show its sources.
prompt=f"""
I want you to act as a Linux expert. Follow exactly those 3 steps:
1. Read the context below and aggregrate this data
Context : {matching_engine_response}
2. Answer the question using only this context
3. Always show the sources for your answers
User Question: {user_query}


If you don't have any context and are unsure of the answer, reply with 'I don't know about this topic'.
"""

# Both lengths are counted in characters, not in tokens.
contex_len=len(matching_engine_response)
print(f"Contex length: {contex_len}")
prompt_len=len(prompt)
print(f"Prompt length: {prompt_len}")

# Step 3: send the prompt to the model loaded in the previous cell.
response = model.predict(
        prompt,
        temperature=0.2,
        top_k=40,
        top_p=.8,
        max_output_tokens=1024,
)

print(f"Question: \n{user_query}")
print(f"Response: \n{response.text}")

## Helper

In [None]:
# List all indexes of the project in the configured region
!gcloud ai indexes list \
  --project="{project_id}" \
  --region="{region}"

In [None]:
# List all index endpoints of the project in the configured region
!gcloud ai index-endpoints list \
  --project="{project_id}" \
  --region="{region}"

In [None]:
# Get the index public domain name
# https://cloud.google.com/vertex-ai/docs/matching-engine/deploy-index-public#get_the_index_domain_name
# NOTE(review): the endpoint in this notebook is created inside the VPC network
# (the public endpoint option is commented out), so this value is presumably
# empty unless that option is enabled - confirm.
!gcloud ai index-endpoints list \
  --format='value(publicEndpointDomainName)' \
  --filter="displayName ~ {index_display_name}" \
  --project="{project_id}" \
  --region="{region}" \
  --quiet

In [None]:
# List notebooks instances in a region/location
# The location "<region>-b" is the one the Workbench instance was created in.
!gcloud notebooks instances list \
  --location="{region}-b" \
  --project="{project_id}" \
  --quiet

## Clean up

### Index

In [None]:
# Undeploy the deployed-index
# The deployed index ID is derived exactly as in the deploy cell: the index
# display name with every "-" replaced by "_".
deployed_index_id = index_display_name.replace('-','_')

print(f"Undeploy index: {deployed_index_id}")
print(f"Endpoint ID: {my_endpoint_id}")
print(f"Project: {project_id}")

!gcloud ai index-endpoints undeploy-index '{my_endpoint_id}' \
  --deployed-index-id="{deployed_index_id}" \
  --project="{project_id}" \
  --region="{region}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete the index endpoint with the ID stored in `my_endpoint_id`
# Run the undeploy cell above first.
!gcloud ai index-endpoints delete '{my_endpoint_id}' \
  --project="{project_id}" \
  --region="{region}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete the index with the ID stored in `my_index_id`
!gcloud ai indexes delete '{my_index_id}' \
  --project="{project_id}" \
  --region="{region}" \
  --quiet
print("☑️ Done")

### Storage

In [None]:
# Delete data bucket
# Destructive: removes gs://{data_bucket} recursively (`-r`), i.e. including
# the source documents stored in it.
! gcloud storage rm -r 'gs://{data_bucket}' \
  --project="{project_id}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete staging bucket
# Destructive: removes gs://{staging_bucket} recursively (`-r`), i.e. including
# the uploaded `documents/` and `embeddings/` folders.
! gcloud storage rm -r 'gs://{staging_bucket}' \
  --project="{project_id}" \
  --quiet
print("☑️ Done")

### Vertex Workbench

In [None]:
# Delete user-managed notebooks/workbench instance
# Name and location are the ones used when the instance was created.
print("Please wait...")
!gcloud notebooks instances delete "workbench-{vpc_network_name}" \
  --project="{project_id}" \
  --location="{region}-b" \
  --quiet
print("☑️ Done")

### Network

In [None]:
# Delete peering to servicenetworking.googleapis.com
# Removes the VPC peering connection that was set up for the network.
print("Please wait...")
!gcloud services vpc-peerings delete \
  --service="servicenetworking.googleapis.com" \
  --network="{vpc_network_name}" \
  --project="{project_id}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete IP addresses for VPC peering in the region
# Releases the global address range "peering-<network>" reserved for the
# peering. Run this after the peering itself has been deleted.
print("Please wait...")
!gcloud compute addresses delete "peering-{vpc_network_name}" \
  --global \
  --project="{project_id}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete NAT router in region
# Removes the NAT configuration "nat-<network>-<region>" from the router; the
# router itself is deleted in the next cell.
print("Please wait...")
!gcloud compute routers nats delete "nat-{vpc_network_name}-{region}" \
  --router="router-{vpc_network_name}-{region}"  \
  --region="{region}" \
  --project="{project_id}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete router
# Deletes the Cloud Router "router-<network>-<region>" created for the NAT.
print("Please wait...")
!gcloud compute routers delete "router-{vpc_network_name}-{region}" \
  --region="{region}" \
  --project="{project_id}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete subnet
# Deletes the subnet "<network>-<region>". The Workbench instance that uses
# this subnet is deleted in an earlier clean-up cell.
print("Please wait...")
!gcloud compute networks subnets delete "{vpc_network_name}-{region}" \
  --region="{region}" \
  --project="{project_id}" \
  --quiet
print("☑️ Done")

In [None]:
# Delete VPC
print("Please wait...")
!gcloud compute networks delete "{vpc_network_name}" \
  --project="{project_id}" \
  --quiet
print("☑️ Done")