<a href="https://colab.research.google.com/github/Komal-patra/RAG/blob/main/3_Query_Routing_and_Query_Construction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q -U python-dotenv
!pip install -q -U sentence-transformers
!pip install -q -U faiss-cpu
!pip install -q -U huggingface_hub
!pip install -q -U langchain langchain-community langchain-core langchain-groq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m436.4/436.4 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m63.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m401.8/401.8 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.5/106.5 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
from google.colab import userdata
import os

In [4]:
# Static LangSmith tracing settings (no secrets involved).
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})

# Secrets are read from the Colab secret store, never hardcoded.
os.environ['LANGCHAIN_API_KEY'] = userdata.get('LANGSMITH_API_KEY')

# Project name under which traces are grouped.
os.environ['LANGCHAIN_PROJECT'] = 'The RAG Query Routing and Construction'

# API key picked up by the Groq chat model client.
os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')

# Query Routing

## Logical Routing

In [5]:
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
## Data Model

# Schema for the router's structured output: the model's answer is parsed into
# a RouteQuery instance (see `llm.with_structured_output(RouteQuery)`).
# NOTE(review): BaseModel/Field come from `langchain_core.pydantic_v1` in this
# notebook; that import prints a deprecation message recommending `pydantic`
# (or `pydantic.v1`) instead — consider migrating.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the LLM as part of the schema, so treat their wording as
# prompt text rather than ordinary documentation — confirm before editing.
class RouteQuery(BaseModel):
  """ Route the user query to most relevant datasource documentation"""

  # Single required field (`...` means no default): the answer must be exactly
  # one of the three literal datasource names.
  datasource : Literal["python-docs","js-docs","golang-docs"] = Field(
      ...,
      description="Given a user question choose which datasource would be most relevant for answering their question",
      )

In [7]:
## LLM with function calling

# Name the Groq model explicitly instead of relying on the library's implicit
# default: an implicit default can change or disappear between releases, which
# breaks "Restart & Run All".
# NOTE(review): "llama-3.3-70b-versatile" is an assumption — swap in whichever
# tool-calling-capable model your Groq account currently offers.
# temperature=0 keeps the routing decision as deterministic as possible.
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

# Wrap the LLM so its responses are returned as RouteQuery instances
# rather than free-form text.
structured_llm = llm.with_structured_output(RouteQuery)

In [10]:

# We used the .with_structured_output function to guide the LLM to produce structured output
# in the format of the RouteQuery Pydantic model.

# This is important because we want the LLM to provide not just a free-form response,
# but a specific, structured response indicating the most relevant documentation
# for answering a given user question.

# By using .with_structured_output, we ensure the LLM understands the expected output
# format and can provide the specific "datasource" field that we're interested in.


In [8]:
## Prompt

# System instructions: tell the model to route by programming language.
system = """You are an expert at routing a user question to the appropriate data source.
Based on the programming language the question is referring to,
route it to the relevant data source.
"""

# Two-message chat prompt: the fixed system instructions followed by the
# user's text, supplied at invoke time through the `question` variable.
prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", "{question}")]
)

In [9]:
## Define router

# Compose the routing chain: the chat prompt is filled from a dict containing
# "question", then passed to the structured-output LLM.
router = prompt | structured_llm

In [11]:
# Sample user question: it embeds a Python/LangChain snippet, so the router is
# expected to pick the Python documentation source.
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

# Run the router; the key must match the `{question}` placeholder in the prompt.
result = router.invoke({"question": question})

In [12]:
result

RouteQuery(datasource='python-docs')

## Semantic Routing

In [13]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

In [15]:
## Two prompts


# Candidate prompts for semantic routing. Each one sets a persona and ends
# with a `{query}` placeholder for the user's question.
# A trailing backslash inside these strings joins the next line without
# inserting a newline, so each persona paragraph is a single line of text.

# Physics-professor persona.
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

# Mathematician persona.
math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

In [16]:
## Embed the prompts

# Embedding model configuration. The same embedder is used for the prompt
# templates here and for incoming queries in `prompt_router`.
model_name = "BAAI/bge-small-en"
model_kwargs = {'device': 'cpu'}  # run the embedding model on CPU
encode_kwargs = {'normalize_embeddings': True}  # request normalized embedding vectors
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
# Embed every template once up front. `prompt_embeddings[i]` corresponds to
# `prompt_templates[i]`, so an index into one can be used to look up the other.
prompt_templates = [physics_template, math_template]
prompt_embeddings = hf_embeddings.embed_documents(prompt_templates)


  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [17]:
## Routing the question to the prompt

def prompt_router(input):
  """Select the prompt template that is semantically closest to the query.

  Args:
    input: Mapping with a "query" key holding the user's question text.

  Returns:
    A PromptTemplate built from whichever entry of `prompt_templates` has the
    highest cosine similarity to the embedded query.

  Side effects:
    Prints "Using MATH" when the math template is chosen, otherwise
    "Using PHYSICS".
  """
  # Embed the question with the same model that embedded the templates.
  question_vector = hf_embeddings.embed_query(input["query"])

  # A single query yields one row of scores, one score per template.
  scores = cosine_similarity([question_vector], prompt_embeddings)[0]
  best_index = scores.argmax()
  chosen_template = prompt_templates[best_index]

  # Report which prompt was selected.
  if chosen_template == math_template:
    print("Using MATH")
  else:
    print("Using PHYSICS")

  return PromptTemplate.from_template(chosen_template)

In [18]:
# End-to-end semantic-routing chain:
#   1. wrap the raw input string as {"query": <input>}
#   2. pick the closest prompt template and build a PromptTemplate from it
#   3. send the filled prompt to the Groq chat model
#   4. reduce the chat response to a plain string
# The model is named explicitly rather than relying on the library's implicit
# default, which can change or disappear between releases.
# NOTE(review): "llama-3.3-70b-versatile" is an assumption — use whichever
# model your Groq account currently offers.
chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | ChatGroq(model="llama-3.3-70b-versatile")
    | StrOutputParser()
)

In [19]:
print(chain.invoke("What is the speed of light?"))

Using PHYSICS
The speed of light is approximately 299,792 kilometers per second, or about 186,282 miles per second. This is the speed at which light travels in a vacuum, and it is considered the fastest speed at which anything can travel.


In [20]:
print(chain.invoke("What is the black hole?"))

Using PHYSICS
A black hole is a region in space where the gravitational pull is so strong that nothing, not even light, can escape from it. They are created when massive stars collapse under their own gravity after they have exhausted their nuclear fuel. The core of the star shrinks, and if it is heavy enough, it can form a black hole. Black holes can also merge together and grow in size. They are called "black" holes because they do not emit or reflect any light, making them invisible and impossible to see directly. However, we can detect them by observing their effects on nearby matter and radiation.
