In [None]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain
! pip install langchain-google-genai

### Environment

In [None]:
# LangSmith tracing configuration, applied to the process environment in one call.
import os

os.environ.update(
    {
        'LANGCHAIN_TRACING_V2': 'true',
        'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
        # NOTE(review): the value is the variable's own name, so it looks like
        # a placeholder — supply a real key before running.
        'LANGCHAIN_API_KEY': "LANGCHAIN_API_KEY",
    }
)

In [None]:
# Expose the Gemini API key through the process environment.
# NOTE(review): the value is the variable's own name, so it looks like a
# placeholder — supply a real key before running.
os.environ['GOOGLE_API_KEY'] = "GOOGLE_API_KEY"

In [None]:
import os
# Point GOOGLE_APPLICATION_CREDENTIALS at a JSON credentials file.
# NOTE(review): hardcoded absolute path under /content — presumably a Colab
# upload location; adjust when running elsewhere.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/gen-lang-client-0604130497-70fe0d4700a5.json"

In [None]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI

# os.getenv takes the NAME of an environment variable and returns its value
# (or None when unset). Never paste the key itself here: read it from
# GOOGLE_API_KEY, which is set in the environment-setup cell above.
api_key = os.getenv("GOOGLE_API_KEY")

# Initialize the language model
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,
    max_tokens=500,
    api_key=api_key
)

# Call the model once as a smoke test and print the text of the reply.
response = llm.invoke("Hello, world!")
print(response.content)


Hello to you too!


### Logical and semantic routing
* use function calling for classification

In [None]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI

# Data model: the structured-output schema handed to llm.with_structured_output below.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # Required field (default is `...`), restricted by Literal to the three known datasources.
    # NOTE(review): the docstring and description are presumably forwarded to the
    # model as part of the schema — confirm before rewording either.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be most relevant for answering their question",
    )



In [None]:
# LLM with function call
# Rebinds `llm`; unlike the smoke-test cell, no max_tokens is passed here.
# `api_key` is the variable defined in the earlier setup cell.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,
    api_key=api_key)

# Wrap the model so that invoking it yields a RouteQuery instance.
structured_llm = llm.with_structured_output(RouteQuery)


In [None]:
# Prompt

# System message: tells the model to choose a datasource from the programming
# language the question is about.
system = """You are an expert at routing a user question to its most relevant datasource.

Based on the programming language the question is referring to, route it to the relevant data source."""

# Two-message chat prompt: the fixed system instruction plus the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Define router: the prompt feeds the structured-output LLM.
router = prompt | structured_llm

In [None]:
# Sample question whose embedded snippet is Python code, used to exercise the router.
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

# Run the router; the prompt expects the text under the "question" key.
result = router.invoke({"question": question})

In [None]:
# Display the structured routing decision returned by the router.
result

RouteQuery(datasource='python_docs')

In [None]:
# Display only the chosen datasource name.
result.datasource

'python_docs'

In [None]:
def choose_route(result):
    """Map a routing decision to the name of the chain that should handle it.

    Args:
        result: Object exposing a string ``datasource`` attribute (the router
            produces a ``RouteQuery``).

    Returns:
        A placeholder string naming the chain for the chosen datasource.
        Anything not recognised as python_docs or js_docs falls through to
        the golang chain.
    """
    # Normalise once instead of lower-casing in every branch.
    datasource = result.datasource.lower()
    if "python_docs" in datasource:
        return "chain for python_docs"
    if "js_docs" in datasource:
        return "chain for js_docs"
    # Same "chain for ..." form as the other branches.
    return "chain for golang_docs"

from langchain_core.runnables import RunnableLambda

# Pipe the router's structured output into choose_route to obtain the final value.
full_chain = router | RunnableLambda(choose_route)

In [None]:
# End-to-end run: route the sample question, then map the decision to a chain name.
full_chain.invoke({"question": question})

'chain for python_docs'

## Semantic Routing

In [None]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings


# Two candidate prompts for semantic routing; each contains a {query} placeholder.

physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

In [None]:
# Embed the prompts
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# prompt_router indexes prompt_templates by position in the similarity scores
# computed against prompt_embeddings, so both must stay in the same order.
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

In [None]:
# route questions to prompt

def prompt_router(input):
    """Select the prompt template most similar to the incoming query.

    Args:
        input: Mapping that holds the question text under the "query" key.

    Returns:
        A PromptTemplate built from whichever entry of ``prompt_templates``
        scores highest on cosine similarity against the query embedding.
    """
    # Embed the incoming query text.
    query_vector = embeddings.embed_query(input["query"])

    # A single query is compared, so take row 0 of the similarity result.
    scores = cosine_similarity([query_vector], prompt_embeddings)[0]

    # Highest score picks the template at the same position.
    best_index = scores.argmax()
    chosen_template = prompt_templates[best_index]
    print(f"Most similar prompt: {chosen_template}")

    # Report which of the two prompts was selected.
    if chosen_template == math_template:
        print("Using MATH")
    else:
        print("Using PHYSICS")
    return PromptTemplate.from_template(chosen_template)


In [None]:
# Define the chain: the raw input is wrapped as {"query": ...}, prompt_router
# picks a template for it, the Gemini model is called, and StrOutputParser
# runs on the model output.
chain = (
    {"query":RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0, api_key=api_key)
    | StrOutputParser()
)

# Run the chain on a physics question and print the result.
print(chain.invoke("what is modern physics"))

Most similar prompt: You are a very smart physics professor. You are great at answering questions about physics in a concise and easy to understand manner. When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}
Using PHYSICS
Modern physics generally refers to developments in physics starting around the year 1900, with the introduction of Planck's quantum theory and Einstein's theory of relativity.  It contrasts with "classical physics" which encompasses Newtonian mechanics, Maxwell's electromagnetism, and thermodynamics developed before 1900.  Essentially, modern physics deals with the very small (quantum mechanics), the very fast (relativity), and the very massive (cosmology and astrophysics).


Trace: [Modern Physics](https://smith.langchain.com/o/60da6ebd-4ff3-45dc-a06f-87d423eb5049/projects/p/f2716ee1-d256-4a10-940f-f222ddbef534?timeModel=%7B%22duration%22%3A%227d%22%7D&peek=fd5c044c-320c-416b-95bc-9dea135d6515)

# Query Construction
* For in-depth knowledge, refer to this [LangChain documentation blog](https://blog.langchain.dev/query-construction/)
* [Enhancing rag based applications accuracy](https://blog.langchain.dev/enhancing-rag-based-applications-accuracy-by-constructing-and-leveraging-knowledge-graphs/)

---



### Query construction for metadata filters

In [None]:
!pip install youtube-transcript-api
!pip install pytube

Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl.metadata (5.0 kB)
Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-15.0.0


In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Load the page at this URL; the result is indexed like a list below.
docs = WebBaseLoader(
    "https://github.com/langchain-ai/langgraph/tree/main/examples/rag"
).load()

# Inspect the metadata attached to the first loaded document.
docs[0].metadata



{'source': 'https://github.com/langchain-ai/langgraph/tree/main/examples/rag',
 'title': 'langgraph/examples/rag at main · langchain-ai/langgraph · GitHub',
 'description': 'Build resilient language agents as graphs. Contribute to langchain-ai/langgraph development by creating an account on GitHub.',
 'language': 'en'}

In [None]:
import datetime
from typing import Literal, Optional, Tuple
from langchain_core.pydantic_v1 import BaseModel, Field

class quert_search(BaseModel):
    """Search over a database of github about a software library."""

    # Free-text query used for similarity search over notebook content.
    content_search: str = Field(
        ...,
        description="Similarity search query applied to github notebooks.",
    )
    # Shorter, keyword-only variant of the query aimed at notebook titles.
    title_search: str = Field(
        ...,
        description=(
            "Alternate version of the content search query to apply to github notebooks. "
            "Should be succinct and only include key words that could be in a notebooks "
            "title."
        ),
    )

    def pretty_print(self) -> None:
        """Print ``name: value`` for each field that is set and differs from its default."""
        for name, spec in self.__fields__.items():
            value = getattr(self, name)
            # Unset fields are not shown.
            if value is None:
                continue
            # Only show values that differ from the field's declared default.
            if value != getattr(spec, "default", None):
                print(f"{name}: {value}")

In [None]:
# Prompt for llm
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# System message for query analysis.
# NOTE(review): this text says "colab notebooks" while the quert_search schema
# descriptions say "github notebooks" — confirm which wording is intended.
system = """You are an expert at converting user questions into database queries. \
You have access to a database of colab notebooks about a software library for building LLM-powered applications. \
Given a question, return a database query optimized to retrieve the most relevant results.

If there are acronyms or words you are not familiar with, do not try to rephrase them."""

# Two-message chat prompt: the fixed system instruction plus the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

In [None]:
# llm
# Rebinds `llm`, `structured_llm` and (in the previous cell) `prompt`, which the
# routing section also defined; re-running earlier cells after this one will
# pick up these newer objects.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,
    api_key=api_key)

# The model's output is returned as a quert_search instance.
structured_llm = llm.with_structured_output(quert_search)
# Query analyzer: prompt followed by the structured-output model.
query_analyzer = prompt | structured_llm

In [None]:
# Plain topical query with no filter criteria.
query_analyzer.invoke({"question": "rag from scratch"}).pretty_print()

content_search: rag from scratch
title_search: rag


In [None]:
# Query with a publication-year constraint.
# NOTE(review): quert_search has no date field, so the year can only end up
# inside the free-text search strings.
query_analyzer.invoke(
    {"question": "Notebooks on chat langchain published in 2023"}
).pretty_print()

content_search: chat langchain published:2023
title_search: chat langchain


In [None]:
# Query with a "published before" constraint.
# NOTE(review): quert_search has no date field, so this constraint cannot be
# represented as a structured filter.
query_analyzer.invoke(
    {"question": "Notebooks that are focused on the topic of chat langchain that are published before 2024"}
).pretty_print()

content_search: chat langchain
title_search: chat langchain


In [None]:
# Query with a video-length constraint.
# NOTE(review): quert_search only defines content_search and title_search, so
# "under 5 minutes" cannot be represented as a structured filter.
query_analyzer.invoke(
    {
        "question": "how to use multi-modal models in an agent, only videos under 5 minutes"
    }
).pretty_print()