## Advanced RAG

In [91]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
from langchain_community.document_loaders import GitLoader


def file_filter(file_path: str) -> bool:
    return file_path.endswith(".mdx")


loader = GitLoader(
    clone_url="https://github.com/langchain-ai/langchain",
    repo_path="./langchain",
    branch="master",
    file_filter=file_filter,
)

documents = loader.load()
print(len(documents))

384


In [15]:
documents[0].metadata

{'source': 'cookbook/sql_db_qa.mdx',
 'file_path': 'cookbook/sql_db_qa.mdx',
 'file_name': 'sql_db_qa.mdx',
 'file_type': '.mdx'}

In [13]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = Chroma.from_documents(documents, embeddings, persist_directory="./chroma")

In [16]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template('''\
‰ª•‰∏ã„ÅÆÊñáËÑà„Å†„Åë„ÇíË∏è„Åæ„Åà„Å¶Ë≥™Âïè„Å´ÂõûÁ≠î„Åó„Å¶„Åè„Å†„Åï„ÅÑ„ÄÇ

ÊñáËÑà: """
{context}
"""

Ë≥™Âïè: {question}
''')

model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

retriever = db.as_retriever()

chain = {
    "question": RunnablePassthrough(),
    "context": retriever,
} | prompt | model | StrOutputParser()

chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

'LangChain„ÅØ„ÄÅÂ§ßË¶èÊ®°Ë®ÄË™û„É¢„Éá„É´ÔºàLLMÔºâ„ÇíÊ¥ªÁî®„Åó„Åü„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÈñãÁô∫„Åô„Çã„Åü„ÇÅ„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„Åß„Åô„ÄÇ„Åì„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„ÅØ„ÄÅLLM„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÅÆ„É©„Ç§„Éï„Çµ„Ç§„ÇØ„É´„ÅÆÂêÑÊÆµÈöé„ÇíÁ∞°Á¥†Âåñ„Åó„Åæ„Åô„ÄÇÂÖ∑‰ΩìÁöÑ„Å´„ÅØ„ÄÅ‰ª•‰∏ã„ÅÆ„Çà„ÅÜ„Å™Ê©üËÉΩ„Åå„ÅÇ„Çä„Åæ„Åô„ÄÇ\n\n1. **ÈñãÁô∫**: LangChain„ÅÆ„Ç™„Éº„Éó„É≥„ÇΩ„Éº„Çπ„Ç≥„É≥„Éù„Éº„Éç„É≥„Éà„ÇÑ„Çµ„Éº„Éâ„Éë„Éº„ÉÜ„Ç£„ÅÆÁµ±Âêà„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊßãÁØâ„Åß„Åç„Åæ„Åô„ÄÇ„Åæ„Åü„ÄÅLangGraph„ÇíÂà©Áî®„Åó„Å¶„ÄÅÁä∂ÊÖã„ÇíÊåÅ„Å§„Ç®„Éº„Ç∏„Çß„É≥„Éà„ÇíÊßãÁØâ„Åó„ÄÅ„Çπ„Éà„É™„Éº„Éü„É≥„Ç∞„ÇÑ‰∫∫Èñì„ÅÆ‰ªãÂÖ•„Çí„Çµ„Éù„Éº„Éà„Åó„Åæ„Åô„ÄÇ\n\n2. **ÁîüÁî£Âåñ**: LangSmith„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊ§úÊüª„ÄÅÁõ£Ë¶ñ„ÄÅË©ï‰æ°„Åó„ÄÅÁ∂ôÁ∂öÁöÑ„Å´ÊúÄÈÅ©Âåñ„Åó„Å¶Ëá™‰ø°„ÇíÊåÅ„Å£„Å¶„Éá„Éó„É≠„Ç§„Åß„Åç„Åæ„Åô„ÄÇ\n\n3. **„Éá„Éó„É≠„Ç§**: LangGraph„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÁîüÁî£Ê∫ñÂÇô„ÅåÊï¥„Å£„ÅüAPI„ÇÑ„Ç¢„Ç∑„Çπ„Çø„É≥„Éà„Å´Â§âÊèõ„Åß„Åç„Åæ„Åô„ÄÇ\n\nLangChain„ÅØ„ÄÅLLM„ÇÑÈñ¢ÈÄ£ÊäÄË°ìÔºàÂüã„ÇÅËæº„Å

## 6.3. Ê§úÁ¥¢„ÇØ„Ç®„É™„ÅÆÂ∑•Â§´
### HyDEÔºàHypothetical Document EmbeddingsÔºâ

In [17]:
hypothetical_prompt = ChatPromptTemplate.from_template("""\
Ê¨°„ÅÆË≥™Âïè„Å´ÂõûÁ≠î„Åô„Çã‰∏ÄÊñá„ÇíÊõ∏„ÅÑ„Å¶„Åè„Å†„Åï„ÅÑ„ÄÇ

Ë≥™Âïè: {question}
""")

hypothetical_chain = hypothetical_prompt | model | StrOutputParser()

In [18]:
hyde_rag_chain = {
    "question": RunnablePassthrough(),
    "context": hypothetical_chain | retriever,
} | prompt | model | StrOutputParser()

hyde_rag_chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

'LangChain„ÅØ„ÄÅÂ§ßË¶èÊ®°Ë®ÄË™û„É¢„Éá„É´ÔºàLLMÔºâ„ÇíÊ¥ªÁî®„Åó„Åü„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÈñãÁô∫„Åô„Çã„Åü„ÇÅ„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„Åß„Åô„ÄÇLangChain„ÅØ„ÄÅ„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÅÆ„É©„Ç§„Éï„Çµ„Ç§„ÇØ„É´„ÅÆÂêÑÊÆµÈöé„ÇíÁ∞°Á¥†Âåñ„Åó„Åæ„Åô„ÄÇÂÖ∑‰ΩìÁöÑ„Å´„ÅØ„ÄÅ‰ª•‰∏ã„ÅÆ„Çà„ÅÜ„Å™Ê©üËÉΩ„ÇíÊèê‰æõ„Åó„Å¶„ÅÑ„Åæ„Åô„ÄÇ\n\n1. **ÈñãÁô∫**: LangChain„ÅÆ„Ç™„Éº„Éó„É≥„ÇΩ„Éº„Çπ„Ç≥„É≥„Éù„Éº„Éç„É≥„Éà„ÇÑ„Çµ„Éº„Éâ„Éë„Éº„ÉÜ„Ç£„ÅÆÁµ±Âêà„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊßãÁØâ„Åß„Åç„Åæ„Åô„ÄÇ„Åæ„Åü„ÄÅLangGraph„ÇíÂà©Áî®„Åó„Å¶„ÄÅÁä∂ÊÖã„ÇíÊåÅ„Å§„Ç®„Éº„Ç∏„Çß„É≥„Éà„ÇíÊßãÁØâ„Åô„Çã„Åì„Å®„Åå„Åß„Åç„Åæ„Åô„ÄÇ\n\n2. **ÁîüÁî£Âåñ**: LangSmith„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊ§úÊüª„ÄÅÁõ£Ë¶ñ„ÄÅË©ï‰æ°„Åó„ÄÅÁ∂ôÁ∂öÁöÑ„Å´ÊúÄÈÅ©Âåñ„Åó„Å¶Ëá™‰ø°„ÇíÊåÅ„Å£„Å¶„Éá„Éó„É≠„Ç§„Åß„Åç„Åæ„Åô„ÄÇ\n\n3. **„Éá„Éó„É≠„Ç§„É°„É≥„Éà**: LangGraph„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÁîüÁî£Ê∫ñÂÇô„ÅåÊï¥„Å£„ÅüAPI„ÇÑ„Ç¢„Ç∑„Çπ„Çø„É≥„Éà„Å´Â§âÊèõ„Åß„Åç„Åæ„Åô„ÄÇ\n\nLangChain„ÅØ„ÄÅ„Åï„Åæ„Åñ„Åæ„Å™„Éó„É≠„Éê„Ç§„ÉÄ„Éº„Å®Áµ±Âêà„Åó„ÄÅÊ®ôÊ∫ñ„Ç§„É≥„Çø„Éº„Éï„Çß„Éº„Ç

In [19]:
hypothetical_chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

'LangChain„ÅØ„ÄÅË®ÄË™û„É¢„Éá„É´„ÇíÊ¥ªÁî®„Åó„Åü„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÅÆÈñãÁô∫„ÇíÂÆπÊòì„Å´„Åô„Çã„Åü„ÇÅ„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„Åß„ÄÅ„Éá„Éº„Çø„ÅÆÂèñÂæó„ÄÅÂá¶ÁêÜ„ÄÅÂá∫Âäõ„ÅÆÂêÑ„Çπ„ÉÜ„ÉÉ„Éó„ÇíÁµ±Âêà„Åó„ÄÅÂØæË©±Âûã„Ç®„Éº„Ç∏„Çß„É≥„Éà„ÇÑËá™ÂãïÂåñ„ÉÑ„Éº„É´„ÅÆÊßãÁØâ„Çí„Çµ„Éù„Éº„Éà„Åó„Åæ„Åô„ÄÇ'

## Ë§áÊï∞„ÅÆÊ§úÁ¥¢„ÇØ„Ç®„É™„ÅÆÁîüÊàê

In [66]:
from pydantic import BaseModel, Field


class QueryGenerationOutput(BaseModel):
    queries: list[str] = Field(..., description="Ê§úÁ¥¢„ÇØ„Ç®„É™„ÅÆ„É™„Çπ„Éà")


query_generation_prompt = ChatPromptTemplate.from_template("""\
Ë≥™Âïè„Å´ÂØæ„Åó„Å¶„Éô„ÇØ„Çø„Éº„Éá„Éº„Çø„Éô„Éº„Çπ„Åã„ÇâÈñ¢ÈÄ£ÊñáÊõ∏„ÇíÊ§úÁ¥¢„Åô„Çã„Åü„ÇÅ„Å´„ÄÅ
3„Å§„ÅÆÁï∞„Å™„ÇãÊ§úÁ¥¢„ÇØ„Ç®„É™„ÇíÁîüÊàê„Åó„Å¶„Åè„Å†„Åï„ÅÑ„ÄÇ
Ë∑ùÈõ¢„Éô„Éº„Çπ„ÅÆÈ°û‰ººÊÄßÊ§úÁ¥¢„ÅÆÈôêÁïå„ÇíÂÖãÊúç„Åô„Çã„Åü„ÇÅ„Å´„ÄÅ
„É¶„Éº„Ç∂„Éº„ÅÆË≥™Âïè„Å´ÂØæ„Åó„Å¶Ë§áÊï∞„ÅÆË¶ñÁÇπ„ÇíÊèê‰æõ„Åô„Çã„Åì„Å®„ÅåÁõÆÊ®ô„Åß„Åô„ÄÇ

Ë≥™Âïè: {question}
""")

query_generation_chain = (
    query_generation_prompt
    | model.with_structured_output(QueryGenerationOutput)
    | (lambda x: x.queries)
)

In [67]:
multi_query_rag_chain = {
    "question": RunnablePassthrough(),
    "context": query_generation_chain | retriever.map(),
} | prompt | model | StrOutputParser()

multi_query_rag_chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

'LangChain„ÅØ„ÄÅÂ§ßË¶èÊ®°Ë®ÄË™û„É¢„Éá„É´ÔºàLLMÔºâ„ÇíÊ¥ªÁî®„Åó„Åü„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÈñãÁô∫„Åô„Çã„Åü„ÇÅ„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„Åß„Åô„ÄÇLangChain„ÅØ„ÄÅÈñãÁô∫„ÄÅÈÅãÁî®„ÄÅ„Éá„Éó„É≠„Ç§„ÅÆÂêÑÊÆµÈöé„ÇíÁ∞°Á¥†Âåñ„Åô„Çã„Åì„Å®„ÇíÁõÆÁöÑ„Å®„Åó„Å¶„ÅÑ„Åæ„Åô„ÄÇÂÖ∑‰ΩìÁöÑ„Å´„ÅØ„ÄÅ‰ª•‰∏ã„ÅÆ„Çà„ÅÜ„Å™ÁâπÂæ¥„Åå„ÅÇ„Çä„Åæ„Åô„ÄÇ\n\n1. **ÈñãÁô∫**: LangChain„ÅÆ„Ç™„Éº„Éó„É≥„ÇΩ„Éº„Çπ„Ç≥„É≥„Éù„Éº„Éç„É≥„Éà„ÇÑ„Çµ„Éº„Éâ„Éë„Éº„ÉÜ„Ç£„ÅÆÁµ±Âêà„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊßãÁØâ„Åß„Åç„Åæ„Åô„ÄÇ„Åæ„Åü„ÄÅLangGraph„ÇíÂà©Áî®„Åó„Å¶„ÄÅÁä∂ÊÖã„ÇíÊåÅ„Å§„Ç®„Éº„Ç∏„Çß„É≥„Éà„ÇíÊßãÁØâ„Åó„ÄÅ„Çπ„Éà„É™„Éº„Éü„É≥„Ç∞„ÇÑ‰∫∫Èñì„ÅÆ‰ªãÂÖ•„Çí„Çµ„Éù„Éº„Éà„Åó„Åæ„Åô„ÄÇ\n\n2. **ÈÅãÁî®**: LangSmith„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÁõ£Ë¶ñ„ÄÅË©ï‰æ°„Åó„ÄÅÁ∂ôÁ∂öÁöÑ„Å´ÊúÄÈÅ©Âåñ„Åó„Å¶Ëá™‰ø°„ÇíÊåÅ„Å£„Å¶„Éá„Éó„É≠„Ç§„Åß„Åç„Åæ„Åô„ÄÇ\n\n3. **„Éá„Éó„É≠„Ç§**: LangGraph„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÁîüÁî£Ê∫ñÂÇô„ÅåÊï¥„Å£„ÅüAPI„ÇÑ„Ç¢„Ç∑„Çπ„Çø„É≥„Éà„Å´Â§âÊèõ„Åß„Åç„Åæ„Åô„ÄÇ\n\nLangChain„ÅØ„ÄÅ„Åï„Åæ„Åñ„Åæ„Å™„É¢„Éá„É´„ÇÑÈñ¢ÈÄ£„Ç≥„É≥„Éù„Éº„Éç„É≥„É

In [68]:
query_generation_chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

['LangChain„Å®„ÅØ‰Ωï„Åã„ÄÅ„Åù„ÅÆÂü∫Êú¨ÁöÑ„Å™Ê©üËÉΩ„Å®Áî®ÈÄî„Å´„Å§„ÅÑ„Å¶',
 'LangChain„ÅÆ‰∏ªË¶Å„Å™„Ç≥„É≥„Éù„Éº„Éç„É≥„Éà„Å®„Ç¢„Éº„Ç≠„ÉÜ„ÇØ„ÉÅ„É£„ÅÆË™¨Êòé',
 'LangChain„Çí‰ΩøÁî®„Åó„ÅüÂÖ∑‰ΩìÁöÑ„Å™„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥‰æã„Å®„Åù„ÅÆÂà©ÁÇπ']

In [72]:
query_results_chain = (
    query_generation_prompt
    | model.with_structured_output(QueryGenerationOutput)
    | (lambda x: x.queries)
    | retriever.map()
    # | (lambda docs_list: [doc.page_content for doc in docs_list[0]])
)

In [73]:
result = query_results_chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

In [88]:
array1 = np.array([-18, -18, -18])
array2 = np.array([0, 0, 0])

# ÈÄ£Áµê
result = np.concatenate((array1, array2))


In [89]:
result

array([-18, -18, -18,   0,   0,   0])

In [94]:
from typing import Any

from langchain_cohere import CohereRerank
from langchain_core.documents import Document

def rerank(inp: dict[str, Any], top_n: int = 3) -> list[Document]:
    question = inp['question']
    documents = inp['documents']

    cohere_reranker = CohereRerank(model="rerank-multilingual-v3.0", top_n=top_n)
    return cohere_reranker.compress_documents(documents=documents, query=question)

rerank_rag_chain = (
    {
        "question": RunnablePassthrough(),
        "documents": retriever,
    }
    | RunnablePassthrough.assign(context=rerank)
    | prompt | model | StrOutputParser()
)

rerank_rag_chain.invoke('Langchain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶')

'LangChain„ÅØ„ÄÅÂ§ßË¶èÊ®°Ë®ÄË™û„É¢„Éá„É´ÔºàLLMÔºâ„ÇíÊ¥ªÁî®„Åó„Åü„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÈñãÁô∫„Åô„Çã„Åü„ÇÅ„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„Åß„Åô„ÄÇ„Åì„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„ÅØ„ÄÅLLM„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÅÆ„É©„Ç§„Éï„Çµ„Ç§„ÇØ„É´„ÅÆÂêÑÊÆµÈöé„ÇíÁ∞°Á¥†Âåñ„Åó„Åæ„Åô„ÄÇÂÖ∑‰ΩìÁöÑ„Å´„ÅØ„ÄÅ‰ª•‰∏ã„ÅÆ„Çà„ÅÜ„Å™Ê©üËÉΩ„Åå„ÅÇ„Çä„Åæ„Åô„ÄÇ\n\n1. **ÈñãÁô∫**: LangChain„ÅÆ„Ç™„Éº„Éó„É≥„ÇΩ„Éº„Çπ„Ç≥„É≥„Éù„Éº„Éç„É≥„Éà„ÇÑ„Çµ„Éº„Éâ„Éë„Éº„ÉÜ„Ç£„ÅÆÁµ±Âêà„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊßãÁØâ„Åß„Åç„Åæ„Åô„ÄÇLangGraph„ÇíÂà©Áî®„Åô„Çã„Åì„Å®„Åß„ÄÅÁä∂ÊÖã„ÇíÊåÅ„Å§„Ç®„Éº„Ç∏„Çß„É≥„Éà„Çí‰ΩúÊàê„Åó„ÄÅ„Çπ„Éà„É™„Éº„Éü„É≥„Ç∞„ÇÑ‰∫∫Èñì„ÅÆ‰ªãÂÖ•„Çí„Çµ„Éù„Éº„Éà„Åó„Åæ„Åô„ÄÇ\n\n2. **ÁîüÁî£Âåñ**: LangSmith„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊ§úÊüª„ÄÅÁõ£Ë¶ñ„ÄÅË©ï‰æ°„Åó„ÄÅÁ∂ôÁ∂öÁöÑ„Å´ÊúÄÈÅ©Âåñ„Åó„Å¶Ëá™‰ø°„ÇíÊåÅ„Å£„Å¶„Éá„Éó„É≠„Ç§„Åß„Åç„Åæ„Åô„ÄÇ\n\n3. **„Éá„Éó„É≠„Ç§**: LangGraph„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÁîüÁî£Ê∫ñÂÇô„ÅåÊï¥„Å£„ÅüAPI„ÇÑ„Ç¢„Ç∑„Çπ„Çø„É≥„Éà„Å´Â§âÊèõ„Åß„Åç„Åæ„Åô„ÄÇ\n\nLangChain„ÅØ„ÄÅ„ÉÅ„É£„ÉÉ„Éà„É¢„Éá„É´„ÇÑÂüã„ÇÅËæ

In [95]:
from langchain_community.retrievers import TavilySearchAPIRetriever

# langsmith„Åß„Éà„É¨„Éº„Çπ„Åó„ÇÑ„Åô„ÅÑ„Çà„ÅÜ„Å´config„Çí„Å§„Åë„Çã
langchain_document_retriever = retriever.with_config(
    {"run_name": "langchain_document_retriever"}
)

web_retriever = TavilySearchAPIRetriever(k=3).with_config(
    {"run_name": "web_retriever"}
)

In [101]:
from enum import Enum

class Route(str, Enum):
    langchain_document = "langchain_document"
    web = 'web'

class RouteOutput(BaseModel):
    route: Route


route_prompt = ChatPromptTemplate.from_template("""\
Ë≥™Âïè„Å´ÂõûÁ≠î„Åô„Çã„Åü„ÇÅ„Å´ÈÅ©Âàá„Å™Retriever„ÇíÈÅ∏Êäû„Åó„Å¶„Åè„Å†„Åï„ÅÑ„ÄÇ

Ë≥™Âïè: {question}
""")

route_chain = (
    route_prompt
    | model.with_structured_output(RouteOutput)
    | (lambda x: x.route)
    
)

In [102]:
class Route(Enum):
    langchain_document = "langchain_document"
    web = 'web'

In [107]:
isinstance(Test.web, str)

False

In [108]:
isinstance(Route.web, str)

True

In [109]:
def routed_retriever(inp: dict[str, Any]) -> list[Document]:
    question = inp["question"]
    route = inp["route"]

    if route == Route.langchain_document:
        return langchain_document_retriever.invoke(question)
    elif route == Route.web:
        return web_retriever.invoke(question)

    raise ValueError(f"Unknown retriever: {retriever}")


route_rag_chain = (
    {
        "question": RunnablePassthrough(),
        "route": route_chain,
    }
    | RunnablePassthrough.assign(context=routed_retriever)
    | prompt | model | StrOutputParser()
)

In [110]:
route_rag_chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

'LangChain„ÅØ„ÄÅÂ§ßË¶èÊ®°Ë®ÄË™û„É¢„Éá„É´ÔºàLLMÔºâ„ÇíÊ¥ªÁî®„Åó„Åü„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÈñãÁô∫„Åô„Çã„Åü„ÇÅ„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„Åß„Åô„ÄÇ„Åì„ÅÆ„Éï„É¨„Éº„É†„ÉØ„Éº„ÇØ„ÅØ„ÄÅLLM„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÅÆ„É©„Ç§„Éï„Çµ„Ç§„ÇØ„É´„ÅÆÂêÑÊÆµÈöé„ÇíÁ∞°Á¥†Âåñ„Åó„Åæ„Åô„ÄÇÂÖ∑‰ΩìÁöÑ„Å´„ÅØ„ÄÅ‰ª•‰∏ã„ÅÆ„Çà„ÅÜ„Å™Ê©üËÉΩ„Åå„ÅÇ„Çä„Åæ„Åô„ÄÇ\n\n1. **ÈñãÁô∫**: LangChain„ÅÆ„Ç™„Éº„Éó„É≥„ÇΩ„Éº„Çπ„Ç≥„É≥„Éù„Éº„Éç„É≥„Éà„ÇÑ„Çµ„Éº„Éâ„Éë„Éº„ÉÜ„Ç£„ÅÆÁµ±Âêà„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊßãÁØâ„Åß„Åç„Åæ„Åô„ÄÇ„Åæ„Åü„ÄÅLangGraph„ÇíÂà©Áî®„Åó„Å¶„ÄÅÁä∂ÊÖã„ÇíÊåÅ„Å§„Ç®„Éº„Ç∏„Çß„É≥„Éà„ÇíÊßãÁØâ„Åó„ÄÅ„Çπ„Éà„É™„Éº„Éü„É≥„Ç∞„ÇÑ‰∫∫Èñì„ÅÆ‰ªãÂÖ•„Çí„Çµ„Éù„Éº„Éà„Åó„Åæ„Åô„ÄÇ\n\n2. **ÁîüÁî£Âåñ**: LangSmith„Çí‰ΩøÁî®„Åó„Å¶„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÊ§úÊüª„ÄÅÁõ£Ë¶ñ„ÄÅË©ï‰æ°„Åó„ÄÅÁ∂ôÁ∂öÁöÑ„Å´ÊúÄÈÅ©Âåñ„Åó„Å¶Ëá™‰ø°„ÇíÊåÅ„Å£„Å¶„Éá„Éó„É≠„Ç§„Åß„Åç„Åæ„Åô„ÄÇ\n\n3. **„Éá„Éó„É≠„Ç§**: LangGraph„Ç¢„Éó„É™„Ç±„Éº„Ç∑„Éß„É≥„ÇíÁîüÁî£Ê∫ñÂÇô„ÅåÊï¥„Å£„ÅüAPI„ÇÑ„Ç¢„Ç∑„Çπ„Çø„É≥„Éà„Å´Â§âÊèõ„Åß„Åç„Åæ„Åô„ÄÇ\n\nLangChain„ÅØ„ÄÅLLM„ÇÑÈñ¢ÈÄ£ÊäÄË°ìÔºàÂüã„ÇÅËæº„Å

In [111]:
route_rag_chain.invoke("Êù±‰∫¨„ÅÆ‰ªäÊó•„ÅÆÂ§©Ê∞ó„ÅØÔºü")

'Êù±‰∫¨„ÅÆ‰ªäÊó•„ÅÆÂ§©Ê∞ó„ÅØ„ÄÅ10Êúà27Êó•ÔºàÊó•Ôºâ„ÅßÊúÄÈ´òÊ∞óÊ∏©24‚ÑÉ„ÄÅÊúÄ‰ΩéÊ∞óÊ∏©17‚ÑÉ„ÄÅÈôçÊ∞¥Á¢∫Áéá„ÅØ50%„Åß„Åô„ÄÇ'

## „Éè„Ç§„Éñ„É™„ÉÉ„ÉâÊ§úÁ¥¢

In [127]:
def reciprocal_rank_fusion(
    retriever_outputs: list[list[Document]],
    k: int = 60,
) -> list[str]:
    # ÂêÑ„Éâ„Ç≠„É•„É°„É≥„Éà„ÅÆ„Ç≥„É≥„ÉÜ„É≥„ÉÑ (ÊñáÂ≠óÂàó) „Å®„Åù„ÅÆ„Çπ„Ç≥„Ç¢„ÅÆÂØæÂøú„Çí‰øùÊåÅ„Åô„ÇãËæûÊõ∏„ÇíÊ∫ñÂÇô
    content_score_mapping = {}

    # Ê§úÁ¥¢„ÇØ„Ç®„É™„Åî„Å®„Å´„É´„Éº„Éó
    for docs in retriever_outputs:
        # Ê§úÁ¥¢ÁµêÊûú„ÅÆ„Éâ„Ç≠„É•„É°„É≥„Éà„Åî„Å®„Å´„É´„Éº„Éó
        for rank, doc in enumerate(docs):
            content = doc.page_content

            # Âàù„ÇÅ„Å¶ÁôªÂ†¥„Åó„Åü„Ç≥„É≥„ÉÜ„É≥„ÉÑ„ÅÆÂ†¥Âêà„ÅØ„Çπ„Ç≥„Ç¢„Çí0„ÅßÂàùÊúüÂåñ
            if content not in content_score_mapping:
                content_score_mapping[content] = 0

            # (1 / (È†Ü‰Ωç + k)) „ÅÆ„Çπ„Ç≥„Ç¢„ÇíÂä†ÁÆó
            content_score_mapping[content] += 1 / (rank + k)

    # „Çπ„Ç≥„Ç¢„ÅÆÂ§ß„Åç„ÅÑÈ†Ü„Å´„ÇΩ„Éº„Éà
    ranked = sorted(content_score_mapping.items(), key=lambda x: x[1], reverse=True)  # noqa
    return [content for content, _ in ranked]

In [126]:
from langchain_community.retrievers import BM25Retriever

chroma_retriever = retriever.with_config(
    {"run_name": "chroma_retriever"}
)

bm25_retriever = BM25Retriever.from_documents(documents).with_config(
    {"run_name": "bm25_retriever"}
)

In [132]:
from langchain_core.runnables import RunnableParallel

hybrid_retriever = (
    RunnableParallel({
        "chroma_documents": chroma_retriever,
        "bm25_documents": bm25_retriever,
    })
    # | (lambda x: [x["chroma_documents"], x["bm25_documents"]])
    # | reciprocal_rank_fusion
)

In [130]:
hybrid_rag_chain = (
    {
        "question": RunnablePassthrough(),
        "context": hybrid_retriever,
    }
    | prompt | model | StrOutputParser()
)

hybrid_rag_chain.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

AttributeError: 'dict' object has no attribute 'invoke'

In [133]:
hybrid_retriever.invoke("LangChain„ÅÆÊ¶ÇË¶Å„ÇíÊïô„Åà„Å¶")

{'chroma_documents': [Document(metadata={'file_name': 'introduction.mdx', 'file_path': 'docs/docs/introduction.mdx', 'file_type': '.mdx', 'source': 'docs/docs/introduction.mdx'}, page_content='---\nsidebar_position: 0\nsidebar_class_name: hidden\n---\n\n# Introduction\n\n**LangChain** is a framework for developing applications powered by large language models (LLMs).\n\nLangChain simplifies every stage of the LLM application lifecycle:\n- **Development**: Build your applications using LangChain\'s open-source [components](/docs/concepts) and [third-party integrations](/docs/integrations/providers/).\nUse [LangGraph](/docs/concepts/architecture/#langgraph) to build stateful agents with first-class streaming and human-in-the-loop support.\n- **Productionization**: Use [LangSmith](https://docs.smith.langchain.com/) to inspect, monitor and evaluate your applications, so that you can continuously optimize and deploy with confidence.\n- **Deployment**: Turn your LangGraph applications into p