# [Adaptive RAG](https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_adaptive_rag/#create-index)

In [44]:
!pip install -qU langchain langchain-core langchain-community langchain-experimental langchain-groq langgraph langsmith langchain-pinecone langchain-huggingface sentence_transformers beautifulsoup4 langchain-text-splitters

# Set up environments.

In [45]:
import os
from google.colab import userdata
os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = userdata.get('HUGGINGFACEHUB_API_TOKEN')
os.environ['LANGSMITH_API_KEY'] = userdata.get('LANGSMITH_API_KEY')
os.environ['PINECONE_API_KEY'] = userdata.get('PINECONE_API_KEY')

In [46]:
os.environ['environment'] = "aws"

# Set up Langsmith

In [47]:
os.environ['LANGCHAIN_API_KEY'] = userdata.get('LANGSMITH_API_KEY')
os.environ['LANGSMITH_TRACING'] = "true"
os.environ["LANGCHAIN_PROJECT"] = "MultiAgentChatbot"

# Load Content from URL
[WebBaseLoader](https://python.langchain.com/v0.2/docs/integrations/document_loaders/web_base/)

In [48]:
from langchain_community.document_loaders import WebBaseLoader

In [49]:
# Docs to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

Using tiktoken encoder



```
# Load
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)
```

one differnt approach to consider

In [50]:
loader = WebBaseLoader(urls)

In [51]:
docs = loader.load()

In [52]:
docs[0]

Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}, page_content='\n\n\n\n\n\nLLM Powered Autonomous Agents | Lil\'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil\'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPosts\n\n\n\n\nArchive\n\n\n\n\nSearch\n\n\n\n\nTags\n\n\n\n\nFAQ\n\n\n\n\nemojisearch.app\n\n\n\n\n\n\n\n\n\n      LLM Powered Autono

# Chunk Document
[RecursiveCharacterTextSplitter](https://python.langchain.com/v0.2/docs/how_to/recursive_text_splitter/)

In [53]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [54]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=500,
    length_function=len,
    is_separator_regex=False
)

In [55]:
docs_split = text_splitter.split_documents(docs)

In [56]:
len(docs_split)

55

In [57]:
docs_split[0]

Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}, page_content="LLM Powered Autonomous Agents | Lil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPosts\n\n\n\n\nArchive\n\n\n\n\nSearch\n\n\n\n\nTags\n\n\n\n\nFAQ\n\n\n\n\nemojisearch.app\n\n\n\n\n\n\n\n\n\n      LLM Powered Autonomous Agents\n 

# Embeddings

[HuggingFaceEmbeddings](https://python.langchain.com/v0.2/docs/integrations/text_embedding/huggingfacehub/)

In [58]:
from langchain.embeddings import HuggingFaceEmbeddings

In [59]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [60]:

embedding_size = embeddings.embed_query("Hi")
print(len(embedding_size))

384


# Store Data in Database.

In [68]:
import time
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore

In [64]:
pc = Pinecone()

index_name = "langgraphrag"

In [65]:
existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [70]:
vector_store_index = PineconeVectorStore(index=index, embedding=embeddings)

In [71]:
vector_store_index.add_documents(documents=docs_split)

['d120adcd-a531-49ac-b462-fa275991b8fc',
 '7a46a8c5-ba9a-4605-838e-165f65a49132',
 '4e88ca6b-0fef-4b02-b6c8-23ca861e61e9',
 '13c7e842-ed68-40fe-8c73-c987da16900a',
 '91bc0ba1-1290-47a0-8426-920c075b8c35',
 '53902747-3b57-4dbe-9329-9dc36f8e8c63',
 'b5e14f26-dd53-4e9d-9e5b-9e781c00d018',
 '7de6afe2-3ec6-4a0a-a11a-6acadb5f2f48',
 'f81b6754-a678-44bb-bcd1-5eeaba43cc6c',
 '50f3a60c-0b97-4430-b831-7a45f9bdaebf',
 '9587fa09-cc38-4938-9769-32d0080e09cf',
 '84ff27d9-a777-4645-ac62-82a00aa4bb06',
 'b841de01-c08e-4e0b-adfd-3dbbd8ee0ca2',
 '757072db-9212-4da7-aded-19e401eb91bd',
 'f5115f0e-b301-4cb6-bee6-75c2468a6b3a',
 '8ccea493-152e-4dab-ad3f-aa812fa3a788',
 'cc813ec1-6c5b-46d5-9273-6076740020a9',
 '17446991-3d25-4d09-8fec-9c0488b2766d',
 'd7de27a5-24e3-4112-8daf-4255d625a460',
 '08ed1463-b8dd-4846-8f9f-da69f3cade92',
 '37583f92-3078-4ae3-a2d5-e966d67184df',
 'f4437c99-429a-4ecb-b598-0b26ccc57ab8',
 '05428aa2-0414-4203-960c-a3a74db1b7cf',
 'd64f4cc2-fd80-4853-8304-811a4dce6eeb',
 '682bfea8-445f-

# Retriever

In [72]:
retriever = vector_store_index.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.5},
)

In [73]:
retriever.invoke("Key component of Agents")

[Document(id='d120adcd-a531-49ac-b462-fa275991b8fc', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}, page_content="LLM Powered Autonomous Agents | Lil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPosts\n\n\n\n\nArchive\n\n\n\n\nSearch\n\n\n\n\nTags\n\n\n\n\nFAQ\n\n\n\n\nemojisearch.app\n\n\n\n\n\n\n

In [74]:
retriever.invoke("What is Agents?")

[Document(id='d120adcd-a531-49ac-b462-fa275991b8fc', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}, page_content="LLM Powered Autonomous Agents | Lil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPosts\n\n\n\n\nArchive\n\n\n\n\nSearch\n\n\n\n\nTags\n\n\n\n\nFAQ\n\n\n\n\nemojisearch.app\n\n\n\n\n\n\n

# Tools

In [78]:
!pip install -qU langchain-community wikipedia

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone


In [79]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [80]:
wiki_api_wrapper = WikipediaAPIWrapper(
    top_k_results = 1,
    doc_content_chars_max=100
)

wiki_tool = WikipediaQueryRun(api_wrapper = wiki_api_wrapper)

# Build Langgraph agents

In [75]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

In [76]:
from typing import Annotated, Literal, TypedDict
from langchain_core.tools import tool
from langgraph.graph import END, START, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode, tools_condition

## Router Model

[Langchain Router](https://python.langchain.com/v0.1/docs/use_cases/query_analysis/techniques/routing/)

In [77]:
class RouteQuery(BaseModel):
    """Route a user query to the most relevant data source."""

    datasource: Literal["vectorstore", "wiki_search"] = Field(
        ...,
        description="Given the user query choose to route to wikipedia or vectorstore.",
    )


### Routing with function calling models

In [81]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model = 'mixtral-8x7b-32768',
    temperature=0.0
)

In [82]:
structured_llm = llm.with_structured_output(RouteQuery)

In [83]:
system = """You are an expert at routing a user question to a vectorstore or wikipedia.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use wiki-search."""


router_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

router = router_prompt | structured_llm

In [84]:
router.invoke({"question": "who is Sam Altman?"})

RouteQuery(datasource='wiki_search')

In [85]:
router.invoke({"question": "What is a agents in LLM?"})

RouteQuery(datasource='vectorstore')

# Connecting all the Agents with Langgraph

[Capture the flow in as graph](https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_adaptive_rag/#construct-the-graph)

In [88]:
from typing import List

from typing_extensions import TypedDict

In [89]:
class GraphState(TypedDict):
  """
  Represents the state of our graph.

  Attributes:
      question: question
      generation: LLM generation
      documents: list of documents
  """

  question: str
  generation: str
  documents: List[str]

In [90]:
from langchain.schema import Document


def retrieve(state):
  """
  Retrieve documents

  Args:
      state (dict): The current graph state

  Returns:
      state (dict): New key added to state, documents, that contains retrieved documents
  """
  print("---RETRIEVE---")
  question = state["question"]

  # Retrieval
  documents = retriever.invoke(question)
  return {"documents": documents, "question": question}

In [91]:
def web_search(state):
  """
  Web search based on the re-phrased question.

  Args:
      state (dict): The current graph state

  Returns:
      state (dict): Updates documents key with appended web results
  """

  print("---WEB SEARCH---")
  question = state["question"]

  # Web search
  docs = wiki_tool.invoke({"query": question})
  web_results = "\n".join([d["content"] for d in docs])
  web_results = Document(page_content=web_results)

  return {"documents": web_results, "question": question}

In [92]:
def route_question(state):
  """
  Route question to web search or RAG.

  Args:
      state (dict): The current graph state

  Returns:
      str: Next node to call
  """

  print("---ROUTE QUESTION---")
  question = state["question"]
  source = router.invoke({"question": question})

  if source.datasource == "web_search":
      print("---ROUTE QUESTION TO WEB SEARCH---")
      return "web_search"

  elif source.datasource == "vectorstore":
      print("---ROUTE QUESTION TO RAG---")
      return "vectorstore"