In [1]:
import getpass
import os
from langchain.chat_models import init_chat_model
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
import warnings
warnings.filterwarnings("ignore")
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from typing import List, TypedDict, Literal
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from pydantic import BaseModel, Field
from langchain import hub
from PIL import Image 
from langchain_core.messages import HumanMessage, SystemMessage

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Turn on LangSmith tracing for everything run from this notebook.
os.environ['LANGSMITH_TRACING'] = 'true'
# Ask for the LangSmith key only when the environment does not already hold a
# non-empty one, mirroring the NVIDIA key cells: re-running this cell no longer
# re-prompts or overwrites an existing key. The explicit prompt replaces
# getpass's generic default ("Password: ") so it is clear which key is wanted.
if not os.environ.get('LANGSMITH_API_KEY'):
    os.environ['LANGSMITH_API_KEY'] = getpass.getpass("Enter API key for LangSmith: ")

#### Components

##### Chat model : NVIDIA

In [3]:
# Keep an existing non-empty NVIDIA_API_KEY; otherwise prompt for one without echoing it.
os.environ["NVIDIA_API_KEY"] = os.environ.get("NVIDIA_API_KEY") or getpass.getpass(
    "Enter API key for NVIDIA:"
)

In [4]:
# Chat model shared by the query-analysis and answer-generation nodes.
llm = init_chat_model(
    model="meta/llama3-70b-instruct",
    model_provider="nvidia",
)

##### Embedding model : NVIDIA

In [5]:
# Guard for the embedding section: prompt for NVIDIA_API_KEY only if it is unset or empty.
nvidia_key_missing = not os.environ.get("NVIDIA_API_KEY")
if nvidia_key_missing:
    os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter API key for NVIDIA: ")

In [6]:
# Embedding model handed to the vector store below.
embeddings = NVIDIAEmbeddings(
    model="NV-Embed-QA",
)

##### Vector Store : InMemory

In [7]:
# In-memory store; it starts empty and is filled when the blog chunks are added later.
vector_store = InMemoryVectorStore(embedding=embeddings)

##### RAG chain pipeline with query analysis using LangGraph

In [9]:

# Load the blog post using WebBaseLoader
# Only page elements carrying one of these CSS classes are handed to the parser.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
post_urls = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
loader = WebBaseLoader(web_paths=post_urls, bs_kwargs={"parse_only": bs4_strainer})
docs = loader.load()
# One URL in, so exactly one document is expected out.
assert len(docs) == 1
print("Total_characters:", len(docs[0].page_content))

# Split the blog post into chunks
# add_start_index=True appears to be what yields the `start_index` entry seen in
# each chunk's metadata in the streamed output.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200, "add_start_index": True}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
all_splits = text_splitter.split_documents(docs)
print("Split the blog post into", len(all_splits), "chunks")

# Add metadata to each chunk
# Chunks are labelled by position: the first `third` are "beginning", the next
# `third` are "middle", and everything after that (including any remainder from
# the integer division) is "end".
total_documents = len(all_splits)
third = total_documents // 3

for idx, doc in enumerate(all_splits):
    if idx >= 2 * third:
        section = "end"
    elif idx >= third:
        section = "middle"
    else:
        section = "beginning"
    doc.metadata["section"] = section

# Add the chunks to the vector store
# NOTE(review): vector_store is created in an earlier cell, so re-running this
# cell alone presumably inserts the same chunks a second time — confirm, and
# re-create the store first if that matters.
document_ids = vector_store.add_documents(documents=all_splits)
print(f"Total embeddings added to vector store: {len(document_ids)}")

# Define search schema
# Structured search request: analyze_query binds this model to the LLM via
# with_structured_output, and retrieve reads its two fields.
# Documented with comments rather than a docstring so the schema passed to the
# LLM stays exactly as it was.
class Search(BaseModel):
    # Free-text query passed to the vector store's similarity search.
    # Field(...) marks the field as required (no default value).
    query: str = Field(..., description="Search query to run.")
    # Which part of the post to search. Literal restricts the value to exactly
    # these three strings, which are the same labels written into each chunk's
    # metadata["section"].
    section: Literal["beginning", "middle", "end"] = Field(
        ..., description="Section to query."
    )  # required field as well


# Define prompt for question-answering
# Hub reference of the RAG prompt; generate() fills it with "question" and "context".
rag_prompt_ref = "rlm/rag-prompt"
prompt = hub.pull(rag_prompt_ref)

# Define state
# State carried through the graph; each node returns a dict that updates one key.
class State(TypedDict):
    question: str  # user question passed in to graph.invoke / graph.stream
    query: Search  # structured search request returned by analyze_query
    context: List[Document]  # documents returned by retrieve
    answer: str  # final answer text returned by generate

# Define analyze_query node
def analyze_query(state: State):
    """Turn the user's question into a structured ``Search`` request.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"query": ...}`` holding whatever the
        schema-bound LLM returns for the ``Search`` schema.
    """
    # Custom prompt for structured query extraction.
    # It deliberately ends at the JSON template: the previous trailing
    # "Always say 'thanks for asking!' ..." sentence was concatenated straight
    # onto the closing brace with no separator, and asking for extra text
    # contradicts "Respond strictly in this JSON format".
    system_msg = (
        "You are a helpful assistant that extracts structured data from questions. "
        "Given a user's question, extract the search query and whether the answer should be "
        "from the beginning, middle, or end of a document. "
        "Respond strictly in this JSON format:\n"
        '{ "query": "<search terms>", "section": "beginning|middle|end" }'
    )

    messages = [
        SystemMessage(content=system_msg),
        HumanMessage(content=state["question"]),
    ]

    # Bind the LLM to the schema only; the instructions travel in `messages`.
    structured_llm = llm.with_structured_output(schema=Search)
    query = structured_llm.invoke(messages)
    return {"query": query}


# Define retrieve node
def retrieve(state: State):
    """Fetch chunks similar to the analysed query, limited to its section.

    Args:
        state: Graph state; ``state["query"]`` must expose ``query`` and
            ``section`` attributes.

    Returns:
        A partial state update ``{"context": ...}`` with the search results.
    """
    search = state["query"]

    def in_requested_section(doc):
        # Keep only chunks whose "section" metadata equals the requested section.
        return doc.metadata.get("section") == search.section

    matches = vector_store.similarity_search(search.query, filter=in_requested_section)
    return {"context": matches}


# Define generate node
def generate(state: State):
    """Answer the question from the retrieved context.

    Args:
        state: Graph state; reads ``state["context"]`` and ``state["question"]``.

    Returns:
        A partial state update ``{"answer": ...}`` holding the ``content`` of
        the LLM response.
    """
    # Join the retrieved chunks into one context string, blank line between chunks.
    context_text = "\n\n".join([chunk.page_content for chunk in state["context"]])
    rag_messages = prompt.invoke(
        {"context": context_text, "question": state["question"]}
    )
    return {"answer": llm.invoke(rag_messages).content}


# Build LangGraph
# The three nodes run in sequence: analyze_query -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([analyze_query, retrieve, generate])
# Entry edge: execution starts at the first node of the sequence.
graph_builder.add_edge(START, "analyze_query")
graph = graph_builder.compile()

# Run the graph
# One full pass through the graph; only the final answer is printed.
graph_input = {"question": "What is Task Decomposition?"}
response = graph.invoke(graph_input)
print(f"Response: {response['answer']}")


# Graph stream in steps
# With stream_mode="updates" each step printed here is a dict keyed by node
# name (see the captured output), so the intermediate results can be inspected.
stream_input = {"question": "What is Task Decomposition?"}
for step in graph.stream(stream_input, stream_mode="updates"):
    print(f"{step}\n\n----------------\n")

Total_characters: 43130
Split the blog post into 66 chunks
Total embeddings added to vector store: 66
Response: Task decomposition is the process of breaking down a complicated task into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks, making it easier to plan and execute. This can be done through various methods, including simple prompting, task-specific instructions, or human inputs.
{'analyze_query': {'query': Search(query='Task Decomposition', section='beginning')}}

----------------

{'retrieve': {'context': [Document(id='c1ddf568-cb0d-439f-b1c7-661e47ec72eb', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 1585, 'section': 'beginning'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022