In [1]:
import getpass
import os
from langchain.chat_models import init_chat_model
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [2]:
# Configure LangSmith tracing.
# Reuse a key that is already in the environment instead of overwriting it,
# and label the prompt so it is clear which secret is being requested.
if not os.environ.get("LANGSMITH_API_KEY"):
    langsmith_key = getpass.getpass("Enter API key for LangSmith (leave blank to disable tracing):")
    if langsmith_key:
        os.environ["LANGSMITH_API_KEY"] = langsmith_key

# Only turn tracing on when a key is available; tracing with a blank key makes
# every traced run log a 401 "Unauthorized" error from the LangSmith API.
os.environ["LANGSMITH_TRACING"] = "true" if os.environ.get("LANGSMITH_API_KEY") else "false"

#### Components

##### Chat model : OpenAI

In [3]:
# Prompt for the OpenAI key only when it is missing or empty in the environment.
if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI:")

In [4]:
# Chat model used by the generation and query-analysis steps.
llm = init_chat_model(
    model="gpt-4o-mini",
    model_provider="openai",
)

##### Embedding model : OpenAI

In [5]:
# Same guard as for the chat model: ask for the key only if none is set yet.
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI:")

In [6]:
# Embedding model shared by every vector store created in this notebook.
embedding_model_name = "text-embedding-3-large"
embeddings = OpenAIEmbeddings(model=embedding_model_name)

##### Vector Store : Chroma

In [7]:
# Chroma collection persisted on disk next to the notebook.
chroma_settings = {
    "collection_name": "example_collection",
    "embedding_function": embeddings,
    "persist_directory": "./chroma_langchain_db",
}
vector_store = Chroma(**chroma_settings)

##### Simple indexing pipeline and RAG chain 

In [8]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START,StateGraph
from typing_extensions import List,TypedDict

USER_AGENT environment variable not set, consider setting it to identify your requests.


##### Load and chunk the contents of the blog

* Document loader (Object) - to load the documents and return list of Document objects

* WebBaseLoader uses urllib to load HTML from web URLs and BeautifulSoup parses it to text 
* Customized HTML -> text parsing using bs_kwargs

* bs4 strainer only keeps post title, headers, and content from the full HTML.

In [9]:
# CSS classes of the HTML elements to keep when parsing the page.
kept_css_classes = ("post-content", "post-title", "post-header")
bs4_strainer = bs4.SoupStrainer(class_=kept_css_classes)

In [10]:
# Load the blog post, parsing only the elements selected by bs4_strainer.
blog_urls = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
loader = WebBaseLoader(web_paths=blog_urls, bs_kwargs={"parse_only": bs4_strainer})

docs = loader.load()

In [11]:
# Exactly one URL was requested, so exactly one Document is expected.
loaded_count = len(docs)
assert loaded_count == 1

total_characters = len(docs[0].page_content)
print(f"Total_characters: {total_characters}")

Total_characters: 43130


##### Splitting Documents

* The loaded document contains around 43k characters, which is too long to fit into the context window of many models (token constraints)
* Even for feasible models, large piece of text makes the model difficult to search and find matches
* To ease the retrieval process, we split the documents into chunks
* Recursive text splitter splits the doc into chunks using common separators like new lines until each chunk is the appropriate size ( Recommended text splitter for generic text use cases)

* <b>chunk_size = 1000</b>:

            each chunk will have at most 1000 characters
* <b>chunk_overlap = 200</b>:

            up to 200 characters from the end of the previous chunk are repeated at the start of the next chunk to ensure context continuity

In [12]:
# Split the loaded document into overlapping chunks; add_start_index records
# each chunk's character offset in its metadata (see the preview below).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

chunk_count = len(all_splits)
print(f"Split the blog post into {chunk_count} chunks")

Split the blog post into 66 chunks


In [13]:
# Preview the first five chunks (metadata and text) as the cell output.
all_splits[:5]

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 8}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from

In [14]:
# Print "<chunk index> <character count>" for every chunk.
chunk_lengths = (len(split.page_content) for split in all_splits)
for position, length in enumerate(chunk_lengths):
    print(position, length)

0 969
1 609
2 606
3 644
4 971
5 506
6 902
7 706
8 164
9 960
10 412
11 903
12 834
13 545
14 969
15 986
16 459
17 542
18 760
19 772
20 818
21 469
22 655
23 820
24 476
25 388
26 855
27 805
28 639
29 456
30 610
31 616
32 679
33 726
34 971
35 195
36 997
37 828
38 624
39 955
40 541
41 961
42 704
43 556
44 958
45 666
46 664
47 983
48 127
49 936
50 999
51 310
52 18
53 48
54 991
55 996
56 476
57 702
58 989
59 378
60 132
61 808
62 568
63 976
64 956
65 940


##### Index Chunks

* A vector store is a database that stores the embeddings of the chunks - here ChromaDB is used
* The stored vectors help retrieve relevant chunks via similarity search against the user query / prompt

In [16]:
# Index the chunks with deterministic ids (source URL + chunk position).
# The Chroma store is persisted on disk, so re-running this cell with
# auto-generated ids would store the same chunks again and similarity search
# would return duplicates. Reusing the same ids is expected to overwrite the
# existing entries instead (Chroma writes by id) — verify against the
# langchain_chroma version in use.
chunk_ids = [
    f"{split.metadata.get('source', 'doc')}#chunk-{position}"
    for position, split in enumerate(all_splits)
]
document_ids = vector_store.add_documents(documents=all_splits, ids=chunk_ids)
print(document_ids[:3])

APIConnectionError: Connection error.

#####  Define prompt for question-answering

* This pulls a pre-built prompt template from langchain hub
* "rlm/rag-prompt" - prompt specifically designed for retrieval augmented question answering
* It sets up how the question + context will be sent to language model

* Prompt Template will be something like this:
* You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
* Question: (question goes here) 
* Context: (context goes here) 
* Answer:

In [51]:
# Fetch the question-answering prompt template from the LangChain hub.
rag_prompt_ref = "rlm/rag-prompt"
prompt = hub.pull(rag_prompt_ref)



##### Define state for application

* The state of the application controls what data is
    - input to the application,
    - transferred between steps, and
    - output by the application. It is typically a TypedDict


* This acts as a container to store question, context and answer

In [52]:
# Shared state passed between the graph steps defined below.
class State(TypedDict):
    # User question supplied when the graph is invoked.
    question: str
    # Documents returned by the retrieve step.
    context: List[Document]
    # Text produced by the generate step.
    answer: str

##### Define application steps

<b>STEP 1: Retrieval</b>

* Takes in user's question
* Uses similarity search to find the most relevant chunks
* Retrieves the top_k most relevant chunks based on the similarity score (k=4 by default)
* Return them as context

In [None]:
def retrieve(state: State):
    """Look up chunks similar to the question in the module-level vector store.

    Reads ``state["question"]`` and returns a partial state update whose
    ``"context"`` entry holds the documents returned by the similarity search.
    """
    user_question = state["question"]
    matches = vector_store.similarity_search(user_question)
    return {"context": matches}

<b>STEP 2: Generation</b>

* Joins all the retrieved chunks into a single text
* passes the question + context to the prompt
* Sends it to LLM
* Returns answer

In [54]:
def generate(state: State):
    """Answer the question from the retrieved context.

    Joins the page content of every document in ``state["context"]`` with blank
    lines, fills the module-level prompt with the question and that text, sends
    the result to the module-level llm, and returns a partial state update with
    the reply content under ``"answer"``.
    """
    context_text = "\n\n".join(doc.page_content for doc in state["context"])
    prompt_input = {"question": state["question"], "context": context_text}
    llm_reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}

##### Compile application and test

In [55]:
# Wire the two steps in order (retrieve -> generate) and compile the graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")  # entry point of the graph
graph = graph_builder.compile()

* StateGraph(State): Creates LangGraph based on State definition
* .add_sequence([retrieve, generate]): Order of execution
* add_edge(START, "retrieve"): Tells the graph to start from retrieval first
* .compile(): Finalizes the graph into an executable app

In [56]:
# Run the compiled graph once and show only the generated answer.
graph_input = {"question": "What is Task Decomposition?"}
response = graph.invoke(graph_input)
print(response["answer"])

Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Unauthorized"}\n')trace=3bedfd3d-0371-4b57-9a5e-3aa91de0e965,id=3bedfd3d-0371-4b57-9a5e-3aa91de0e965; trace=3bedfd3d-0371-4b57-9a5e-3aa91de0e965,id=0367fc63-f5b3-418c-9fec-4df8af3dcc04; trace=3bedfd3d-0371-4b57-9a5e-3aa91de0e965,id=759d9e4d-d868-4b39-8638-9f05602adf77; trace=3bedfd3d-0371-4b57-9a5e-3aa91de0e965,id=7836823e-8574-44af-b1f3-37228c1af152; trace=3bedfd3d-0371-4b57-9a5e-3aa91de0e965,id=13f23fde-6c53-4331-9cb1-7fad33a6c9c0
Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Unauthorized"}\n')trace=3bedfd3d-0371-4b57-9a5e-3aa91de

APIConnectionError: Connection error.

##### To visualize the control flow of the application

In [18]:
from IPython.display import Image, display

# Render the compiled graph as a Mermaid PNG and show it inline.
# Requires the `graph` built in the "Compile application and test" cell.
graph_png = graph.get_graph().draw_mermaid_png()
display(Image(graph_png))

NameError: name 'graph' is not defined

#### Query Analysis

* This employs models to transform the raw user query into structured / more optimized format

In [20]:
# Size of one third of the chunk list (integer division), used below to
# label chunks as beginning / middle / end.
total_documents = len(all_splits)
third = divmod(total_documents, 3)[0]
third

22

In [22]:
# Show the metadata of the first chunk as the cell output.
all_splits[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 8}

In [23]:
# Label every chunk by its position: first third -> 'beginning',
# second third -> 'middle', everything after that -> 'end'.
# Note: this writes into the metadata of the existing Document objects.
for position, split in enumerate(all_splits):
    if position >= third * 2:
        section_label = 'end'
    elif position >= third:
        section_label = 'middle'
    else:
        section_label = 'beginning'
    split.metadata["section"] = section_label

In [25]:
# Show the metadata of chunk 22 to check that the 'section' key was added.
all_splits[22].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 15458,
 'section': 'middle'}

##### Store using InMemoryVectorStore

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
# NOTE: this rebinds `vector_store` (previously the Chroma store) to a fresh
# in-memory store; every cell below uses the in-memory one.
vector_store = InMemoryVectorStore(embedding=embeddings)
document_ids = vector_store.add_documents(all_splits)

##### Define schema for search query

In [29]:
from typing import Literal
from typing_extensions import Annotated

* This defines a schema for what the LLM should extract from the user’s question:

* query: what to search for.

* section: which part of the document is relevant (beginning, middle, end).

Based on the user question, the LLM infers which section the relevant chunks belong to, and retrieval then runs the similarity search only within that section instead of checking all the chunks.

Pre-filter chunks using metadata 

In [None]:
# Structured search request that the LLM is asked to produce from the user's
# question (passed to llm.with_structured_output in analyze_query).
class Search(TypedDict):
    # Free-text query to run against the vector store.
    query : Annotated[str,...,"Search query to run."]
    # One of the three section labels written into the chunk metadata above.
    section: Annotated[
        Literal["beginning","middle","end"],
        ...,"Section to query",
    ]


# Graph state for the query-analysis pipeline. This redefines the earlier
# State and adds the structured `query` field.
class State(TypedDict):
    # User question supplied when the graph is invoked.
    question: str
    # Structured search request produced by analyze_query.
    query: Search
    # Documents returned by the retrieve step.
    context: List[Document]
    # Text produced by the generate step.
    answer: str

In [None]:
def analyze_query(state: State):
    """Turn the raw question into a structured ``Search`` request.

    Wraps the module-level llm so that its output follows the ``Search``
    schema, invokes it with ``state["question"]``, and returns the result as a
    partial state update under ``"query"``.
    """
    search_llm = llm.with_structured_output(Search)
    return {"query": search_llm.invoke(state["question"])}

def retrieve(state: State):
    """Run the structured query against the vector store, restricted by section.

    Uses ``state["query"]["query"]`` as the search text and keeps only documents
    whose ``"section"`` metadata equals ``state["query"]["section"]``. Returns a
    partial state update with the matches under ``"context"``.
    """
    search = state["query"]

    def in_requested_section(doc):
        # Documents without a "section" key yield None and are filtered out.
        return doc.metadata.get("section") == search["section"]

    matches = vector_store.similarity_search(
        search["query"],
        filter=in_requested_section,
    )
    return {"context": matches}

def generate(state: State):
    """Produce the final answer from the question and the retrieved documents.

    The page contents of ``state["context"]`` are concatenated with blank lines
    in between, passed with the question to the module-level prompt, and the
    resulting messages are sent to the module-level llm. Returns
    ``{"answer": <reply content>}``.
    """
    page_texts = []
    for doc in state["context"]:
        page_texts.append(doc.page_content)
    docs_content = "\n\n".join(page_texts)

    messages = prompt.invoke(
        {"question": state["question"], "context": docs_content}
    )
    return {"answer": llm.invoke(messages).content}

# Three-step pipeline: analyze_query -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([analyze_query, retrieve, generate])
graph_builder.add_edge(START, "analyze_query")  # entry point of the graph
graph = graph_builder.compile()

In [None]:
# Render the query-analysis graph as a Mermaid PNG and show it inline.
query_graph_png = graph.get_graph().draw_mermaid_png()
display(Image(query_graph_png))

In [None]:
# Stream the state update emitted by each step as it finishes.
updates_input = {"question": "What is Task Decomposition?"}
for step in graph.stream(updates_input, stream_mode="updates"):
    print(f"{step}\n\n----------------\n")

In [None]:
# Stream the model output message by message, separating pieces with "|".
messages_input = {"question": "What is Task Decomposition?"}
for message, metadata in graph.stream(messages_input, stream_mode="messages"):
    print(message.content, end="|")