In [136]:
import getpass
import os

# Turn on LangSmith tracing through its environment flag.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt only when no key is present, mirroring the OpenAI key handling in the
# following cells: re-running this cell (or exporting the key in the shell
# beforehand) no longer asks for it again.
if not os.environ.get("LANGSMITH_API_KEY"):
  os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for LangSmith: ")

In [137]:
# Chat model
import getpass
import os

from langchain.chat_models import init_chat_model

# Ask for the OpenAI key interactively only when the environment has none
# (unset or empty).
if not os.getenv("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Chat model used by the generation step later in the notebook.
llm = init_chat_model(
    "gpt-4o-mini",
    model_provider="openai",
)

In [138]:
# Embedding model
import getpass
import os

from langchain_openai import OpenAIEmbeddings

# Same key check as for the chat model, so this cell also works when run on its own.
if not os.getenv("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Embedding model used both to index the chunks and to embed queries.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

In [139]:
# FAISS vector store
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embed a throwaway query once to learn how long the model's vectors are.
embedding_dim = len(embeddings.embed_query("hello world"))

# L2-distance index sized to that dimensionality; it starts out empty.
index = faiss.IndexFlatL2(embedding_dim)

# Wrap the raw index with an empty in-memory docstore and an empty
# index-position -> docstore-id mapping; documents are added in a later cell.
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

In [140]:
# Indexing: load the source blog post
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Restrict HTML parsing to the post title, headers, and content.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content"),
)
loader = WebBaseLoader(
    bs_kwargs=dict(parse_only=bs4_strainer),
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
)
docs = loader.load()

# A single URL was requested, so exactly one document is expected back.
assert len(docs) == 1
print(f"Total characters: {len(docs[0].page_content)}")

Total characters: 43047


In [141]:
# Splitting the document into overlapping chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,  # record each chunk's offset in the original document
    chunk_overlap=200,     # characters shared between neighbouring chunks
    chunk_size=1000,       # maximum chunk length, in characters
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 63 sub-documents.


In [142]:
# Storing documents
import uuid

# Give every chunk a fresh UUID and record it in the chunk's metadata, so the
# ID travels with the document through retrieval.
for doc in all_splits:
    doc.metadata["id"] = str(uuid.uuid4())

# Hand the same UUIDs to the vector store as its document IDs. Without `ids=`
# the store generates its own, unrelated IDs, so the IDs printed below would
# not match the `metadata["id"]` values that retrieval reports later.
document_ids = vector_store.add_documents(
    documents=all_splits,
    ids=[doc.metadata["id"] for doc in all_splits],
)

print(f"First 3 document IDs in vector store: {document_ids[:3]}")

First 3 document IDs in vector store: ['84a479b5-5a57-434e-b386-1e3ec9a69648', '65e282eb-3c46-41d9-a787-95cd90b10cbc', '2befd0c1-8025-497c-a7ed-61938faab880']


In [143]:
from langchain import hub

# N.B. for non-US LangSmith endpoints, you may need to specify
# api_url="https://api.smith.langchain.com" in hub.pull.
prompt = hub.pull("rlm/rag-prompt")

# Render the prompt with placeholder values to inspect the template text.
example_messages = prompt.invoke(
    dict(context="(context goes here)", question="(question goes here)")
).to_messages()

# The cell expects the prompt to render to exactly one message.
assert len(example_messages) == 1
print(example_messages[0].content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:




In [144]:
from typing_extensions import List, TypedDict

# Structure of a single retrieved chunk as stored in the graph state.
class RetrievedDoc(TypedDict):
    """One retrieved chunk: its identifier, its text, and its retrieval score."""

    # Identifier of the chunk (the retrieve step reads it from the document's metadata).
    id: str
    # Text content of the chunk.
    text: str
    # Score attached to the chunk by the retrieve step.
    score: float

# Graph state shared by the retrieve and generate steps.
class State(TypedDict):
    """State passed between graph nodes: the question, retrieved context, and answer."""

    # The user's question; supplied when the graph is invoked.
    question: str
    # Retrieved chunks; written by the retrieve step and read by the generate step.
    context: List[RetrievedDoc]
    # Final answer text; written by the generate step.
    answer: str

In [145]:
def retrieve(state: State):
    """
    Retrieve a diverse set of chunks for the question, each with its score.

    The question is embedded once and passed to a single MMR (max marginal
    relevance) search that returns documents together with their scores. This
    replaces the earlier two-search approach, which embedded the query twice
    and fell back to the string "N/A" when a score lookup missed, contradicting
    ``RetrievedDoc.score: float`` and breaking numeric formatting downstream.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": [...]}`` in which every entry is a
        dict with ``id`` (str), ``text`` (str) and ``score`` (float) keys.
    """
    # MMR parameters.
    k = 5              # number of documents returned
    fetch_k = 10       # size of the candidate pool MMR selects from
    lambda_mult = 0.6  # relevance/diversity trade-off passed through to MMR

    # Embed the question once; the same vector drives candidate search and MMR.
    query_embedding = embeddings.embed_query(state["question"])

    # One call yields the MMR-selected documents and their scores together, so
    # there is no second search and no score lookup that could miss.
    docs_and_scores = vector_store.max_marginal_relevance_search_with_score_by_vector(
        query_embedding,
        k=k,
        fetch_k=fetch_k,
        lambda_mult=lambda_mult,
    )

    formatted_context = [
        {
            # Chunks stored without an "id" in their metadata are reported as "N/A".
            "id": doc.metadata.get("id", "N/A"),
            "text": doc.page_content,
            # Cast to a plain float so the value matches the declared type.
            # NOTE(review): with the IndexFlatL2 index this is presumably an L2
            # distance (lower = closer) - confirm before interpreting scores.
            "score": float(score),
        }
        for doc, score in docs_and_scores
    ]

    return {"context": formatted_context}


def generate(state: State):
    """
    Answer the question using the chunks gathered by the retrieve step.

    Args:
        state: Graph state; reads ``state["context"]`` (a list of dicts that
            each carry a ``"text"`` entry) and ``state["question"]``.

    Returns:
        A partial state update ``{"answer": ...}`` holding the content of the
        model's response.
    """
    # Concatenate the chunk texts, separated by blank lines, into one context string.
    chunk_texts = [entry["text"] for entry in state["context"]]
    prompt_input = {
        "question": state["question"],
        "context": "\n\n".join(chunk_texts),
    }

    # Render the prompt, then hand the resulting messages to the chat model.
    llm_reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}

In [146]:
from langgraph.graph import START, StateGraph

# Assemble the pipeline: `retrieve` followed by `generate`, entered at `retrieve`.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")

graph = graph_builder.compile()

In [147]:
# Run the full pipeline once for a sample question.
result = graph.invoke(
    {"question": "What is Task Decomposition?"},
)

# Show every retrieved chunk (ID, score, first 200 characters), then the answer.
print("--- Retrieved Context ---")
for doc in result["context"]:
    print(f"  ID: {doc['id']}")
    print(f"  Score: {doc['score']:.4f}")
    print(f"  Text: {doc['text'][:200]}...\n")
print("-------------------------\n")

print(f"Final Answer: {result['answer']}")

--- Retrieved Context ---
  ID: ee7adb46-d9ae-49a7-a103-be8adce23d4b
  Score: 1.0511
  Text: Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outl...

  ID: b05603dd-4815-4a19-a01c-4d27fc4d1285
  Score: 1.3123
  Text: The AI assistant can parse user input to several tasks: [{"task": task, "id", task_id, "dep": dependency_task_ids, "args": {"text": text, "image": URL, "audio": URL, "video": URL}}]. The "dep" field d...

  ID: 752fefab-98d1-4801-8051-e43301876bfc
  Score: 1.0795
  Text: Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a s...

  ID: c6476961-21c6-4f97-a84e-65ef18afcb79
  Score: 1.5196
  Text: "content": "Please now remember the steps:\n\nThink step by step and reason yourself to the

In [148]:
# Stream the same question through the graph, printing each update as it is yielded.
for step in graph.stream({"question": "What is Task Decomposition?"}, stream_mode="updates"):
    print(f"{step}\n\n----------------\n")

{'retrieve': {'context': [{'id': 'ee7adb46-d9ae-49a7-a103-be8adce23d4b', 'text': 'Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in