LangGraph reference (Corrective RAG tutorial): https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_crag/#llms


In [1]:
from langchain.messages import AnyMessage, SystemMessage
from langchain.messages import ToolMessage
from langchain_core.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph, START, END
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from typing import Literal
from pydantic import BaseModel, Field
from typing_extensions import TypedDict, Annotated
import operator
from langchain_community.vectorstores import FAISS

In [2]:
# Load environment variables from a local .env file before any client is built
# (presumably this supplies the OpenAI API key — confirm against your .env).
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# On-disk location of the saved FAISS index and the embedding model passed to the loader.
vector_store_path = "vectorstores/faiss_store"
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# NOTE(security): allow_dangerous_deserialization=True permits loading pickled
# data from the index folder; only point this at a store you created or trust.
db = FAISS.load_local(vector_store_path, embeddings, allow_dangerous_deserialization=True)

# Similarity-search retriever limited to the top 2 matches per query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

In [4]:
# Structured-output schema for the relevance grader. The class is handed to
# `llm.with_structured_output(...)`, so the docstring and the field description
# are presumably forwarded to the model as part of the schema — treat them as
# prompt text when editing.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Downstream code checks this value for "yes" to keep a document.
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Bind the GradeDocuments schema so the grader returns a GradeDocuments object.
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Chain: grading prompt piped into the structured-output model.
retrieval_grader = grade_prompt | structured_llm_grader

# Smoke test: grade one retrieved document for a sample question.
# `question` and `docs` are module-level and are reused by later cells.
question = "agent memory"
docs = retriever.invoke(question)
# Index 1 assumes the retriever returned at least two documents.
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt})) 

binary_score='yes'


In [5]:
### Generate
from langchain_core.output_parsers import StrOutputParser

# Prompt (the template text below is sent to the model as-is)
prompt = ChatPromptTemplate.from_template(
    """
You are a smart and helpful assistant designed to answer user questions.

Instructions:
1. If the user asks a general or social question (e.g., greetings, current date/time, small talk, simple facts), respond naturally using your own knowledge.
2. If the question is factual or complex (e.g., about specific topics, facts, data, analysis), use ONLY the information from the provided context.
3. DO NOT use your own knowledge to answer detailed or technical questions unless they are general facts.
4. If the answer to a complex or factual question is NOT in the context, say:
   "I do not know based on the provided context."
5. Be concise and factual. Avoid overexplaining unless it's directly supported by the context.

Context:
{context}

Question:
{question}

Answer:
"""
)

# LLM (uses the same `model=` keyword as the other cells for consistency)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)


# Post-processing
def format_docs(docs):
    """Join the page_content of each document into one string, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# Chain: prompt -> chat model -> plain string
rag_chain = prompt | llm | StrOutputParser()

# Run
# Feed the formatted document text into {context}; passing the list itself
# would insert the Python repr of the Document objects into the prompt.
generation = rag_chain.invoke({"context": format_docs(docs), "question": question})
print(generation)

In a LLM-powered autonomous agent system, memory is categorized into two types:

1. **Short-term memory**: This involves in-context learning, where the model utilizes its immediate context to learn and adapt.

2. **Long-term memory**: This allows the agent to retain and recall information over extended periods, often by using an external vector store for fast retrieval.


Question rewriter

In [6]:
### Question Re-writer

# LLM
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Prompt (grammar of the system message fixed: "You a" -> "You are a")
system = """You are a question re-writer that converts an input question to a better version that is optimized \n 
     for web search. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

# Chain: rewrite prompt -> chat model -> plain string
question_rewriter = re_write_prompt | llm | StrOutputParser()
# Smoke test on the sample question defined in an earlier cell.
question_rewriter.invoke({"question": question})

'What is agent memory and how does it function in artificial intelligence?'

Web search tool

In [7]:
from langchain_community.tools import DuckDuckGoSearchRun

# Search tool instance used by the web search node.
web_search_tool = DuckDuckGoSearchRun()

# Smoke test: the cell output shows the text returned for a sample query.
web_search_tool.invoke("Obama's first name?")

'2 days ago - Barack Hussein Obama II (born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African American president. Obama previously served as a U.S. senator representing Illinois from ... 3 weeks ago - Barack Hussein Obama II (/bəˈrɑːk huːˈseɪn oʊˈbɑːmə/ ; born August 4, 1961) is an American politician and attorney. He was the 44th president of the United States from 2009 to 2017. He was the first African-American president in U.S. history. A member of the Democratic Party, he ... November 25, 2025 - Obama is a surname. It most commonly refers to Barack Obama (born 1961), the 44th president of the United States. Obama is a common Fang masculine name in western Central Africa. 2 weeks ago - Barack Hussein Obama Sr. (/ˈbærək huːˈseɪn oʊˈbɑːmə/; born Baraka Obama, 18 June 1934 – 24 November 1982) was a Kenyan senior governmental economist and the father of Barack Obam

Define state


In [8]:
from typing import List

from typing_extensions import TypedDict


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents
        retrieve: retrieval decision written by check_retrieve and read by
            decide_to_retrieve ("retrieve" or "no retrieve")
    """

    question: str
    generation: str
    web_search: str
    # NOTE(review): the nodes store document objects exposing `.page_content`
    # here, not plain strings; the annotation is kept as-is for compatibility.
    documents: List[str]
    # Declared explicitly so the routing key is part of the state schema
    # instead of an undeclared extra key returned by a node.
    retrieve: str

Retrieve Node

In [9]:
def retrieve(state):
    """
    Look up documents for the current question.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        dict: State update whose "documents" key holds whatever the retriever
        returned for the question.
    """
    print("---RETRIEVE---")
    # Query the module-level retriever with the question as-is.
    return {"documents": retriever.invoke(state["question"])}

Answer node

In [10]:
def generate(state):
    """
    Generate answer based on whether context documents are available.

    When documents are present, their text is joined with `format_docs` and
    passed to the RAG chain as context. When there are none (missing key,
    None, or empty list), the question is answered with a general-knowledge
    prompt instead.

    Args:
        state (dict): The current graph state. Must contain "question";
            "documents" is optional.

    Returns:
        dict: Updated state with 'generation' key containing the LLM response,
        plus the "documents" and "question" values that were used.
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state.get("documents", None)

    if documents:
        # Use RAG generation with retrieved documents. The documents are
        # formatted to plain text first so the prompt receives their content
        # rather than the repr of the document objects.
        generation = rag_chain.invoke(
            {"context": format_docs(documents), "question": question}
        )
    else:
        # Fallback: General generation without retrieval context
        general_prompt = ChatPromptTemplate.from_template(
            """
You are a helpful and concise assistant.

Answer the following question using general knowledge. Be accurate, polite, and to the point.

Question:
{question}

Answer:
"""
        )
        general_chain = general_prompt | llm | StrOutputParser()
        generation = general_chain.invoke({"question": question})

    return {
        "documents": documents,
        "question": question,
        "generation": generation
    }


Web search node

In [11]:
from langchain_core.documents import Document
def web_search(state):
    """
    Web search based on the re-phrased question.

    Args:
        state (dict): The current graph state. Must contain "question";
            "documents" may be missing or None, which is treated as empty.

    Returns:
        state (dict): Updates documents key with appended web results
    """

    print("---WEB SEARCH---")
    question = state["question"]
    # Work on a copy so the list held by the incoming state is not mutated
    # in place; the update is delivered only through the returned dict.
    documents = list(state.get("documents") or [])

    # Web search
    raw_text = web_search_tool.invoke({"query": question})
    # Wrap the raw result text so it has the same shape as retrieved documents.
    documents.append(
        Document(
            page_content=raw_text,
            metadata={"source": "duckduckgo", "type": "web"},
        )
    )

    return {"documents": documents, "question": question}

Define grader node

In [12]:
def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Each document is graded individually. Documents graded "yes" (ignoring
    case and surrounding whitespace) are kept. The web-search flag is set to
    "Yes" as soon as any document is rejected, and also when there were no
    documents to grade at all.

    Args:
        state (dict): The current graph state; must contain "question" and
            "documents".

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
        and sets "web_search" to "Yes" or "No".
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    filtered_docs = []
    # Nothing retrieved means nothing to ground an answer on, so fall back to
    # web search in that case as well.
    web_search = "No" if documents else "Yes"
    for d in documents:
        score = retrieval_grader.invoke(
            {"question": question, "document": d.page_content}
        )
        # Normalize so answers like "Yes" or " yes\n" are not misread as "no".
        grade = str(score.binary_score).strip().lower()
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            web_search = "Yes"
    return {"documents": filtered_docs, "web_search": web_search}


Define query transforming node

In [13]:

def transform_query(state):
    """
    Rewrite the current question into a better-phrased one.

    Args:
        state (dict): The current graph state; must contain "question" and
            "documents".

    Returns:
        state (dict): The unchanged documents plus the "question" key replaced
        by the rewritten question.
    """

    print("---TRANSFORM QUERY---")
    original_question = state["question"]
    current_docs = state["documents"]

    # Ask the rewriter chain for an improved phrasing of the question.
    rewritten = question_rewriter.invoke({"question": original_question})
    return {"documents": current_docs, "question": rewritten}

Logic to decide whether to retrieve

In [14]:
def check_retrieve(state):
    """
    Classify whether the question requires retrieval.

    The classifier's text answer is normalized (surrounding whitespace,
    quotes, backticks and trailing periods removed, lower-cased) before it is
    stored, so the routing step can compare it against "retrieve" reliably.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        state with key: "retrieve" = "retrieve" | "no retrieve"
    """
    question = state["question"]

    prompt = ChatPromptTemplate.from_template(
        """
You are a retrieval classifier.

Task:
Decide whether the following question requires retrieving information from external documents (e.g., a vector database or knowledge base).

Guidelines:
- Respond with exactly one word: "retrieve" or "no retrieve"
- Use "retrieve" if the question asks for:
  - Specific facts, data, or documents
  - Names, statistics, figures, or dates that are not general/common knowledge
  - Up-to-date or domain-specific knowledge (e.g., medical, legal, technical)
- Use "no retrieve" if the question:
  - Can be answered with general reasoning or common knowledge
  - Involves small talk, greetings, or simple questions like the current date, weather, or definitions

Only output one of the following:
- retrieve
- no retrieve

Question:
{question}

Answer:
"""
    )

    chain = prompt | llm | StrOutputParser()

    decision = chain.invoke({"question": question})
    # The model may wrap the label in quotes, add a period, or change its
    # case; strip that decoration so downstream equality checks still match.
    normalized = str(decision).strip().strip("\"'`.").strip().lower()

    return {
        "retrieve": normalized
    }


In [15]:
def decide_to_retrieve(state):
    """
    Decide whether to retrieve based on human question.

    The stored decision is free-form model text, so it is normalized
    (whitespace, quotes, backticks and periods stripped, lower-cased) before
    being compared. Anything other than "retrieve" routes to generation.

    Args:
        state (dict): The current graph state; must contain "retrieve".

    Returns:
        str: Binary decision for next node to call ("retrieve" or "generate")
    """

    print("---DECIDE TO RETRIEVE---")
    decide = str(state["retrieve"]).strip().strip("\"'`.").strip().lower()

    if decide == "retrieve":
        print("Decision: NEED RETRIEVAL")
        return "retrieve"
    else:
        print("Decision: NO RETRIEVAL NEEDED")
        return "generate"

Logic to decide whether to generate an answer or transform the query

In [16]:
def decide_to_generate(state):
    """
    Determines whether to generate an answer, or re-generate a question.

    Only the "web_search" flag is consulted; no other state key is required.

    Args:
        state (dict): The current graph state; must contain "web_search".

    Returns:
        str: Binary decision for next node to call ("transform_query" or
        "generate")
    """

    print("---ASSESS GRADED DOCUMENTS---")
    web_search = state["web_search"]

    if web_search == "Yes":
        # The grading step flagged the retrieved set as insufficient, so
        # rewrite the query before searching the web. (The log text below is
        # kept unchanged for compatibility with existing output.)
        print(
            "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
        )
        return "transform_query"
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"

Graph building

In [17]:
workflow = StateGraph(GraphState)

# Register the nodes: graph node name -> function implementing it.
graph_nodes = {
    "check_retrieve": check_retrieve,
    "retrieve": retrieve,
    "grade_documents": grade_documents,
    "generate": generate,
    "transform_query": transform_query,
    "web_search_node": web_search,
}
for node_name, node_fn in graph_nodes.items():
    workflow.add_node(node_name, node_fn)

# Wire the graph.
# Entry point: first classify whether retrieval is needed at all.
workflow.add_edge(START, "check_retrieve")
workflow.add_conditional_edges(
    "check_retrieve",
    decide_to_retrieve,
    {"retrieve": "retrieve", "generate": "generate"},
)

# Retrieval path: grade what was retrieved, then either answer directly or
# rewrite the query and search the web before answering.
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {"transform_query": "transform_query", "generate": "generate"},
)
workflow.add_edge("transform_query", "web_search_node")
workflow.add_edge("web_search_node", "generate")

# Every path ends after generation.
workflow.add_edge("generate", END)

# Compile
app = workflow.compile()

In [18]:
from pprint import pprint

# Run
inputs = {"question": "types of basic prompt engineering techniques"}

# Track the answer explicitly instead of relying on the loop variable that
# happens to be left over after the stream finishes.
final_generation = None
for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        pprint(f"Node '{key}':")
        # Keep the most recent update that actually carries an answer.
        if isinstance(value, dict) and "generation" in value:
            final_generation = value["generation"]
    pprint("\n---\n")

# Final generation
if final_generation is None:
    print("No generation was produced.")
else:
    pprint(final_generation)

---DECIDE TO RETRIEVE---
Decision: NEED RETRIEVAL
"Node 'check_retrieve':"
'\n---\n'
---RETRIEVE---
"Node 'retrieve':"
'\n---\n'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
"Node 'grade_documents':"
'\n---\n'
---GENERATE---
"Node 'generate':"
'\n---\n'
('The basic prompt engineering techniques mentioned in the context are:\n'
 '\n'
 '1. **Zero-Shot Learning**: This involves feeding the task text to the model '
 'and asking for results without any prior examples.\n'
 '\n'
 '2. **Few-Shot Learning**: This presents a set of high-quality '
 'demonstrations, each consisting of both input and desired output, to help '
 'the model understand human intention and criteria for the task.')
