In [None]:
! pip install -U langchain_community tiktoken langchainhub pymilvus langchain langgraph tavily-python sentence-transformers langchain-milvus langchain-huggingface

In [27]:
import os
os.environ['http_proxy'] = "http://127.0.0.1:8123" 
os.environ['https_proxy'] = "http://df-127.0.0.1:8123" 

In [None]:
!curl -fsSL https://ollama.com/install.sh | sh

In [19]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [7]:
from langchain.globals import set_verbose, set_debug

set_debug(True)
set_verbose(True)

### LLM

local_llm = 'llama3'



In [None]:
!ollama pull llama3

In [24]:
doc_splits

[Document(metadata={'source': '/root/users/jusjus/Self/2406.07550v1.pdf', 'page': 0}, page_content='An Image is Worth 32 Tokens\nfor Reconstruction and Generation\nQihang Yu1*, Mark Weber1,2*, Xueqing Deng1, Xiaohui Shen1, Daniel Cremers2, Liang-Chieh Chen1\n1ByteDance2Technical University Munich * equal contribution\nhttps://yucornetto.github.io/projects/titok.html\n32 tokensTiTok(ours)256 tokensVQGAN65536 pixelsrealimagelatent size and costsImage Reconstruction32 tokens can work well for…\nImage Generation(TiTok32tokens)'),
 Document(metadata={'source': '/root/users/jusjus/Self/2406.07550v1.pdf', 'page': 0}, page_content='Image Generation(TiTok32tokens)\nFigure 1: We propose TiTok , a compact 1Dtokenizer leveraging region redundancy to represent an\nimage with only 32tokens for image reconstruction and generation.\nAbstract\nRecent advancements in generative models have highlighted the crucial role of\nimage tokenization in the efficient synthesis of high-resolution images. Tokeniza-

In [2]:
### Index

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_milvus import Milvus
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader

urls = [
    "https://arxiv.org/pdf/2406.07550"
]
"""urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]


# Read Web doc
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)
"""

# Read local pdf
DOC_PATH = "/root/users/jusjus/Self/apple_10K.pdf"
loader = PyPDFLoader(DOC_PATH)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
doc_splits = text_splitter.split_documents(pages)

# Add to Milvus
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
vectorstore = Milvus.from_documents(
    documents=doc_splits,
    collection_name="rag_milvus",
    embedding=hf,
    connection_args={"uri": "./milvus_rag.db"},

)
retriever = vectorstore.as_retriever()


  hf = HuggingFaceEmbeddings(
  from tqdm.autonotebook import tqdm, trange


# Sample

In [10]:
### Retrieval Grader 

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM
llm = ChatOllama(model=local_llm, format="json", temperature=0)

prompt = PromptTemplate(
    template="""You are a grader assessing the relevance 
    of a retrieved document to a financial auditing question. If the document contains financial data, key figures, or information 
    that directly supports the audit objective, grade it as relevant. Use domain knowledge of financial auditing to assess the match.
    
    Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    
    Here is the retrieved document: 
    {document}
    
    Here is the auditing question: 
    {question}
    """,
    input_variables=["question", "document"],
)

retrieval_grader = prompt | llm | JsonOutputParser()
question = "agent memory"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "agent memory",
  "document": "Finite context length: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to learn from past mistakes would benefit a lot from long or infinite context windows. Although vector stores and retrieval can provide access to a larger knowledge pool, their representation power is not as powerful as full attention.\n\n\nChallenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.\n\n\nReliability of natural language interface

In [11]:
### Generate

from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
prompt = PromptTemplate(
    template="""You are an assistant specializing in financial auditing. 
    Use the following financial data and contextual information to provide a concise and accurate response to the question. If the information is insufficient, 
    state that you do not have enough data to answer the question.
    
    Use three sentences maximum and keep the answer precise:
    Auditing Question: {question} 
    Financial Data Context: {context} 
    Auditor's Response: 
    """,
    input_variables=["question", "document"],
)

llm = ChatOllama(model=local_llm, temperature=0)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run
question = "agent memory"
docs = retriever.invoke(question)
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > llm:ChatOllama] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are an assistant for question-answering tasks. \n    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. \n    Use three sentences maximum and keep the answer concise:\n    Question: agent memory \n    Context: [Document(metadata={'pk': 453236105473687587, 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Each element is an observation, an event directly provided by the agent.\\n-

In [12]:
### Hallucination Grader 

# LLM
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt
prompt = PromptTemplate(
    template="""You are a grader assessing whether 
    an auditor's response is grounded in the provided financial data or audit evidence. Determine if the response directly references or is supported by the data.
    
    Give a binary score 'yes' or 'no' to indicate whether the response is grounded in the provided evidence. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    
    Here is the audit evidence:
    {documents} 
    
    Here is the auditor's response: 
    {generation}
    """,
    input_variables=["generation", "documents"],
)

hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader.invoke({"documents": docs, "generation": generation})

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > llm:ChatOllama] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are a grader assessing whether \n    an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' score to indicate \n    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a \n    single key 'score' and no preamble or explanation.\n    \n    Here are the facts:\n    [Document(metadata={'pk': 453236105473687587, 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Each element i

{'score': 'yes'}

In [13]:
### Answer Grader 

# LLM
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt
prompt = PromptTemplate(
    template="""You are a grader assessing whether an 
    auditor's response effectively addresses the auditing question. Use your knowledge of financial auditing to evaluate if the response is useful 
    and actionable for resolving the question.
    
    Give a binary score 'yes' or 'no' to indicate whether the response is useful. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    
    Here is the auditing question: 
    {question}
    
    Here is the auditor's response:
    {generation}
    """,
    input_variables=["generation", "question"],
)

answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"question": question,"generation": generation})

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "agent memory",
  "generation": "The agent's memory is informed by the retrieval model, which surfaces context based on relevance, recency, and importance. The reflection mechanism synthesizes memories into higher-level inferences over time to guide the agent's future behavior."
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] Entering Prompt run with input:
[0m{
  "question": "agent memory",
  "generation": "The agent's memory is informed by the retrieval model, which surfaces context based on relevance, recency, and importance. The reflection mechanism synthesizes memories into higher-level inferences over time to guide the agent's future behavior."
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequenc

{'score': 'yes'}

In [8]:
### Router

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM
llm = ChatOllama(model=local_llm, format="json", temperature=0)

prompt = PromptTemplate(
    template="""You are an expert in financial auditing. 
    Route the user's question to the appropriate data source based on its context. Use 'financial_reports' for questions about balance sheets, income statements, or audit procedures. 
    Use 'external_sources' for questions requiring external benchmarks or general audit standards.
    
    Provide a binary choice 'financial_reports' or 'external_sources' based on the question. Return the result as a JSON with a single key 'datasource' and no preamble or explanation.
    
    Here is the auditing question: 
    {question}
    """,
    input_variables=["question"],
)

question_router = prompt | llm | JsonOutputParser()
question = "llm agent memory"
docs = retriever.get_relevant_documents(question)
doc_txt = docs[1].page_content
print(question_router.invoke({"question": question}))

  llm = ChatOllama(model=local_llm, format="json", temperature=0)
  docs = retriever.get_relevant_documents(question)


[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "llm agent memory"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] Entering Prompt run with input:
[0m{
  "question": "llm agent memory"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > prompt:PromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > llm:ChatOllama] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are an expert at routing a \n    user question to a vectorstore or web search. Use the vectorstore for questions on LLM  agents, \n    prompt engineering, and adversarial attacks. You do not need to be stringent with the keywords \n    in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search' \n    or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and

In [20]:
### Search

from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(k=3)

# Full

In [21]:
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents 
    """
    question : str
    generation : str
    web_search : str
    documents : List[str]

from langchain.schema import Document

### Nodes

def retrieve(state):
    """
    Retrieve documents from vectorstore

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.invoke(question)
    return {"documents": documents, "question": question}

def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    
    # RAG generation
    generation = rag_chain.invoke({"context": documents, "question": question})
    return {"documents": documents, "question": question, "generation": generation}

def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question
    If any document is not relevant, we will set a flag to run web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Filtered out irrelevant documents and updated web_search state
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]
    
    # Score each doc
    filtered_docs = []
    web_search = "No"
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score['score']
        # Document relevant
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        # Document not relevant
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            # We do not include the document in filtered_docs
            # We set a flag to indicate that we want to run web search
            web_search = "Yes"
            continue
    return {"documents": filtered_docs, "question": question, "web_search": web_search}
    
def web_search(state):
    """
    Web search based based on the question

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Appended web results to documents
    """

    print("---WEB SEARCH---")
    question = state["question"]
    documents = state["documents"]

    # Web search
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)
    if documents is not None:
        documents.append(web_results)
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}

### Conditional edge

def route_question(state):
    """
    Route question to web search or RAG.

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    print(question)
    source = question_router.invoke({"question": question})  
    print(source)
    print(source['datasource'])
    if source['datasource'] == 'web_search':
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"
    elif source['datasource'] == 'vectorstore':
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"

def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    question = state["question"]
    web_search = state["web_search"]
    filtered_documents = state["documents"]

    if web_search == "Yes":
        # All documents have been filtered check_relevance
        # We will re-generate a new query
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")
        return "websearch"
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"

### Conditional edge

def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    grade = score['score']

    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question,"generation": generation})
        grade = score['score']
        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

from langgraph.graph import END, StateGraph
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("websearch", web_search) # web search
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("grade_documents", grade_documents) # grade documents
workflow.add_node("generate", generate) # generatae

<langgraph.graph.state.StateGraph at 0x7f80ec5dfa60>

In [22]:
# Build graph
workflow.set_conditional_entry_point(
    route_question,
    {
        "websearch": "websearch",
        "vectorstore": "retrieve",
    },
)

workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "websearch": "websearch",
        "generate": "generate",
    },
)
workflow.add_edge("websearch", "generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "websearch",
    },
)

<langgraph.graph.state.StateGraph at 0x7f80ec5dfa60>

In [27]:
# Test
from pprint import pprint
inputs = {"question": "Did Emmanuel Macron visit Germany recently?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
pprint(value["generation"])

[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph] Entering Chain run with input:
[0m{
  "question": "Did Emmanuel Macron visit Germany recently?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph > chain:__start__] Entering Chain run with input:
[0m{
  "question": "Did Emmanuel Macron visit Germany recently?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph > chain:__start__ > chain:ChannelWrite<question,generation,web_search,documents>] Entering Chain run with input:
[0m{
  "question": "Did Emmanuel Macron visit Germany recently?"
}
[36;1m[1;3m[chain/end][0m [1m[chain:LangGraph > chain:__start__ > chain:ChannelWrite<question,generation,web_search,documents>] [0ms] Exiting Chain run with output:
[0m{
  "question": "Did Emmanuel Macron visit Germany recently?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph > chain:__start__ > chain:route_question] Entering Chain run with input:
[0m{
  "question": "Did Emmanuel Macron visit Germany recently?"
}
---ROUTE QUESTI

KeyError: 'documents'