Environment

In [99]:
! pip install --quiet langchain langchain_cohere  langchain_openai tiktoken langchainhub chromadb langgraph 
! pip install beautifulsoup4



In [100]:
###LLMs
import os

os.environ["http_proxy"] = "http://localhost:7890"
os.environ["https_proxy"] = "http://localhost:7890"

cohere_api_key = os.environ["COHERE_API_KEY"]
print(cohere_api_key)

nXpHCXykO8o6YB9A57EQpNBxZKX5ngGy1rzbv19h


Index

In [101]:
### Build Index

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_cohere import CohereEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma

# Set embeddings
embd = CohereEmbeddings()

# Docs to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorstore  矢量存储
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=embd,
)

retriever = vectorstore.as_retriever()


LLMs

In [102]:
### Router

from langchain_cohere import ChatCohere                    #ChatCohere类，用于处理自然语言
from langchain_core.prompts import ChatPromptTemplate      #ChatPromptTemplate类，用于创建交互式提示模板
from langchain_core.pydantic_v1 import BaseModel, Field    #BaseModel和Field类，用于定义数据模型


# Data model
class web_search(BaseModel):
    """
    The internet. Use web_search for questions that are related to anything else than agents, prompt engineering, and adversarial attacks.
    """

    query: str = Field(description="The query to use when searching the internet.")


class vectorstore(BaseModel):
    """
    A vectorstore containing documents related to agents, prompt engineering, and adversarial attacks. Use the vectorstore for questions on these topics.
    """

    query: str = Field(description="The query to use when searching the vectorstore.")


# Preamble 引言
preamble = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use web-search."""

# LLM with tool use and preamble    
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_router = llm.bind_tools(                      #工具绑定
    tools=[web_search, vectorstore], preamble=preamble
)

# Prompt
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{question}"),
    ]
)

#组合 route_prompt 和 structured_llm_router 来处理用户的问题
question_router = route_prompt | structured_llm_router

response = question_router.invoke(
    {"question": "Who will the Bears draft first in the NFL draft?"}
)
print(response)
print(response.response_metadata["tool_calls"],end="\n\n")            #打印工具调用元数据信息   

response = question_router.invoke({"question": "What are the types of agent memory?"})
print(response)
print(response.response_metadata["tool_calls"],end="\n\n")

response = question_router.invoke({"question": "Hi how are you?"})
print(response)
print("tool_calls" in response.response_metadata)

content='I will search for who the Bears will pick in the NFL draft and relay this information to the user.' additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '5be30aff-a381-4728-8e20-c81c786c7f18', 'tool_calls': [{'id': 'd142477529484b3993fee8a549291b26', 'function': {'name': 'web_search', 'arguments': '{"query": "who will the bears pick in the NFL draft"}'}, 'type': 'function'}], 'token_count': {'input_tokens': 915, 'output_tokens': 71}} response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '5be30aff-a381-4728-8e20-c81c786c7f18', 'tool_calls': [{'id': 'd142477529484b3993fee8a549291b26', 'function': {'name': 'web_search', 'arguments': '{"query": "who will the bears pick in the NFL draft"}'}, 'type': 'function'}], 'token_count': {'input_tokens': 915, 'output_tokens': 71}} id='run-c703f929-c3f4

In [103]:
### Retrieval Grader

# Data model 
# 文档相关性评分
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# Prompt
preamble = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

# LLM with function call
llm = ChatCohere(model="command-r", temperature=0)

structured_llm_grader = llm.with_structured_output(GradeDocuments, preamble=preamble)

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),          #根据已检索文档 + 用户问题进行判断
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader

question = "types of agent memory"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content           #第二个页面的内容

#根据问题和检索文档内容进行相关性评分
response = retrieval_grader.invoke({"question": question, "document": doc_txt})

print(docs,end="\n\n")
print("question:" + question,end="\n\n")
print("document:"+ doc_txt,end="\n\n")
print(response)

[Document(metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}, page_content='inquired about current trends in anticancer drug discovery;\nselected a target;\nrequested a scaffold targeting these compounds;\nOnce the compound was identified, the model attempted its synthesis.\n\nThey also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a l

Generate

In [104]:
### Generate

from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser  #解析字符串输出

# Preamble 回答问题助手
preamble = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise."""

# LLM
llm = ChatCohere(model_name="command-r", temperature=0).bind(preamble=preamble)

# Prompt
def prompt(x):
    return ChatPromptTemplate.from_messages(
        [
            HumanMessage(
                f"Question: {x['question']} \nAnswer: ",
                additional_kwargs={"documents": x["documents"]},
            )
        ]
    )

# Chain 链路设计
rag_chain = prompt | llm | StrOutputParser()

# Run
generation = rag_chain.invoke({"documents": docs, "question": question})

print(docs,end="\n\n")
print("question:" + question,end="\n\n")

print(type(generation))
print(f"generation:", generation)

[Document(metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}, page_content='inquired about current trends in anticancer drug discovery;\nselected a target;\nrequested a scaffold targeting these compounds;\nOnce the compound was identified, the model attempted its synthesis.\n\nThey also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a l

In [105]:
### LLM fallback

from langchain_core.output_parsers import StrOutputParser

# Preamble
preamble = """You are an assistant for question-answering tasks. Answer the question based upon your knowledge. Use three sentences maximum and keep the answer concise."""

# LLM
llm = ChatCohere(model_name="command-r", temperature=0).bind(preamble=preamble)


# Prompt
def prompt(x):
    return ChatPromptTemplate.from_messages(
        [HumanMessage(f"Question: {x['question']} \nAnswer: ")]
    )


# Chain 
llm_chain = prompt | llm | StrOutputParser()

# Run
question = "Hi how are you?"
generation = llm_chain.invoke({"question": question})

print(f"question:",question)
print(f"generation:", generation)

question: Hi how are you?
generation: I don't have feelings as an AI chatbot, but I'm here to assist you with any questions or tasks you may have. How can I help you today?


In [106]:
### Hallucination Grader

# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

# Preamble
preamble = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""

# LLM with function call
llm = ChatCohere(model="command-r", temperature=0)

structured_llm_grader = llm.with_structured_output(
    GradeHallucinations, preamble=preamble
)

# Prompt
# 根据事实和LLMs生成答案进行幻觉判断
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        # ("system", system),
       
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

#chain
hallucination_grader = hallucination_prompt | structured_llm_grader

print(docs,end="\n\n")
print(f"generation:", generation)
hallucination_grader.invoke({"documents": docs, "generation": generation})

[Document(metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}, page_content='inquired about current trends in anticancer drug discovery;\nselected a target;\nrequested a scaffold targeting these compounds;\nOnce the compound was identified, the model attempted its synthesis.\n\nThey also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a l

GradeHallucinations(binary_score='no')

In [107]:
### Answer Grader


# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# Preamble
preamble = """You are a grader assessing whether an answer addresses / resolves a question \n
Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""


# LLM with function call
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeAnswer, preamble=preamble)

# Prompt
# 根据用户问题和LLMs生成答案怕判断是否回答了问题
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

#chain
answer_grader = answer_prompt | structured_llm_grader

print("question：" + question,end="\n\n")
print(f"generation:", generation)

answer_grader.invoke({"question": question, "generation": generation})

question：Hi how are you?

generation: I don't have feelings as an AI chatbot, but I'm here to assist you with any questions or tasks you may have. How can I help you today?


GradeAnswer(binary_score='yes')

Web Search Tool

In [108]:

### Search
import os 
taily_api_ker=os.environ['TAVILY_API_KEY']
print(taily_api_ker)

from langchain_community.tools.tavily_search import TavilySearchResults

web_search_tool = TavilySearchResults()

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


Graph
Capture the flow in as a graph.

Graph state

In [109]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):             
    """|
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
    """

    question: str
    generation: str
    documents: List[str]

Graph Flow

In [110]:
from langchain.schema import Document
import pprint

def retrieve(state):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.invoke(question)
    return {"documents": documents, "question": question}


def llm_fallback(state):
    """
    Generate answer using the LLM w/o vectorstore

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---LLM Fallback---")
    question = state["question"]
    generation = llm_chain.invoke({"question": question})
    return {"question": question, "generation": generation}


def generate(state):
    """
    Generate answer using the vectorstore

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    if not isinstance(documents, list):
        documents = [documents]

    # RAG generation
    generation = rag_chain.invoke({"documents": documents, "question": question})
    return {"documents": documents, "question": question, "generation": generation}


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    filtered_docs = []
    for d in documents:
        score = retrieval_grader.invoke(
            {"question": question, "document": d.page_content}
        )
        grade = score.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue
    return {"documents": filtered_docs, "question": question}


def web_search(state):
    """
    Web search based on the re-phrased question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with appended web results
    """

    print("---WEB SEARCH---")
    question = state["question"]

    # Web search
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)

    return {"documents": web_results, "question": question}


### Edges ###

# 将问题路由至网络搜索或 RAG
def route_question(state):
    
    """
    Route question to web search or RAG.    

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    source = question_router.invoke({"question": question})

    # Fallback to LLM or raise error if no decision
    if "tool_calls" not in source.additional_kwargs:     #额外关键词
        print("---ROUTE QUESTION TO LLM---")
        return "llm_fallback"
    #tool_calls 列表为空
    if len(source.additional_kwargs["tool_calls"]) == 0:
        raise "Router could not decide source"

    # Choose datasource
    datasource = source.additional_kwargs["tool_calls"][0]["function"]["name"]
    #根据工具调用信息选择数据来源(下一步操作)
    if datasource == "web_search":
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "web_search"
    elif datasource == "vectorstore":
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"
    else:
        print("---ROUTE QUESTION TO LLM---")
        return "vectorstore"                      ###有点奇怪 不是llm_fallback吗？

#确定是否生成答案，或者重新生成问题。
def decide_to_generate(state):
    """
    Determines whether to generate an answer, or re-generate a question.
    
    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")          #正在评估已分级的文档

    state["question"]
    filtered_documents = state["documents"]

    #不存在相关文档
    if not filtered_documents:
        # All documents have been filtered check_relevance
        # We will re-generate a new query
        # 所有文档都不相关于问题，需要进行网络搜索。
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, WEB SEARCH---")
        return "web_search"
    
    #存在相关文档
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"

# 确定生成是否基于文档并回答问题
def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")                

    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    #幻觉打分（yes\no）
    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    print(score)
    
    grade = score.binary_score

    # Check hallucination
    if grade == "yes":  # 生成的内容基于文档
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")

        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")  # 生成的内容是否回答了问题
        # 根据问题与生成内容进行评分
        score = answer_grader.invoke({"question": question, "generation": generation})

        if score is not None:
            grade = score.binary_score
            # 根据问题回答评分做出决策
            if grade == "yes":
                print("---DECISION: GENERATION ADDRESSES QUESTION---")
                return "useful"
            else:
                print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
                return "not useful"
        else:
            print("---SCORE OBJECT IS NONE, UNABLE TO GRADE ANSWER---")
            return "not useful"
    else:  # 生成的内容未基于文档
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")  # 建议重新尝试
        return "not supported"

Build Graph

In [111]:
import pprint

from langgraph.graph import END, StateGraph, START

workflow = StateGraph(GraphState)

# Define the nodes 和对应函数 
workflow.add_node("web_search", web_search)            # web search
workflow.add_node("retrieve", retrieve)                # retrieve
workflow.add_node("grade_documents", grade_documents)  # grade documents
workflow.add_node("generate", generate)                # rag
workflow.add_node("llm_fallback", llm_fallback)        # llm

### Build graph ###   
# 添加条件边缘
workflow.add_conditional_edges(
    START,                               # 开始节点
    route_question,                      # 路由问题的函数
    {
        "web_search": "web_search",      # 如果路由函数返回 "web_search"，转到网络搜索节点
        "vectorstore": "retrieve",       # 如果返回 "vectorstore"，转到检索节点
        "llm_fallback": "llm_fallback",  # 如果返回 "llm_fallback"，转到LLM备选节点
    },
)

# 添加普通边缘
workflow.add_edge("web_search", "generate")       # 网络搜索节点完成后转到生成节点
workflow.add_edge("retrieve", "grade_documents")  # 检索节点完成后转到评分文档节点

# 添加条件边缘
workflow.add_conditional_edges(
    "grade_documents",                     # 评分文档节点
    decide_to_generate,                    # 决定是否生成的函数
    {
        "web_search": "web_search",        # 如果决定生成，转到网络搜索节点
        "generate": "generate",            # 如果决定不生成，转到生成节点
    },
)

# 添加条件边缘
workflow.add_conditional_edges(
    "generate",                                      # 生成节点
    grade_generation_v_documents_and_question,       # 评分生成文档和问题的函数
    {
        "not supported": "generate",                 # Hallucinations: re-generate
        "not useful": "web_search",                  # Fails to answer question: fall-back to web-search
        "useful": END,                               #生成结果有用，结束流程
    },
)

# 添加普通边缘
workflow.add_edge("llm_fallback", END)               # LLM备选节点完成后结束流程

# Compile 
# 编译状态图
app = workflow.compile()

In [113]:
import time

# Run
inputs = {
    "question": "What player are the Bears expected to draft first in the 2024 NFL draft?"
}

# 遍历状态图的输出结果
for output in app.stream(inputs):
    for key, value in output.items():
        # Node 打印节点的信息
        pprint.pprint(f"Node '{key}':")
        # Optional: print full state at each node
        # 可选：打印每个节点的完整状态
    pprint.pprint("\n---\n")

time.sleep(6)  # 增加 6 秒的延迟，确保不会超过 API 的调用限制

# Final generation
pprint.pprint(value["generation"])

---ROUTE QUESTION---
---ROUTE QUESTION TO WEB SEARCH---
---WEB SEARCH---
"Node 'web_search':"
'\n---\n'
---GENERATE---
---CHECK HALLUCINATIONS---
binary_score='yes'
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---SCORE OBJECT IS NONE, UNABLE TO GRADE ANSWER---
"Node 'generate':"
'\n---\n'
---WEB SEARCH---
"Node 'web_search':"
'\n---\n'
---GENERATE---
---CHECK HALLUCINATIONS---
None


AttributeError: 'NoneType' object has no attribute 'binary_score'

Trace:
https://smith.langchain.com/public/623da7bb-84a7-4e53-a63e-7ccd77fb9be5/r

In [None]:
# Run
inputs = {"question": "What are the types of agent memory?"}
for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        pprint.pprint(f"Node '{key}':")
        # Optional: print full state at each node
        # pprint.pprint(value["keys"], indent=2, width=80, depth=None)
    pprint.pprint("\n---\n")

time.sleep(6)  # 增加 6 秒的延迟，确保不会超过 API 的调用限制

# Final generation
pprint.pprint(value["generation"])

---ROUTE QUESTION---


TooManyRequestsError: status_code: 429, body: data=None message="You are using a Trial key, which is limited to 10 API calls / minute. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"

Trace:
https://smith.langchain.com/public/57f3973b-6879-4fbe-ae31-9ae524c3a697/r

In [None]:
# Run
inputs = {"question": "Hello, how are you today?"}
for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        pprint.pprint(f"Node '{key}':")
        # Optional: print full state at each node
        # pprint.pprint(value["keys"], indent=2, width=80, depth=None)
    pprint.pprint("\n---\n")

time.sleep(6)  # 增加 6 秒的延迟，确保不会超过 API 的调用限制

# Final generation
pprint.pprint(value["generation"])

---ROUTE QUESTION---
---ROUTE QUESTION TO LLM---
---LLM Fallback---
"Node 'llm_fallback':"
'\n---\n'
('I am an AI assistant, and I do not have feelings or emotions. I am '
 'functioning as normal, and I am ready to assist you with your queries. How '
 'can I help you today?')


Trace:
https://smith.langchain.com/public/1f628ee4-8d2d-451e-aeb1-5d5e0ede2b4f/r