In [9]:
local_llm = "llama3"

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250,
    chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

vectorstore = Chroma.from_documents(documents=doc_splits, collection_name="rag-chroma", embedding=GPT4AllEmbeddings())
retriever = vectorstore.as_retriever()

# Retrieval Grader

In [10]:
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

llm = ChatOllama(model=local_llm, format="json", temperature=0)

prompt = PromptTemplate(
    template="""You are a grader assessing relevance of a retrieved document to a user question. 
    If the document contains keywords related to the user question, grade it as relevant. It does
    not need to be a stringent test. The goal is to filter out erroneous retrievals. Give a binary
    score 'yes' or 'no' score to indicate whether the document is relevant to the question. 
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
    Here is the retrieved document:  
    {document}
    
    Here is the user question: 
    {question}
    """,
    input_variables=["question", "document"],
)

retrieval_grader = prompt | llm | JsonOutputParser()
question = "What are the types of llm agent memory?"
docs = retriever.invoke(question)
doc_text = docs[1].page_content 
print(retrieval_grader.invoke({"question": question, "document": doc_text}))

{'score': 'yes'}


In [11]:
print(docs[0])

page_content='They also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only.\nGenerative Agents Simulation#\nGenerative Agents (Park, et al. 2023) is super fun experiment where 25 virtual characters, each controlled by a LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. Generative agents create believable simulacra of human behavior for interactive applications.\nThe design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with

# Generate

In [35]:
from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    You should use three sentences maximum and keep the answer as concise as possible. 
    
    This is question: 
    ```{question}``` 
    
    This is context: 
    ```{context}```""",
    input_variables=["question", "context"],
)

llm = ChatOllama(model=local_llm, temperature=0)

def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

rag_chain = prompt | llm | StrOutputParser()

docs = retriever.invoke(question)
generation = rag_chain.invoke({"question": question, "context": format_docs(docs)})
print(generation)

The types of LLM agent memory mentioned in the context are:

* Memory stream: a long-term memory module that records a comprehensive list of agents' experience in natural language.

Note that there might be other types of memory not explicitly mentioned in this context.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

I apologize for the mistake earlier. Based on the provided context, it seems that there is only one type of LLM agent memory mentioned:

* Memory stream: a long-term memory module (external database) that records a comprehensive list of agents' experience in natural language.

If you're looking for more information on types of memory, I don't have any additional details to provide based on this context.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

I apologize for the mistake earlier. Based on the provided context, it seems that there is only one type of LLM agent memory mentioned:

* Memory stream: a long-term memory module (external database) that recor

# Hallucination Grader

In [31]:
llm = ChatOllama(model=local_llm, format="json", temperature=0)

prompt = PromptTemplate(
    template="""You are a grader assessing whether an answer is grounded in supported 
    by a set of facts. 
    You must return a JSON with a single key 'score', if the answer is grounded 
    in supported by the facts, the value should be 'yes', otherwise the value should be 'no'.
    Your answer should not contain any preamble, explanation or other keys.
    
    Your answer must be a json, the key must only be 'score', the value must be 'yes' or 'no'.
    Here are the facts:
    ```{documents}```
    
    Here is the answer: 
    ```{generation}```""",
    input_variables=["generation", "documents"],
)

hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader.invoke({"documents": format_docs(docs), "generation": generation})

{'status_code': 400, 'error_message': 'Invalid request'}

In [33]:
print(format_docs(docs))

They also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only.
Generative Agents Simulation#
Generative Agents (Park, et al. 2023) is super fun experiment where 25 virtual characters, each controlled by a LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. Generative agents create believable simulacra of human behavior for interactive applications.
The design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.

M

In [34]:
print(generation)

According to the provided context, there are two types of LLM agent memory mentioned:

* Memory stream: a long-term memory module (external database) that records a comprehensive list of agents' experience in natural language.

I don't know if there are any other types of LLM agent memory beyond what is discussed in this specific context.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

According to the provided context, there are two types of LLM agent memory mentioned:

* Memory stream: a long-term memory module (external database) that records a comprehensive list of agents' experience in natural language.

I don't know if there are any other types of LLM agent memory beyond what is discussed in this specific context.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

According to the provided context, there are two types of LLM agent memory mentioned:

* Memory stream: a long-term memory module (external database) that records a comprehensive list of agents' experience 

# Answer grader

In [14]:
# LLM
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt
prompt = PromptTemplate(
    template="""You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    
    Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: 
    {question}""",
    input_variables=["generation", "question"],
)

answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"question": question,"generation": generation})

{'score': 'yes'}

# Router

In [15]:
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM
llm = ChatOllama(model=local_llm, format="json", temperature=0)

prompt = PromptTemplate(
    template="""You are an expert at routing a user question to a vectorstore or web search.
    Use the vectorstore for questions on LLM agents, prompt engineering, and adversarial attacks. 
    You do not need to be stringent with the keywords in the question related to these topics. 
    Otherwise, use web-search. Give a binary choice 'web_search' or 'vectorstore' based on the 
    question. Return the a JSON with a single key 'datasource' and no premable or explaination. 
    Question to route: 
    {question}""",
    input_variables=["question"],
)

question_router = prompt | llm | JsonOutputParser()
docs = retriever.get_relevant_documents(question)
doc_txt = docs[1].page_content
print(question_router.invoke({"question": question}))

{'datasource': 'vectorstore'}


# Search

In [16]:
### Search

from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(k=3)

In [22]:
from typing_extensions import TypedDict
from typing import List

class GraphState(TypedDict):
    question: str
    generation: str
    web_search: str
    documents: List[str]

from langchain.schema import Document

def retrieve(state):
    print("---RETRIEVE---")
    question = state["question"]
    documents = retriever.invoke(question)
    return {"documents": documents, "question": question}

def generate(state):
    print("---GENERATE---")
    question = state["question"]
    docs = state["documents"]
    generation = rag_chain.invoke({"question": question, "context": format_docs(docs)})
    return {"generation": generation, "documents": docs, "question": question}

def grade_document(state):
    print("---GRADE DOCUMENT---")
    documents = state["documents"]
    question = state["question"]

    filtered_docs = []
    web_search = "No"
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score["score"]
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            web_search = "Yes"
            continue
        
    return {"documents": filtered_docs, "question": question, "web_search": web_search}

def web_search(state):
    print("---WEB SEARCH---")
    question = state["question"]
    documents = state["documents"]
    
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)
    if documents is not None:
        documents.append(web_results)
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}

def route_question(state):
    print("---ROUTE QUESTION---")
    question = state["question"]
    source = question_router.invoke({"question": question})
    print(f"{source=}, {question=}")
    if source["datasource"] == "vectorstore":
        print("---ROUTE: VECTORSTORE---")
        return "vectorstore"
    elif source["datasource"] == "web_search":
        print("---ROUTE: WEB SEARCH---")
        return "websearch"

def decide_to_generate(state):
    print("---DECIDE TO GENERATE---")
    question = state["question"]
    web_search = state["web_search"]
    filtered_docs = state["documents"]
    if web_search == "Yes":
        return "websearch"
    else:
        return "generate"

def grade_generation_v_documents_and_question(state):
    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    generation = state["generation"]
    documents = state["documents"]
    score = hallucination_grader.invoke({"documents": format_docs(documents), "generation": generation})
    grade = score.get("score")
    if grade is None:
        print("need review hallucinations check", score)
        return "not supported"
        
    
    if grade == "yes":
        print("---GRADE: GENERATION GROUNDED---")
        score = answer_grader.invoke({"question": question, "generation": generation})
        grade = score["score"]
        if grade == "yes":
            print("---GRADE: ANSWER USEFUL---")
            return "useful"
        else:
            print("---GRADE: ANSWER NOT USEFUL---")
            return "not useful"
    else:
        print("---GRADE: GENERATION NOT GROUNDED---")
        return "not supported"
    
from langgraph.graph import END, StateGraph
workflow = StateGraph(GraphState)

workflow.add_node("retrieve", retrieve)
workflow.add_node("generate", generate)
workflow.add_node("grade_documents", grade_document)
workflow.add_node("websearch", web_search)

# Final Langchain graph

In [23]:
workflow.set_conditional_entry_point(
    route_question,
    {
        "websearch": "websearch",
        "vectorstore": "retrieve",
    },
)

workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "websearch": "websearch",
        "generate": "generate",
    },
)
workflow.add_edge("websearch", "generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "websearch",
    },
)

In [19]:
# Compile
app = workflow.compile()

# Test
from pprint import pprint
inputs = {"question": "What are the types of agent memory?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}")
pprint(value["generation"])

---ROUTE QUESTION---
source={'datasource': 'vectorstore'}, question='What are the types of agent memory?'
---ROUTE: VECTORSTORE---
---RETRIEVE---
'Finished running: retrieve'
---GRADE DOCUMENT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---DECIDE TO GENERATE---
'Finished running: grade_documents'
---GENERATE---
---CHECK HALLUCINATIONS---
need review hallucinations check {}


KeyError: 'not supported'

In [24]:
# Compile
app = workflow.compile()

# Test
from pprint import pprint
inputs = {"question": "Who are the Bears expected to draft first in the NFL draft?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
pprint(value["generation"])

---ROUTE QUESTION---
source={'datasource': 'web_search'}, question='Who are the Bears expected to draft first in the NFL draft?'
---ROUTE: WEB SEARCH---
---WEB SEARCH---
'Finished running: websearch:'
---GENERATE---
---CHECK HALLUCINATIONS---
need review hallucinations check {'status_code': 400, 'message': 'Invalid request'}


KeyError: 'not supported'