In [3]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_PROJECT'] = 'advanced-rag'
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['GROQ_API_KEY'] = os.getenv("GROQQ_API_KEY")

RETRIEVER

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings


urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size = 250,
    chunk_overlap = 0,
)
doc_splits = text_splitter.split_documents(docs_list)

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}

hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

vectorstore = Chroma.from_documents(
    documents = doc_splits,
    collection_name= "rag-chroma",
    embedding=hf_embeddings,
)

retriever = vectorstore.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.
  hf_embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


LLMS

In [8]:
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

class GradeDocuments(BaseModel):
    binary_store: str = Field(description= "Documents are relevant to the question, 'yes' or 'no'")

llm = ChatGroq(
    model = "llama3-70b-8192",
    temperature = 0,
)

structured_llm_grader = llm.with_structured_output(GradeDocuments)

system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {documents} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader
question = "agent memory"
docs = retriever.get_relevant_documents(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({
    "question": question, 
    "documents": doc_txt
}))

binary_store='yes'


GENERATE

In [10]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

prompt = hub.pull("rlm/rag-prompt")

llm = ChatGroq(
    model = "llama3-70b-8192",
    temperature = 0,
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = prompt | llm  | StrOutputParser()

generation = rag_chain.invoke({
    "context": docs,
    "question": question,
})

print(generation)

In a LLM-powered autonomous agent system, the agent has two types of memory: short-term memory and long-term memory. Short-term memory refers to in-context learning, such as prompt engineering, where the model learns from the input prompt. Long-term memory allows the agent to retain and recall information over extended periods, often by leveraging an external vector store and fast retrieval.


HALLUCINATION GRADER

In [14]:
class GradeHallucination(BaseModel):
    binary_store: str = Field(description= "Answer is grounded in the facts, 'yes' or 'no'")

llm = ChatGroq(
    model = "llama3-70b-8192",
    temperature = 0,
)

structured_llm_grader = llm.with_structured_output(GradeHallucination)

system =  """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

hallucination_grader = hallucination_prompt | structured_llm_grader
hallucination_grader.invoke({
    "documents": docs,
    "generation": generation
})

print(hallucination_grader.invoke)

<bound method RunnableSequence.invoke of ChatPromptTemplate(input_variables=['documents', 'generation'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n \n     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['documents', 'generation'], input_types={}, partial_variables={}, template='Set of facts: \n\n {documents} \n\n LLM generation: {generation}'), additional_kwargs={})])
| RunnableBinding(bound=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000272F3ACCE50>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000272F3ACD650>, model_name='llama3-70b-8192',

ANSWER GRADER

In [15]:
class GradeAnswer(BaseModel):
    binary_score: str = Field(description= "Answer addresses the question, 'yes' or 'no'")

llm = ChatGroq(
    model = "llama3-70b-8192",
    temperature = 0,
)

structured_llm_grader = llm.with_structured_output(GradeAnswer)

system = """You are a grader assessing whether an answer addresses / resolves a question \n 
     Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""

answer_prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
])

answer_grader = answer_prompt | structured_llm_grader
answer_grader.invoke({
    "question": question,
    "generation": generation
})

GradeAnswer(binary_score='yes')

QUESTION RE - WRITER

In [16]:
llm = ChatGroq(
    model = "llama3-70b-8192",
    temperature = 0,
)

system = """You a question re-writer that converts an input question to a better version that is optimized \n 
     for vectorstore retrieval. Look at the input and try to reason about the underlying sematic intent / meaning. Just say the question. I dont need any explanation just question is enough"""

re_writer_prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "Here is the initial question: \n\n {question} \n Formulate an improved question."),
])

question_rewriter = re_writer_prompt | llm | StrOutputParser()
question_rewriter.invoke({
    "question": question
})

'What is the role of memory in artificial intelligence agents?'

GRAPH

In [17]:
from typing_extensions import TypedDict
from typing import List

class GraphState(TypedDict):
    question: str
    generation: str
    documents: List[str]

NODES

In [18]:
from langchain.schema import Document

def retrieve(state):

    print("---RETRIEVE---")
    question = state["question"]

    documents = retriever.get_relevant_documents(question)
    return {"documents": documents, "question": question}


In [19]:
def generate(state):
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]

    generation = rag_chain.invoke({
        "context": documents,
        "question": question
    })
    return {"documents": documents, "question": question, "generation": generation}

In [28]:
def grade_documents(state):
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")

    question = state["question"]
    documents = state["documents"]

    filtered_docs = []
    for d in documents:
        score = retrieval_grader.invoke({
            "question": question, 
            "documents": d.page_content
        })
        grade = score.binary_store
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue
    return {"documents": filtered_docs, "question": question}

In [21]:
def transform_query(state):
    print("---TRANSFORM QUERY---")
    question = state["question"]
    documents = state["documents"]

    better_question = question_rewriter.invoke({
        "question": question
    })
    return {"documents": documents, "question": better_question}

In [22]:
def decide_to_generate(state):
    print("---ASSESS GRADED DOCUMENTS---")

    question = state["question"]
    filtered_documents = state["documents"]

    if not filtered_documents:
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---")
        return "transform_query"
    else:
        print("---DECISION: GENERATE---")
        return "generate"

In [30]:
def grade_generation_v_documents_and_question(state):
    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({
        "documents": documents,
        "generation": generation
    })
    grade = score.binary_score

    if grade == "yes":
         print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
         score = answer_grader.invoke({
            "question": question,
            "generation": generation
         })
         grade = score.binary_store
         if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
         else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

BUILD GRAPH

In [31]:
from langgraph.graph import END, StateGraph

workflow = StateGraph(GraphState)

workflow.add_node("retrieve", retrieve) 
workflow.add_node("grade_documents", grade_documents) 
workflow.add_node("generate", generate) 
workflow.add_node("transform_query", transform_query)

workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    }
)

workflow.add_edge("transform_query", "retrieve")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "transform_query",
    },
)

app = workflow.compile()

In [32]:
from pprint import pprint

inputs = {
    "question": "Explain how the different types of agent memory work?"
}

for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Node '{key}' :")
    pprint("\n---\n")

pprint(value["generation"])

---RETRIEVE---
"Node 'retrieve' :"
'\n---\n'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
"Node 'grade_documents' :"
'\n---\n'
---GENERATE---
---CHECK HALLUCINATIONS---


AttributeError: 'GradeHallucination' object has no attribute 'binary_score'