In [1]:
import os

from dotenv import load_dotenv

# Pull settings from the project's .env file into the process environment.
# Kept as the cell's final expression so its return value is displayed.
load_dotenv()

True

In [4]:
import warnings

# Credentials this notebook reads from the environment.
REQUIRED_API_KEYS = ("GROQ_API_KEY", "HF_TOKEN", "TAVILY_API_KEY")


def find_missing_env(names, env=None):
    """Return the entries of ``names`` that are unset or empty in ``env``.

    Args:
        names: Iterable of environment-variable names to look up.
        env: Mapping to inspect; defaults to ``os.environ``.

    Returns:
        list[str]: The missing names, in the order they were given.
    """
    env = os.environ if env is None else env
    return [name for name in names if not env.get(name)]


# Values loaded by load_dotenv() are already in os.environ, so copying them back
# onto themselves achieves nothing, and assigning None for an absent key raises
# an opaque TypeError. Report absent keys by name instead.
missing_api_keys = find_missing_env(REQUIRED_API_KEYS)
if missing_api_keys:
    warnings.warn(
        "Missing environment variables: "
        + ", ".join(missing_api_keys)
        + ". Add them to your .env file; cells that call the matching service will fail.",
        stacklevel=2,
    )

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

In [45]:
# Checkpoint used to turn text into vectors for the FAISS index.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

embed = HuggingFaceEmbeddings(model=EMBEDDING_MODEL_NAME)

In [46]:
# Pages that get scraped, chunked and indexed into the vector store.
urls: list[str] = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
]

In [47]:
# Chunk sizing, measured in tiktoken tokens by the splitter below.
# all-MiniLM-L6-v2 truncates inputs longer than 256 word pieces, so a larger
# chunk would have its tail silently dropped before embedding. The two
# tokenizers do not count identically, hence the safety margin.
CHUNK_SIZE_TOKENS = 200
CHUNK_OVERLAP_TOKENS = 20

# Load: fetch each page and flatten the per-URL results into a single list.
docs_list = [page for url in urls for page in WebBaseLoader(url).load()]

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE_TOKENS,
    chunk_overlap=CHUNK_OVERLAP_TOKENS,
)
doc_splits = text_splitter.split_documents(docs_list)

# Fail with a readable message instead of an obscure error from the index builder.
if not doc_splits:
    raise ValueError("No text was extracted from the configured URLs; nothing to index.")

## Add vector
vector_store = FAISS.from_documents(documents=doc_splits, embedding=embed)

retriever = vector_store.as_retriever()


In [48]:
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field

In [49]:
# Data Model
# NOTE: this class is handed to the LLM as a structured-output schema, so the
# docstring and the field description act as prompt text, not just documentation.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource"""

    # Literal restricts the answer to the two backends the notebook supports.
    datasource: Literal["vectorstore", "web_search"] = Field(
        ...,
        description="Given a user question choose to route it to web search or vectorstore",
    )

In [50]:
# One chat model is shared by the router, the graders, the generator and the
# re-writer. temperature=0 keeps routing/grading verdicts as repeatable as the
# API allows instead of relying on the client's sampling default.
llm = ChatGroq(model="openai/gpt-oss-120b", temperature=0)

# Router model: same LLM, constrained to return a RouteQuery instance.
structure_llm_route = llm.with_structured_output(RouteQuery)

In [51]:
# Prompt
# The topic list must mirror what is actually indexed from `urls` (the agent and
# prompt-engineering posts). Advertising a topic that was never loaded would
# route those questions to an index that cannot answer them.
system = """You are an expert at routing a user question to a vectorstore or web search. The vectorstore contains documents related to agents and prompt engineering. Use the vectorstore for questions on these topics. Otherwise, use web search."""

route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Router chain: fill the prompt, then let the structured-output model pick a datasource.
question_router = route_prompt | structure_llm_route

In [52]:
# Smoke test: a question outside the indexed topics.
off_topic_query = {"question": "Who is the best footballer in the world?"}
question_router.invoke(off_topic_query)

RouteQuery(datasource='web_search')

In [53]:
# Smoke test: a question about one of the indexed topics.
on_topic_query = {"question": "What are the types of agent memory?"}
question_router.invoke(on_topic_query)

RouteQuery(datasource='vectorstore')

In [54]:
## Retrieval Grader

# NOTE: used as a structured-output schema, so the docstring and field
# description are part of what the LLM sees.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Literal instead of free-form str: validation rejects anything other than
    # the two expected verdicts, the same way RouteQuery constrains its field.
    binary_score: Literal["yes", "no"] = Field(
        description="Documents are relevant to the question, 'yes' or 'no' "
    )


# Grader model: the shared LLM constrained to return a GradeDocuments instance.
structure_llm_grader = llm.with_structured_output(GradeDocuments)

In [59]:
# Prompt
system = """You are a grader assessing relevance of retrieved document to a user question. \n
If the document contains keywords(s) or semantic meaning related to the user question. grade it as relevant. \n
It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

grade_messages = [
    ("system", system),
    ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
]
grade_prompt = ChatPromptTemplate.from_messages(grade_messages)

# Relevance chain: fill the prompt, then ask the structured grader for a verdict.
retrieval_grader = grade_prompt | structure_llm_grader

# Sample inputs for trying the grader: retrieve for a short query and merge the
# text of every hit into one blank-line-separated string.
question = "agent memory"
docs = retriever.invoke(question)
doc_txt = "\n\n".join(doc.page_content for doc in docs)

In [61]:
# Grade the merged retrieved text against the sample question.
grader_inputs = dict(question=question, document=doc_txt)
retrieval_grader.invoke(grader_inputs)

GradeDocuments(binary_score='yes')

In [62]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

In [64]:
# RAG prompt pulled from LangChain Hub; it is filled with `context` and `question`.
prompt = hub.pull("rlm/rag-prompt")

# Generation chain: prompt -> chat model -> plain string.
rag_chain = prompt | llm | StrOutputParser()

# Give the prompt plain page text. Passing the Document list itself would
# interpolate its repr (metadata, escaped newlines) into the context.
context_text = "\n\n".join(doc.page_content for doc in docs)
generation = rag_chain.invoke({"context": context_text, "question": question})
print(generation)

Agent memory consists of two layers. Short‑term memory is the model’s in‑context learning, where recent observations are kept in the prompt window. Long‑term memory is an external store (e.g., a vector database) that lets the agent retain and retrieve unlimited information over time.


In [65]:
## Hallucination Grader

# NOTE: used as a structured-output schema, so the docstring and field
# description are part of what the LLM sees.
class GradeHallucination(BaseModel):
    """Binary score for hallucination present in generation answer"""

    # Literal instead of free-form str: validation rejects anything other than
    # the two expected verdicts.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# Grader model: the shared LLM constrained to return a GradeHallucination instance.
structure_llm_hallucination = llm.with_structured_output(GradeHallucination)

In [66]:
system = """You are grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts.\n
Give a binary score 'yes' or 'no'. 'yes' means that the answer is grounded in / supported by the set of facts."""

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

# Groundedness chain: fill the prompt, then ask the structured grader for a verdict.
hallucination_grade = hallucination_prompt | structure_llm_hallucination

# Present the facts as plain page text. Passing the Document list itself would
# interpolate its repr (metadata, escaped newlines) into the prompt.
facts_text = "\n\n".join(doc.page_content for doc in docs)
hallucination_grade.invoke({"documents": facts_text, "generation": generation})

GradeHallucination(binary_score='yes')

In [67]:
## Answer Grader

# NOTE: used as a structured-output schema, so the docstring and field
# description are part of what the LLM sees.
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question"""

    # Literal instead of free-form str: validation rejects anything other than
    # the two expected verdicts.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# Grader model: the shared LLM constrained to return a GradeAnswer instance.
structure_llm_answer = llm.with_structured_output(GradeAnswer)

In [68]:
system = """You are a grader assessing whether an answer address / resolves a question \n 
Give a binary score 'yes' or 'no'. yes means that the answer resolves the question"""

answer_messages = [
    ("system", system),
    ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
]
answer_prompt = ChatPromptTemplate.from_messages(answer_messages)

# Usefulness chain: fill the prompt, then ask the structured grader for a verdict.
answer_grader = answer_prompt | structure_llm_answer

# Try it on the sample question and the answer generated for it.
answer_inputs = dict(question=question, generation=generation)
answer_grader.invoke(answer_inputs)

GradeAnswer(binary_score='yes')

In [69]:
## Question re-writer

system = """You are a question re-writer that converts an input question to a better version that is optimized \n 
for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""

rewrite_messages = [
    ("system", system),
    ("human", "Here is the initial question: \n\n {question} \n Formulated an improved question."),
]
re_write_prompt = ChatPromptTemplate.from_messages(rewrite_messages)

# Rewrite chain: prompt -> chat model -> plain string.
question_rewriter = re_write_prompt | llm | StrOutputParser()

# Try it on the sample question.
rewrite_inputs = dict(question=question)
question_rewriter.invoke(rewrite_inputs)

'**Improved question:**  \n*What is “agent memory” in the context of autonomous or AI agents, how is it typically implemented, and what are the common methods for storing, updating, and retrieving an agent’s past experiences or contextual information?*'

In [70]:
# Search

from langchain_community.tools.tavily_search import TavilySearchResults

# Upper bound on the number of hits requested per web search.
WEB_SEARCH_MAX_RESULTS = 3

# The tool's limit is configured through `max_results`. `k` is not one of its
# fields, so passing `k=3` does not set the number of results returned.
web_search_tool = TavilySearchResults(max_results=WEB_SEARCH_MAX_RESULTS)

  web_search_tool = TavilySearchResults(k=3)
