In [74]:
import os
from dotenv import load_dotenv
from typing import Literal
from pprint import pprint
from langchain import hub
from typing import Literal,List
from typing_extensions import TypedDict
from langchain_cohere import CohereEmbeddings, ChatCohere
from langgraph.graph import END,START,StateGraph
from langchain.schema import Document
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings,ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import Field,BaseModel
from langchain_community.tools.tavily_search import TavilySearchResults



# Load variables from a local .env file into the process environment.
load_dotenv()

# API keys are read once here. None of these variables is passed explicitly to a
# client in the visible cells, so they mainly document which keys are expected.
langchain_api = os.environ.get('LANGCHAIN_API_KEY')
TAVILY_API_KEY = os.environ.get('TAVILY_API_KEY')
COHERE_API_KEY = os.environ.get('COHERE_API_KEY')

# Turn on LangChain tracing (V2) and point it at the smith.langchain.com endpoint.
os.environ.update(
    {
        'LANGCHAIN_TRACING_V2': 'true',
        'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
    }
)

# Pages that are loaded and indexed in the "Index" section below.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

### Index

In [75]:
# Load every URL; each loader call yields a list of documents for that page.
docs = [WebBaseLoader(url).load() for url in urls]
# Flatten the per-URL lists into a single list of documents.
docs_list = [page for per_url in docs for page in per_url]

# Embedding model used when the chunks are written to the vector store.
embd = CohereEmbeddings(model="embed-multilingual-v3.0")

# Token-based splitter: chunks of 512 tokens, no overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512,
    chunk_overlap=0,
)
splited_text = text_splitter.split_documents(docs_list)

# Embed the chunks into Chroma and expose the store through a retriever.
vectorspace = Chroma.from_documents(documents=splited_text, embedding=embd)
retriever = vectorspace.as_retriever()


### Router

In [111]:
# Tool schema handed to `llm.bind_tools` for routing. The docstring is left
# verbatim because it presumably serves as the tool description shown to the model.
# NOTE(review): the graph cell later defines a function that is also called
# `web_search`; once that cell has run, this name no longer refers to the class.
class web_search(BaseModel):
    """
    The internet. Use web_search for questions that are related to anything else than agents, prompt engineering, and adversarial attacks.
    """

    query: str = Field(
        description="The query to use when searching the internet"
    )

# Tool schema handed to `llm.bind_tools` for routing. The docstring is left
# verbatim because it presumably serves as the tool description shown to the model.
class vectorstore(BaseModel):
    """
    A vectorstore containing documents related to agents, prompt engineering, and adversarial attacks. Use the vectorstore for questions on these topics.   
    """

    query: str = Field(
        description="The query to use when searching the vectorstore"
    )


# Routing instructions, passed to the model through the `preamble` argument of `bind_tools`.
preamble = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use web-search."""

# The router model is offered the two tool schemas defined above.
llm = ChatCohere(model="command-a-03-2025", temperature=0)
structured_llm_router = llm.bind_tools(
    tools=[web_search, vectorstore],
    preamble=preamble,
)

# The prompt consists of the user's question only.
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("user", "{question}"),
    ]
)

question_router = route_prompt | structured_llm_router

# Smoke test with a question about one of the indexed topics.
response = question_router.invoke(
    {"question": "What are the types of agent memory?"}
)


In [77]:
# Name of the first tool call in the router response, i.e. the route that was picked.
# Indexing [0] assumes the model issued at least one tool call.
response.additional_kwargs['tool_calls'][0]['function']['name']

'web_search'

### Retrieval Grader

In [112]:
# class GradeDocuments(BaseModel):
#     binary_score: Literal["yes","no"] = Field(
#         description="Documents are relevant to the question: 'yes' or 'no'"
#     )

# preamble = """You are a grader assessing relevance of a retrieved document to a user question. \n
# If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
# Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""


# llm = ChatCohere(model="command-a-03-2025",temperature=0)
# structure_llm_grader = llm.with_structured_output(GradeDocuments,preamble=preamble)
# grader_prompt = ChatPromptTemplate.from_messages([
#     ('human',"Retrieved Document: \n\n{documents} \n\n User question: \n\n {question}"),
# ])
# Structured-output schema for the retrieval grader: a single 'yes'/'no' field.
class GradeDocuments(BaseModel):
    binary_score: Literal["yes", "no"] = Field(
        description="Documents are relevant to the question, 'yes', or 'no'",
    )

# Grader model; its output is constrained to the GradeDocuments schema.
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grader_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("user", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader_chain = grader_prompt | structured_llm_grader

# Smoke test: grade the top retrieved chunk against a sample question.
question = "What are the types of agent memory?"
# `invoke` is the retriever entry point already used by the `retrieve` node;
# `get_relevant_documents` is deprecated in langchain-core.
# NOTE: from here on `docs` holds the text of a single chunk (a str), not the
# list of loaded pages built in the Index section.
docs = retriever.invoke(question)[0].page_content
retrieval_grader_chain.invoke({"document": docs, "question": question})




GradeDocuments(binary_score='yes')

### Generate

In [113]:
# System instructions for the answer-generation step.
preamble = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise."""

llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)

# The user turn carries both the question and the retrieved context.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", preamble),
        ("user", "Question: \n{question}\n\nContext:\n{document}"),
    ]
)

# prompt -> chat model -> plain string
rag_generation = prompt | llm | StrOutputParser()

# Smoke test; reuses `question` and `docs` from the retrieval-grader cell.
generation = rag_generation.invoke({"question": question, "document": docs})

### LLM Fallback

In [96]:
# The fallback chain answers from the model's own knowledge: its prompt has no
# context slot, so the instructions must not refer to "retrieved context".
preamble = """You are an assistant for question-answering tasks. Answer the question based upon your knowledge. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", preamble),
        ("user", "Question: \n{question} \n\nAnswer:"),
    ]
)

# Uses whichever `llm` the preceding cell bound.
rag_fall_back = prompt | llm | StrOutputParser()

# Smoke test. The result gets its own name so that the RAG answer stored in
# `generation` by the Generate section is still the one the grader cells check.
fallback_generation = rag_fall_back.invoke({"question": question})



### Hallucination Grader

In [114]:
class GradeHallucination(BaseModel):
    """Binary score to assess whether an answer is grounded in the given facts."""

    binary_score: Literal["yes", "no"] = Field(
        description="Answer is grounded in facts, 'yes' or 'no'"
    )


# Instructions for the groundedness check.
preamble = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""

# Grader model; its output is constrained to the GradeHallucination schema.
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)
structured_llm_hall_grader = llm.with_structured_output(GradeHallucination)

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", preamble),
        ("user", "Set of Facts:\n\n {documents} \n\nLLM Generation:\n{generation}"),
    ]
)

hellucination_rag = hallucination_prompt | structured_llm_hall_grader

# Smoke test; reuses `docs` and `generation` from earlier cells.
hellucination_rag.invoke({"documents": docs, "generation": generation})



GradeHallucination(binary_score='yes')

### Answer Grader

In [115]:
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    # Restricted to the two allowed values, like the other grader schemas in
    # this notebook, so the structured output cannot carry free-form text.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# Instructions for the answer-usefulness check (quote around 'Yes' balanced).
preamble = """You are a grader assessing whether an answer addresses / resolves a question \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question."""

# Reuses the `llm` bound by the preceding cell; output follows the GradeAnswer schema.
structured_llm_grader_answer = llm.with_structured_output(GradeAnswer)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", preamble),
        ("user", "Question:\n\n {question} \n\nLLM Generation:\n{generation}"),
    ]
)
answer_grader_rag = prompt | structured_llm_grader_answer

# Smoke test; reuses `question` and `generation` from earlier cells.
answer_grader_rag.invoke({"question": question, "generation": generation})



GradeAnswer(binary_score='yes')

### Web Search Tool


In [105]:
# Tavily search tool with default settings; called by the `web_search` graph node.
# No API key is passed here — presumably it is read from the TAVILY_API_KEY
# environment variable loaded at the top of the notebook (confirm).
web_search_tool = TavilySearchResults()

  web_search_tool = TavilySearchResults()


### Building Graph

In [None]:
class GraphState(TypedDict):
    """State dictionary read and returned by the graph node functions below."""

    # The user's question.
    question: str
    # Answer text produced by one of the generation chains.
    generation: str
    # Context for generation.
    # NOTE(review): `web_search` stores a `Document` under this key and
    # `retrieve` stores the retriever output, so `List[str]` looks too narrow.
    documents: List[str]

#----------------------------

def retrieve(state):
    """Fetch documents for the current question from the vector store retriever.

    Args:
        state: Graph state; must contain the key ``'question'``.

    Returns:
        dict: ``'documents'`` holding the retriever's results and the unchanged
        ``'question'``.
    """
    print("---RETRIEVE---")

    question = state['question']
    # A retriever takes the query string itself as input, not a dict wrapping it.
    documents = retriever.invoke(question)
    return {'documents': documents, 'question': question}



def llm_fallback(state):
    """Answer the question with the fallback chain, without retrieved context.

    Args:
        state: Graph state; must contain the key ``'question'``.

    Returns:
        dict: the unchanged ``'question'`` and the chain output under
        ``'generation'``.
    """
    print("---LLM Fallback---")

    # Read the question from the graph state; relying on the notebook-global
    # `question` would answer whatever an earlier cell happened to leave there.
    question = state['question']
    generation = rag_fall_back.invoke({"question": question})
    return {"question": question, "generation": generation}


def generate(state):
    """Generate an answer to the question from the documents in the state.

    Args:
        state: Graph state; must contain ``'question'`` and ``'documents'``.
            ``'documents'`` may be a list or a single item (the web-search
            node stores a single document).

    Returns:
        dict: ``'documents'`` (always a list), the unchanged ``'question'`` and
        the chain output under ``'generation'``.
    """
    print("---GENERATE---")

    question = state['question']
    documents = state['documents']
    if not isinstance(documents, list):
        documents = [documents]

    # The generation prompt's context placeholder is `{document}`, so the input
    # key must be "document". Send the documents' text rather than their repr.
    context = "\n\n".join(
        getattr(doc, "page_content", str(doc)) for doc in documents
    )
    generation = rag_generation.invoke({"document": context, "question": question})
    return {"documents": documents, "question": question, "generation": generation}


def grade_documents(state):
    """Keep only the documents the retrieval grader marks as relevant.

    Args:
        state: Graph state; must contain ``'question'`` and an iterable
            ``'documents'``.

    Returns:
        dict: ``'documents'`` with the items graded ``'yes'`` (original order
        preserved) and the unchanged ``'question'``.
    """
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")

    question = state['question']
    documents = state['documents']

    filtered_docs = []
    for d in documents:
        # Grade the document being iterated over, not the notebook-global
        # `docs`. Plain strings are passed through as they are.
        doc_text = getattr(d, "page_content", d)
        score = retrieval_grader_chain.invoke(
            {"document": doc_text, "question": question}
        ).binary_score
        if score == 'yes':
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
    return {"documents": filtered_docs, "question": question}


def web_search(state):
    """Run a web search for the question and wrap the results in one Document.

    Args:
        state: Graph state; must contain the key ``'question'``.

    Returns:
        dict: ``'documents'`` holding a single ``Document`` whose text is the
        newline-joined ``'content'`` of every search result, plus the unchanged
        ``'question'``.
    """
    print("---WEB SEARCH---")

    question = state['question']
    # The search tool is invoked with the key "query" (the key used in the
    # scratch cell that produced real results); "quesry" was a typo.
    results = web_search_tool.invoke({"query": question})
    # Named `web_text` so the local does not shadow this function's own name.
    web_text = "\n".join(result['content'] for result in results)
    web_result = Document(page_content=web_text)
    return {"documents": web_result, "question": question}









In [None]:
# Scratch call to inspect the raw search results for the sample question.
# The stray `dcontent` token before the dict made this cell a syntax error.
docs = web_search_tool.invoke({"query": question})

In [119]:
# Display the raw results returned by the web search call in the previous cell.
docs

[{'title': 'What Is AI Agent Memory? Types, Tradeoffs and Implementation',
  'url': 'https://www.techtarget.com/searchenterpriseai/tip/What-is-AI-agent-memory-Types-tradeoffs-and-implementation',
  'content': '## Types of AI agent memory\n\nAI agents can use two broad types of memory: short-term memory and long-term memory. Each type of memory can be designed in different common variations.\n\n### Short-term memory [...] Memory type: STM vs. LTM. STM is used to maintain context and continuity during a single session or task. LTM retains information across sessions and tasks, so an AI agent can learn, adapt and glean insights. LTM types include episodic, semantic and procedural, each providing unique benefits to an AI agent. STM uses relatively little memory capacity and can offer fast performance, while LTM can use extensive memory capacity, which introduces latency and inefficiency to the storage and [...] LTM is often implemented with well-established technologies, like databases and