# Agentic RAG

In [1]:
import os
import pprint
from dotenv import load_dotenv
from typing import Annotated, Sequence, Literal
from typing_extensions import TypedDict
from pydantic import BaseModel, Field

load_dotenv()

from langchain_community.document_loaders import WebBaseLoader, csv_loader
from langchain_ollama import ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document

from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.tools.retriever import create_retriever_tool

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import tools_condition, ToolNode



USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Read the API credentials from environment variables; each is None when unset.
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

## Chargement du CSV

Arguments ajoutés pour que la date de chaque article soit stockée dans les métadonnées des documents.

In [3]:
# Load the CSV; the "date" column is stored in each document's metadata
# instead of its page content.
loader = csv_loader.CSVLoader(
    file_path="datas/short_APPL.csv",
    encoding="utf-8",
    csv_args=dict(delimiter=",", quotechar='"'),
    metadata_columns=["date"],
)
docs = loader.load()

# Quick sanity check on what was loaded.
print("Loaded", len(docs), "documents from CSV file.")
print("First document metadata:", docs[0].metadata, sep="\n")
print("First document content:", docs[0].page_content, sep="\n")

Loaded 10 documents from CSV file.
First document metadata:
{'source': 'datas/short_APPL.csv', 'row': 0, 'date': '2023-12-16 22:00:00 UTC'}
First document content:
: 0
article: After an absolute disaster of a year in 2022, the stock market appears to have turned the corner. Each of the major market indexes has gained more than 20% from their respective trough. Perhaps more importantly, the S&P 500 and the Nasdaq Composite are within striking distance of new highs, which will check the final box marking the start of a new bull market.
Closing out the old and ringing in the new is a great time for examination, and one of the places I start is with my portfolio. A review of my top investments and how they came to be that way can offer valuable insight for the future.
Here's a look at my six largest holdings heading into 2024 (as of the market close on Dec. 15) and the incredibly valuable lesson I learned from each one.
Image source: Getty Images.
No. 6: Nvidia
Every investor has one -- th

## Encodage

Découpage des documents en chunks,<br/>
puis calcul des embeddings avec un modèle HuggingFace.

In [4]:
# Split every loaded document into overlapping chunks (size 100, overlap 50,
# both measured by the tiktoken-based length function of the splitter).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=100,
)
docs_splits = text_splitter.split_documents(docs)
print("Total chunks:", len(docs_splits))
print("First chunk:", docs_splits[0].page_content)

Total chunks: 173
First chunk: : 0
article: After an absolute disaster of a year in 2022, the stock market appears to have turned the corner. Each of the major market indexes has gained more than 20% from their respective trough. Perhaps more importantly, the S&P 500 and the Nasdaq Composite are within striking distance of new highs, which will check the final box marking the start of a new bull market.


In [5]:
# Preview the first three chunks together with the date kept in their metadata.
for preview in docs_splits[:3]:
    print(f"Texte : {preview.page_content}")
    print(f"Date : {preview.metadata.get('date')}")
    print("-" * 6)

Texte : : 0
article: After an absolute disaster of a year in 2022, the stock market appears to have turned the corner. Each of the major market indexes has gained more than 20% from their respective trough. Perhaps more importantly, the S&P 500 and the Nasdaq Composite are within striking distance of new highs, which will check the final box marking the start of a new bull market.
Date : 2023-12-16 22:00:00 UTC
------
Texte : Closing out the old and ringing in the new is a great time for examination, and one of the places I start is with my portfolio. A review of my top investments and how they came to be that way can offer valuable insight for the future.
Here's a look at my six largest holdings heading into 2024 (as of the market close on Dec. 15) and the incredibly valuable lesson I learned from each one.
Image source: Getty Images.
No. 6: Nvidia
Date : 2023-12-16 22:00:00 UTC
------
Texte : Every investor has one -- the "stock that got away." The one you meant to buy, only to find 

## Contextualiser les chunks

In [6]:
def buid_contextual_chunks(chunks, window=2):
    """Build context-enriched copies of ``chunks``.

    For every chunk ``i`` the output document contains the newline-joined text
    of chunk ``i`` and of up to ``window`` chunks on each side of it.

    Neighbours are merged only when they share the same origin as chunk ``i``,
    i.e. the same ``source`` and ``row`` metadata values. Without this guard
    the first/last chunks of an article would absorb text from the adjacent
    article while keeping their own metadata (notably the date). Chunks that
    carry neither key all compare equal, so they are merged purely by
    position.

    Args:
        chunks: Ordered, indexable sequence of documents exposing
            ``page_content`` and a ``metadata`` dict.
        window: Maximum number of neighbouring chunks taken on each side.

    Returns:
        list: One ``Document`` per input chunk, in the same order, each with a
        shallow copy of the metadata of the chunk it is centred on.
    """
    def origin(chunk):
        # Identifies the record a chunk was split from.
        return (chunk.metadata.get("source"), chunk.metadata.get("row"))

    contextual_chunks = []
    for i in range(len(chunks)):
        start = max(i - window, 0)
        end = min(i + window + 1, len(chunks))
        center_origin = origin(chunks[i])

        combined_text = "\n".join(
            chunks[j].page_content
            for j in range(start, end)
            if origin(chunks[j]) == center_origin
        )

        metadata = chunks[i].metadata.copy()
        contextual_chunks.append(Document(page_content=combined_text, metadata=metadata))
    return contextual_chunks


contextual_docs = buid_contextual_chunks(docs_splits)

In [7]:
# Embedding model used to vectorise the contextual chunks.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  embedding_model = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Directory holding the persisted Chroma collection; created if it is missing.
persist_directory = "./chroma_db"
os.makedirs(name=persist_directory, exist_ok=True)

In [9]:
# Deterministic ids keep this cell idempotent: the collection is persisted on
# disk, so without explicit ids each re-run would be given fresh random ids and
# store the same chunks again, which then show up as duplicate search results.
# NOTE(review): if the dataset shrinks, entries with higher ids from a previous
# run remain in ./chroma_db and must be removed manually.
chunk_ids = [f"chunk-{position}" for position in range(len(contextual_docs))]

vectorestore = Chroma.from_documents(
    documents=contextual_docs,
    collection_name="rag-chroma",
    embedding=embedding_model,
    persist_directory=persist_directory,
    ids=chunk_ids,
)
retriever = vectorestore.as_retriever()

In [10]:
# Wrap the retriever as a named tool so the agent model can choose to call it.
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="retrieve_apple_news",
    description="Search and return information from press articles about Apple Inc., including news related to its stock market activity, financial performance, and business developments.",
)
tools = [retriever_tool]

In [11]:
# Record the installed chromadb version alongside the notebook outputs.
import chromadb
print(chromadb.__version__)

1.0.4


## Agent State

Création de la classe `AgentState` (l'état partagé du graphe),
puis définition des nœuds et des arêtes (edges).

In [12]:
class AgentState(TypedDict):
    """State shared by every node of the graph.

    Both keys are needed: the grader nodes store their routing decision under
    ``next`` and the conditional edges read ``state["next"]``. A key that is
    not declared in the schema is not kept in the graph state, so the decision
    would be lost before the edge could read it.
    """
    # Conversation history; updates are merged through the add_messages reducer.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Label of the next step, written by grade_documents / hallucinations_grader.
    next: str

Définition des différentes fonctions utilisées comme nœuds du graphe.

In [48]:
def grade_documents(state) -> dict:
    """Grade whether the retrieved documents are relevant to the question.

    The first message of the state is taken as the user question and the last
    message as the retrieved documents. A local model is asked for a binary
    'yes'/'no' relevance score.

    Args:
        state (messages): Current graph state; must contain ``messages``.

    Returns:
        dict: A copy of the state with ``next`` set to
        ``"hallucinations_grader"`` when the documents are relevant and to
        ``"rewrite"`` otherwise. The input state is not modified.
    """
    print("---CHECK RELEVANCE---")

    # Data model. Its docstring and field description are part of the schema
    # handed to the model, so they are deliberately left as they were.
    class grade(BaseModel):
        """Score binaire pour la vérification de la pertinence."""
        binary_score: str = Field(description="Score de pertinence 'yes' or 'no' " )

    # LLM
    model = ChatOllama(temperature=0, model="mistral", streaming=True)

    llm_with_tool = model.with_structured_output(grade)

    # Prompt
    prompt = PromptTemplate(
        template="""You are a grader assessing relevance of a retrieved document to a user question. \n 
        Here is the retrieved document: \n\n {context} \n\n
        Here is the user question: {question} \n
        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""",
        input_variables=["context", "question"],
    )

    # Chain
    chain = prompt | llm_with_tool

    messages = state["messages"]
    question = messages[0].content
    docs = messages[-1].content

    scored_result = chain.invoke({"question": question, "context": docs})

    # NOTE(review): the structured-output call can presumably return None when
    # the model reply cannot be parsed; that case is treated as "not relevant"
    # instead of failing with an AttributeError.
    raw_score = getattr(scored_result, "binary_score", None) or ""
    # Models often answer "Yes" or " yes"; normalise before comparing.
    score = raw_score.strip().lower()

    if score == "yes":
        print("---DECISION: DOCS RELEVANT---")
        decision = "hallucinations_grader"
    else:
        print("---DECISION: DOCS NOT RELEVANT---")
        print("score = ", raw_score)
        decision = "rewrite"

    # Return a new dict rather than mutating the state handed to the node.
    return {**state, "next": decision}

In [14]:
# Smoke test: send a single greeting to the local model and print its reply.
chat = ChatOllama(model="mistral")
greeting = HumanMessage(content="Bonjour, es-tu prêt à m'aider avec mon RAG (Retrievel Augmented Generation) ?")
response = chat.invoke([greeting])
print(response.content)

Bonjour ! Bien sûr que je peux t'aider avec ton RAG (Retrieval Augmented Generation). Qu'est ce qui te fait de la difficulté exactement ? Penses-tu avoir besoin d'aide pour créer un modèle de génération de texte augmenté de récupération de données, ou es-tu plutôt intéressé par l'utilisation d'un tel modèle ?


In [15]:
def agent(state):
    """Run the tool-enabled agent model on the conversation so far.

    The model is bound to the module-level ``tools`` list, so its reply is
    either a direct answer or a request to call one of those tools.

    Args:
        state (messages): Current graph state; must contain ``messages``.

    Returns:
        dict: State update holding the model reply under ``messages``.
    """
    print("---CALL AGENT---")
    history = state["messages"]
    tool_aware_llm = ChatOllama(model="mistral", temperature=0, streaming=True).bind_tools(tools)
    reply = tool_aware_llm.invoke(history)
    return {"messages": [reply]}

In [16]:
def rewrite(state):
    """Ask the model to reformulate the original question.

    The first message of the state is taken as the question to improve.

    Args:
        state (messages): Current graph state; must contain ``messages``.

    Returns:
        dict: State update holding the model's reformulation under ``messages``.
    """
    print("---TRANSFORM QUERY ---")
    question = state["messages"][0].content

    # Instruction sent to the model, wrapping the original question.
    instruction = f"""\n
    Examine l'entrée et essaie de raisonner sur l'intention sémantique/le sens sous-jacent. 
    Voici la question initiale ::
    \n ------- \n
    {question} 
    \n ------- \n
    Formulate an improved question: """

    llm = ChatOllama(model="mistral", temperature=0, streaming=True)
    rewritten = llm.invoke([HumanMessage(content=instruction)])
    return {"messages": [rewritten]}

In [17]:
def generate(state):
    """Generate an answer to the user question from the retrieved context.

    The first message of the state is taken as the question and the content of
    the last message as the context.

    Args:
        state (messages): Current graph state; must contain ``messages``.

    Returns:
        dict: State update holding the model's answer (an AI message) under
        ``messages``.
    """
    print("---GENERATE---")
    messages = state["messages"]
    question = messages[0].content
    docs = messages[-1].content

    # Local prompt, used instead of pulling one from the hub.
    prompt = ChatPromptTemplate.from_messages([
        ("system", """Vous êtes un assistant IA utile. Utilisez le contexte suivant pour répondre à la question de l'utilisateur.
        Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas. N'essayez PAS d'inventer une réponse.
        Si la question n'est pas liée au contexte, expliquez poliment que vous répondez seulement aux questions en rapport avec le contexte fourni.
        
        Contexte: {context}"""),
        ("human", "{question}")
    ])

    # LLM
    llm = ChatOllama(model="mistral", temperature=0, streaming=True)

    # Keep the model's message object instead of parsing it down to a string:
    # a bare string appended through add_messages is coerced into a
    # HumanMessage, which would record the answer as if the user had said it.
    rag_chain = prompt | llm
    response = rag_chain.invoke({
        "context": docs,
        "question": question
    })

    return {"messages": [response]}

In [47]:
def hallucinations_grader(state) -> dict:
    """Grade whether the generated response is grounded in the retrieved documents.

    The first message of the state is taken as the user question, the last
    message as the generated response, and the most recent tool message (if
    any) as the retrieved documents the response is checked against.

    Args:
        state (messages): Current graph state; must contain ``messages``.

    Returns:
        dict: A copy of the state with ``next`` set to ``"rewrite"`` when
        hallucinations are detected and to ``"generate"`` otherwise. The input
        state is not modified.
    """
    print("---CHECK HALLUCINATIONS---")

    # Data model. The docstring and field description are part of the schema
    # handed to the model, so they describe the hallucination check (they
    # previously described the relevance check).
    class grade(BaseModel):
        """Binary score for the hallucination check."""
        binary_score: str = Field(description="'yes' if the response contains hallucinations, 'no' otherwise")

    # LLM
    model = ChatOllama(temperature=0, model="mistral", streaming=True)

    llm_with_tool = model.with_structured_output(grade)

    # Prompt: the retrieved documents are included so that the grader has
    # something to check the response against.
    prompt = PromptTemplate(
        template="""You are a grader assessing hallucinations in the generated response. \n
        Here are the retrieved documents: \n\n {documents} \n\n
        Here is the generated response: \n\n {context} \n\n
        Here is the user question: {question} \n
        The response contains hallucinations if it states facts that are not supported by the retrieved documents. \n
        Give a binary score 'yes' or 'no' to indicate whether the response contains hallucinations.""",
        input_variables=["documents", "context", "question"],
    )

    # Chain
    chain = prompt | llm_with_tool

    messages = state["messages"]
    question = messages[0].content
    generation = messages[-1].content
    # Most recent tool output, i.e. what the retriever returned; empty string
    # when the history holds no tool message.
    documents = next(
        (msg.content for msg in reversed(messages) if getattr(msg, "type", None) == "tool"),
        "",
    )

    scored_result = chain.invoke(
        {"question": question, "context": generation, "documents": documents}
    )

    # NOTE(review): the structured-output call can presumably return None when
    # the model reply cannot be parsed. As before, only an explicit "yes"
    # triggers a rewrite, so that case falls through to "no hallucinations".
    raw_score = getattr(scored_result, "binary_score", None) or ""
    # Models often answer "Yes" or " yes"; normalise before comparing.
    score = raw_score.strip().lower()

    if score == "yes":
        print("---DECISION: HALLUCINATIONS DETECTED---")
        decision = "rewrite"
    else:
        print("---DECISION: NO HALLUCINATIONS---")
        decision = "generate"

    # Return a new dict rather than mutating the state handed to the node.
    return {**state, "next": decision}

In [19]:

# Standalone copy of the RAG prompt, displayed for inspection.
rag_prompt = ChatPromptTemplate.from_messages([
    ("system", """Vous êtes un assistant IA utile. Utilisez le contexte suivant pour répondre à la question de l'utilisateur.
    Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas. N'essayez PAS d'inventer une réponse.
    Si la question n'est pas liée au contexte, expliquez poliment que vous répondez seulement aux questions en rapport avec le contexte fourni.
    
    Context: {context}"""),
    ("human", "{question}")
])
print("*" * 20 + "Prompt[rlm/rag-prompt]" + "*" * 20)
# pretty_print() writes to stdout itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
rag_prompt.pretty_print()

********************Prompt[rlm/rag-prompt]********************

Vous êtes un assistant IA utile. Utilisez le contexte suivant pour répondre à la question de l'utilisateur.
    Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas. N'essayez PAS d'inventer une réponse.
    Si la question n'est pas liée au contexte, expliquez poliment que vous répondez seulement aux questions en rapport avec le contexte fourni.
    
    Context: [33;1m[1;3m{context}[0m


[33;1m[1;3m{question}[0m
None


## Graph

In [51]:
# Build the agentic RAG graph:
#   agent -> (tool call?) -> retrieve -> grade_documents
#   grade_documents -> generate (relevant) | rewrite (not relevant)
#   generate -> hallucinations_grader -> END (grounded) | rewrite (hallucinated)
#   rewrite -> agent
workflow = StateGraph(AgentState)

workflow.add_node("agent", agent)
retrieve = ToolNode([retriever_tool])
workflow.add_node("retrieve", retrieve)
workflow.add_node("grade_documents", grade_documents)
workflow.add_node("rewrite", rewrite)
workflow.add_node("generate", generate)
workflow.add_node("hallucinations_grader", hallucinations_grader)

workflow.add_edge(START, "agent")
# The agent either asks for the retriever tool or ends the run.
workflow.add_conditional_edges(
    "agent",
    tools_condition,
    {
        "tools": "retrieve",
        END: END
    })
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    lambda state: state["next"],
    {
        # grade_documents emits the label "hallucinations_grader" when the
        # documents are relevant. An answer has to be generated before it can
        # be checked, so that label is routed to "generate"; routing it
        # straight to the grader left the "generate" node unreachable.
        "hallucinations_grader": "generate",
        "rewrite": "rewrite"
    }
)

workflow.add_edge("generate", "hallucinations_grader")
workflow.add_conditional_edges(
    "hallucinations_grader",
    lambda state: state["next"],
    {
        # "generate" is the label the grader emits when no hallucination was
        # found: the answer is accepted and the run ends.
        "generate": END,
        "rewrite": "rewrite"
    }
)

workflow.add_edge("rewrite", "agent")

graph = workflow.compile()

In [54]:
# Stream the graph on a sample question and dump each node's update as it arrives.
inputs = {"messages": [("user", "Qui est Palantir ?")]}

for step in graph.stream(inputs):
    for node_name, node_update in step.items():
        pprint.pprint(f"Output from node '{node_name}':")
        pprint.pprint("---")
        pprint.pprint(node_update, indent=2, width=80, depth=None)
    pprint.pprint("\n---\n")

---CALL AGENT---
"Output from node 'agent':"
'---'
{ 'messages': [ AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'mistral', 'created_at': '2025-04-16T13:31:56.11865122Z', 'done': True, 'done_reason': 'stop', 'total_duration': 6765337450, 'load_duration': 9775749, 'prompt_eval_count': 100, 'prompt_eval_duration': 246000000, 'eval_count': 290, 'eval_duration': 6508000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None), 'model_name': 'mistral'}, id='run-485b9b3e-6bce-4e85-a625-40728f1c1b33-0', tool_calls=[{'name': 'retrieve_apple_news', 'args': {'query': 'Palantir'}, 'id': '4891cecc-cc44-4582-883c-62bc5cc028e0', 'type': 'tool_call'}], usage_metadata={'input_tokens': 100, 'output_tokens': 290, 'total_tokens': 390})]}
'\n---\n'
"Output from node 'retrieve':"
'---'
{ 'messages': [ ToolMessage(content='Still, for long-term, growth-oriented investors, Palantir is a name worth considering, given the soaring demand for its products and it