# Mécanismes de RAG avancés

In [3]:
from models import Models

# Initialize the models
models = Models()
embeddings = models.embeddings_ollama
llm = models.model_ollama

In [4]:
from langchain.vectorstores import FAISS

# Initialize the vector store
new_vector_store = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

In [5]:
# Define the chat prompt
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer the question based only the data provided."),
        ("human", "Use the user question {input} to answer the question. Use only the {context} to answer the question.")
    ]
)

Premier test sans mécanisme avancé

In [18]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Define the retrieval chain
retriever = new_vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)
combine_docs_chain = create_stuff_documents_chain(
    llm, prompt
)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

In [22]:
retrieval_chain.invoke({"input": "What's a good movie about an epic viking?"})

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


{'input': "What's a good movie about an epic viking?",
 'context': [Document(id='56a385d5-d727-446a-a102-18d4cffa6c92', metadata={'source': 'https://www.imdb.com/title/tt1494639'}, page_content="Title: Beowulf\nTags: psychedelic, avant garde, magical realism\nPlot: Set in 507, Beowulf is a legendary Geatish warrior who travels to Denmark with his band of soldiers, including his best friend, Wiglaf. They meet King Hrothgar, who needs a hero to slay Grendel, a hideously malformed troll-like creature with appalling strength and cunning. Grendel attacks Heorot, Hrothgar's mead hall, whenever the Danes held a celebration, and Hrothgar has been forced to close the hall. Upon arriving, Beowulf becomes attracted to Hrothgar's wife, Queen Wealtheow, who reciprocates his interest."),
  Document(id='be508309-deb9-4d04-92cb-f029d2770cc2', metadata={'source': 'https://www.imdb.com/title/tt0064000'}, page_content="Title: Alfred the Great\nTags: action\nPlot: When the Vikings invade England, Alfred (

# 1. Reranking

documentation [FlashRank reranker](https://python.langchain.com/docs/integrations/retrievers/flashrank-reranker/) sur le site de LangChain

The Contextual Compression Retriever passes queries to the base retriever, takes the initial documents and passes them through the Document Compressor. The Document Compressor takes a list of documents and shortens it by reducing the contents of documents or dropping documents altogether.

In [6]:
%pip install -qU  flashrank

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Reranking with Flashrank

from langchain.retrievers.document_compressors import FlashrankRerank
from langchain.retrievers import ContextualCompressionRetriever

# Compressor for reranking
compressor = FlashrankRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever
)
chain_reranking = create_retrieval_chain(compression_retriever, combine_docs_chain)

In [7]:
chain_reranking.invoke({"input": "What's a good movie about an epic viking?"})

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


{'input': "What's a good movie about an epic viking?",
 'context': [Document(metadata={'id': 4, 'relevance_score': 0.9985577, 'source': 'https://www.imdb.com/title/tt0371552'}, page_content='Plot: As the story begins, the Vikings are depressed because in raid after raid they find the locations devoid of people. Cryptograf, an advisor to Viking chief Timandahaf, says that the enemies are always gone because, "Fear gives them wings". Timandahaf excitedly concludes that they must find a "Champion of Fear", who can teach them to be great cowards so they can fly, making them invincible. The chief says he\'ll give anything to whoever can bring him this Champion of Fear. Cryptograf hatches a secret plan to seize power and tells the chief they\'ll find the Champion of Fear in Gaul.During a typical day in the Gaulish village, chief Vitalstatistix announces the imminent arrival of his nephew Justforkix, who is to be trained to be a man. When Justforkix arrives from Parisium, he doesn\'t look lik

# 2. Hybrid Search

Hybrid Search avec ensemble Retrieval, issue de [ce blog](https://medium.com/@nadikapoudel16/advanced-rag-implementation-using-hybrid-search-reranking-with-zephyr-alpha-llm-4340b55fef22)

[Documentation BM25](https://python.langchain.com/docs/integrations/retrievers/bm25/) sur langchain

In [1]:
from data import load_movie_data, prepare_movie_data, split_movie_data
df = load_movie_data()
movies = prepare_movie_data(df)
text_splits = split_movie_data(movies)

Path to dataset files: C:\Users\CYTech Student\.cache\kagglehub\datasets\cryptexcode\mpst-movie-plot-synopses-with-tags\versions\1
14828 movies loaded
2966 documents loaded
21211 splits created


In [4]:
%pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2
Note: you may need to restart the kernel to use updated packages.


In [15]:
import nltk
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt_tab to C:\Users\CYTech
[nltk_data]     Student\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

In [19]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from nltk.tokenize import word_tokenize

# Define the BM25 retriever
keyword_retriever = BM25Retriever.from_documents(
    text_splits,
    k=2,
    preprocess_func=word_tokenize,
)

# Combine the BM25 and base retrievers
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.5, 0.5]
)
chain_hybrid = create_retrieval_chain(ensemble_retriever, combine_docs_chain)

# Combine the BM25 and Flashrank retrievers
ensemble_retriever_rerank = EnsembleRetriever(
    retrievers=[compression_retriever, keyword_retriever],
    weights=[0.5, 0.5]
)
chain_hybrid_reranking = create_retrieval_chain(ensemble_retriever_rerank, combine_docs_chain)

In [20]:
chain_hybrid.invoke({"input": "What's a good movie about an epic viking?"})

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


{'input': "What's a good movie about an epic viking?",
 'context': [Document(id='56a385d5-d727-446a-a102-18d4cffa6c92', metadata={'source': 'https://www.imdb.com/title/tt1494639'}, page_content="Title: Beowulf\nTags: psychedelic, avant garde, magical realism\nPlot: Set in 507, Beowulf is a legendary Geatish warrior who travels to Denmark with his band of soldiers, including his best friend, Wiglaf. They meet King Hrothgar, who needs a hero to slay Grendel, a hideously malformed troll-like creature with appalling strength and cunning. Grendel attacks Heorot, Hrothgar's mead hall, whenever the Danes held a celebration, and Hrothgar has been forced to close the hall. Upon arriving, Beowulf becomes attracted to Hrothgar's wife, Queen Wealtheow, who reciprocates his interest."),
  Document(metadata={'source': 'https://www.imdb.com/title/tt0065513'}, page_content='a junction alongside another lorry transporting another hapless man-in-a-box our guy can barely keep a grip as we finally have ou

In [21]:
chain_hybrid_reranking.invoke({"input": "What's a good movie about an epic viking?"})

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


{'input': "What's a good movie about an epic viking?",
 'context': [Document(metadata={'id': 4, 'relevance_score': 0.9985577, 'source': 'https://www.imdb.com/title/tt0371552'}, page_content='Plot: As the story begins, the Vikings are depressed because in raid after raid they find the locations devoid of people. Cryptograf, an advisor to Viking chief Timandahaf, says that the enemies are always gone because, "Fear gives them wings". Timandahaf excitedly concludes that they must find a "Champion of Fear", who can teach them to be great cowards so they can fly, making them invincible. The chief says he\'ll give anything to whoever can bring him this Champion of Fear. Cryptograf hatches a secret plan to seize power and tells the chief they\'ll find the Champion of Fear in Gaul.During a typical day in the Gaulish village, chief Vitalstatistix announces the imminent arrival of his nephew Justforkix, who is to be trained to be a man. When Justforkix arrives from Parisium, he doesn\'t look lik