# Mécanismes de RAG avancés

In [1]:
from models import Models

# `Models` is a project-local helper; the attribute names suggest it wraps an
# Ollama embedding model and an Ollama chat model (confirm in models.py).
models = Models()
embeddings, llm = models.embeddings_ollama, models.model_ollama

In [2]:
# FAISS is implemented in langchain_community; `langchain.vectorstores` is only
# a deprecated re-export shim that forwards to it.
from langchain_community.vectorstores import FAISS

# Reload the index previously persisted in the local "faiss_index" folder.
# NOTE(review): `embeddings` presumably has to be the same model that was used
# to build the index -- confirm against the indexing script.
#
# SECURITY: FAISS.load_local unpickles the stored docstore, and unpickling can
# execute arbitrary code. `allow_dangerous_deserialization=True` is acceptable
# only because this index is produced locally; never point it at an index file
# obtained from an untrusted source.
new_vector_store = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

In [3]:
# Define the chat prompt
from langchain_core.prompts import ChatPromptTemplate

# Two template variables are filled in by the retrieval chain:
#   {context} - the retrieved documents, stuffed into the prompt
#   {input}   - the user's question
# The context is kept in its own section, separate from the question, so the
# model can tell the retrieved data apart from the instruction.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer the question based only on the data provided."),
        ("human", "Context:\n{context}\n\nQuestion: {input}\n\nAnswer the question using only the context above."),
    ]
)

Premier test sans mécanisme avancé

In [4]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Baseline retriever: plain similarity search on the FAISS index, top 2 hits.
retriever = new_vector_store.as_retriever(search_type="similarity", search_kwargs=dict(k=2))

# Chain that places the retrieved documents into `prompt` and calls the LLM.
combine_docs_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Baseline RAG pipeline: retrieve first, then answer from the retrieved context.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [5]:
# Baseline run (similarity search only); the returned dict is the cell output.
baseline_request = {"input": "What's a good movie about an epic viking?"}
retrieval_chain.invoke(baseline_request)

{'input': "What's a good movie about an epic viking?",
 'context': [Document(id='56a385d5-d727-446a-a102-18d4cffa6c92', metadata={'source': 'https://www.imdb.com/title/tt1494639'}, page_content="Title: Beowulf\nTags: psychedelic, avant garde, magical realism\nPlot: Set in 507, Beowulf is a legendary Geatish warrior who travels to Denmark with his band of soldiers, including his best friend, Wiglaf. They meet King Hrothgar, who needs a hero to slay Grendel, a hideously malformed troll-like creature with appalling strength and cunning. Grendel attacks Heorot, Hrothgar's mead hall, whenever the Danes held a celebration, and Hrothgar has been forced to close the hall. Upon arriving, Beowulf becomes attracted to Hrothgar's wife, Queen Wealtheow, who reciprocates his interest."),
  Document(id='be508309-deb9-4d04-92cb-f029d2770cc2', metadata={'source': 'https://www.imdb.com/title/tt0064000'}, page_content="Title: Alfred the Great\nTags: action\nPlot: When the Vikings invade England, Alfred (

# 1. Hybrid Search

Recherche hybride (*Hybrid Search*) avec un `EnsembleRetriever`, inspirée de [ce blog](https://medium.com/@nadikapoudel16/advanced-rag-implementation-using-hybrid-search-reranking-with-zephyr-alpha-llm-4340b55fef22).

[Documentation BM25](https://python.langchain.com/docs/integrations/retrievers/bm25/) sur le site de LangChain

In [6]:
from data import (
    load_movie_data,
    prepare_movie_data,
    split_movie_data,
)

# Rebuild the text chunks in memory: the BM25 retriever below is built from
# the documents themselves rather than from the FAISS index.
# The helpers are project-local (data.py); each one prints its own progress line.
df = load_movie_data()                  # raw movie dataset
movies = prepare_movie_data(df)         # presumably one document per movie -- confirm in data.py
text_splits = split_movie_data(movies)  # chunked documents fed to BM25

Path to dataset files: C:\Users\CYTech Student\.cache\kagglehub\datasets\cryptexcode\mpst-movie-plot-synopses-with-tags\versions\1
14828 movies loaded
2966 documents loaded
21211 splits created


In [7]:
%pip install rank_bm25

Note: you may need to restart the kernel to use updated packages.


In [8]:
import nltk

# word_tokenize (used as the BM25 preprocessing function below) relies on the
# "punkt_tab" tokenizer data in recent NLTK releases.
# quiet=True suppresses the downloader log, which otherwise writes the local
# user-profile path into the saved notebook output. The call still returns
# True on success, so the cell output stays a simple success flag.
nltk.download("punkt_tab", quiet=True)

[nltk_data] Downloading package punkt_tab to C:\Users\CYTech
[nltk_data]     Student\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [9]:
from langchain.retrievers import EnsembleRetriever
# BM25Retriever is implemented in langchain_community; importing it through
# `langchain.retrievers` only works via a deprecated re-export shim.
from langchain_community.retrievers import BM25Retriever
from nltk.tokenize import word_tokenize

# Keyword (sparse) retriever: BM25 over the in-memory chunks, top 2 hits.
# The same preprocess_func tokenizes both the documents and the query.
# NOTE(review): word_tokenize is case-sensitive and keeps punctuation and stop
# words, so "Viking" and "viking" are different terms -- consider lowercasing
# and filtering tokens if keyword hits look irrelevant.
keyword_retriever = BM25Retriever.from_documents(
    text_splits,
    k=2,
    preprocess_func=word_tokenize,
)

# Hybrid search: merge the dense (FAISS similarity) and sparse (BM25) result
# lists, giving each retriever the same weight.
# Relies on `retriever`, `combine_docs_chain` and `create_retrieval_chain`
# defined in the baseline cell above.
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.5, 0.5],
)
chain_hybrid = create_retrieval_chain(ensemble_retriever, combine_docs_chain)

In [10]:
# Same question as the baseline run, now answered through the hybrid (FAISS + BM25) retriever.
hybrid_request = {"input": "What's a good movie about an epic viking?"}
chain_hybrid.invoke(hybrid_request)

{'input': "What's a good movie about an epic viking?",
 'context': [Document(id='56a385d5-d727-446a-a102-18d4cffa6c92', metadata={'source': 'https://www.imdb.com/title/tt1494639'}, page_content="Title: Beowulf\nTags: psychedelic, avant garde, magical realism\nPlot: Set in 507, Beowulf is a legendary Geatish warrior who travels to Denmark with his band of soldiers, including his best friend, Wiglaf. They meet King Hrothgar, who needs a hero to slay Grendel, a hideously malformed troll-like creature with appalling strength and cunning. Grendel attacks Heorot, Hrothgar's mead hall, whenever the Danes held a celebration, and Hrothgar has been forced to close the hall. Upon arriving, Beowulf becomes attracted to Hrothgar's wife, Queen Wealtheow, who reciprocates his interest."),
  Document(metadata={'source': 'https://www.imdb.com/title/tt0065513'}, page_content='a junction alongside another lorry transporting another hapless man-in-a-box our guy can barely keep a grip as we finally have ou

# 2. Reranking

Documentation du [FlashRank reranker](https://python.langchain.com/docs/integrations/retrievers/flashrank-reranker/) sur le site de LangChain.

The Contextual Compression Retriever passes queries to the base retriever, takes the initial documents and passes them through the Document Compressor. The Document Compressor takes a list of documents and shortens it by reducing the contents of documents or dropping documents altogether.

In [11]:
%pip install -qU  flashrank

Note: you may need to restart the kernel to use updated packages.


In [12]:
# Reranking with Flashrank

from langchain.retrievers import ContextualCompressionRetriever
# FlashrankRerank is implemented in langchain_community; the
# `langchain.retrievers.document_compressors` path is a deprecated re-export shim.
from langchain_community.document_compressors import FlashrankRerank

# Compressor for reranking: rescores the candidate documents against the query
# and keeps the best ones (library defaults are used for model and cut-off).
compressor = FlashrankRerank()

# Candidates come from the hybrid retriever, then pass through the reranker.
# Relies on `ensemble_retriever`, `combine_docs_chain` and
# `create_retrieval_chain` defined in earlier cells.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=ensemble_retriever,
)
chain_reranking = create_retrieval_chain(compression_retriever, combine_docs_chain)

In [13]:
# Same question again, this time through hybrid retrieval followed by FlashRank reranking.
rerank_request = {"input": "What's a good movie about an epic viking?"}
chain_reranking.invoke(rerank_request)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


{'input': "What's a good movie about an epic viking?",
 'context': [Document(metadata={'id': 1, 'relevance_score': 0.9993699, 'source': 'https://www.imdb.com/title/tt0065513'}, page_content='a junction alongside another lorry transporting another hapless man-in-a-box our guy can barely keep a grip as we finally have our suspicions\nconfirmed and realize that everything is wrong and something sinister is afoot...Who has kidnapped him? Where are they going? What terrible fate is about to befall him?The film is a triumph, telling a simple story with an excellent central performance from the lead that has to deliver a huge range of emotions\nthrough body language without turning the whole film into an embarrassing pantomime. The photography sets this apart from the usual TV\nmovie fare and the varied score, which ranges from playful for the comedic opening to apocalyptic during the twisted finale, all help to create a thrill\nride with a shocking ending that will stay with you for a good w