In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain import hub
from langchain.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain import hub
import textwrap
from langchain_google_genai import ChatGoogleGenerativeAI
import os
import google.generativeai as genai
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import Ollama
from langchain.prompts import ChatPromptTemplate
from IPython.display import display
from langchain.load import dumps, loads
from IPython.display import Markdown
import glob
from FlagEmbedding import FlagReranker


  from .autonotebook import tqdm as notebook_tqdm


# Document loader and splitter

In [None]:
# Collect every PDF under src/. The list is sorted because glob's result order
# depends on the file system; sorting keeps page and chunk order stable across runs.
pdf_files = sorted(glob.glob("src/*.pdf"))
pages = []

for pdf_file in pdf_files:
    pages.extend(PyPDFLoader(pdf_file).load_and_split())

# Remove two recurring strings from every page before chunking.
for page in pages:
    page.page_content = (
        page.page_content
        .replace("Imprimerie Officielle de la République Tunisienne", "")
        .replace("/tatweel", "")
    )

doc_chunks = []  # NOTE(review): not read by any cell visible here; kept so later cells that may use it still work
# "\nArticle" is listed first so the splitter tries that boundary before
# falling back to paragraphs, lines, sentences, and finally characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=850,
    separators=["\nArticle", "\n\n", "\n", ".", "!", "?", ",", " ", ""],
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(pages)
len(chunks)

# embedder = HuggingFaceEmbeddings(
#     model_name = "BAAI/bge-m3"
# )


# vectordb = Chroma.from_documents(
#     documents=chunks,
#     embedding=embedder,
#     persist_directory=persist_directory
# ) 
# vectordb.persist()

# OR

# index_name = "pfa"
# os.environ["PINECONE_API_KEY"] = "<PINECONE_API_KEY>"  # placeholder: supply the key via getpass or a secrets manager, never commit a real key
#docsearch = PineconeVectorStore.from_documents(chunks, embedder, index_name=index_name)
#docsearch = PineconeVectorStore(index_name=index_name,embedding=embedder)


# Retrieval

In [4]:
# Embedding model used to query the persisted Chroma collection.
embedder = HuggingFaceEmbeddings(
    model_name="BAAI/bge-m3"
)
persist_directory = 'db/'
# Open the vector store persisted under persist_directory.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedder)
# The number of results per query is configured through search_kwargs; a bare
# `k=5` keyword is not the documented way to set it.
retriever = vectordb.as_retriever(search_kwargs={"k": 5})
# Sanity check: number of entries stored in the underlying collection.
print(vectordb._collection.count())

15880


<h3>Multi-Query</h3>

In [11]:
# Prompt asking the LLM for five rephrasings of the user's question, one per line.
template = """Tu es un assistant juridique. 
Votre mission consiste à générer cinq versions différentes de la question initiale de l'utilisateur,
afin de récupérer des documents pertinents dans une base de données vectorielle. 
En proposant plusieurs points de vue sur la question de l'utilisateur, votre objectif
est de l'aider à surmonter certaines limitations de la recherche de similarité basée sur la distance. 
Fournissez ces questions alternatives séparées par des sauts de ligne. Question originale: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

# model = Ollama(model="llama3")
model = Ollama(model="mistral")

# Chain: prompt -> LLM -> plain string -> list of queries.
# Blank lines in the LLM output are dropped (and surrounding whitespace trimmed)
# so that empty strings are never passed on as queries.
generate_queries = (
    prompt_perspectives
    | model
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

def get_unique_union(documents):
    """Return the de-duplicated union of several lists of documents.

    Documents are compared through their serialized form (``dumps``), so two
    documents are duplicates when they serialize to the same string.

    Args:
        documents: an iterable of lists of documents, one list per query.

    Returns:
        A flat list of the distinct documents, in first-seen order.
    """
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would hand the documents back in arbitrary order.
    unique_docs = dict.fromkeys(serialized)
    return [loads(doc) for doc in unique_docs]

# Example question used to exercise the multi-query retrieval chain.
question = "Qui peut etre president de la Tunisie?"
# Pipeline: generated query list -> retriever applied to each query -> de-duplicated union of the results.
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
# Number of distinct documents retrieved across all generated queries.
len(docs)

11

<h3>RAG-fusion</h3>

In [13]:
# Same prompt as the Multi-Query section: five rephrasings of the question, one per line.
template = """Tu es un assistant juridique. 
Votre mission consiste à générer cinq versions différentes de la question initiale de l'utilisateur,
afin de récupérer des documents pertinents dans une base de données vectorielle. 
En proposant plusieurs points de vue sur la question de l'utilisateur, votre objectif
est de l'aider à surmonter certaines limitations de la recherche de similarité basée sur la distance. 
Fournissez ces questions alternatives séparées par des sauts de ligne. Question originale: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

model = Ollama(model="mistral")

# Chain: prompt -> LLM -> plain string -> list of queries.
# Blank lines in the LLM output are dropped (and surrounding whitespace trimmed)
# so that empty strings are never passed on as queries.
generate_queries = (
    prompt_perspectives
    | model
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked result lists into one ranking (reciprocal rank fusion).

    Every document earns ``1 / (rank + k)`` for each list it appears in, where
    ``rank`` is its 0-based position in that list; the contributions are summed
    per document. Documents are identified by their serialized form (``dumps``).

    Args:
        results: ranked lists of documents, one list per query.
        k: constant added to the rank in the denominator (default 60).

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by descending score.
        Documents with equal scores keep their first-seen order.
    """
    fused_scores = {}
    for ranked_docs in results:
        for rank, doc in enumerate(ranked_docs):
            key = dumps(doc)
            fused_scores[key] = fused_scores.get(key, 0) + 1 / (rank + k)
    ranking = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(key), score) for key, score in ranking]

# Example question used to exercise the RAG-fusion retrieval chain.
question = "Qui peut etre president de la Tunisie?"
# Pipeline: generated query list -> retriever applied to each query -> reciprocal rank fusion of the result lists.
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
# Here docs holds (document, fused_score) tuples, as returned by reciprocal_rank_fusion.
docs = retrieval_chain_rag_fusion.invoke({"question": question})
# Number of distinct documents in the fused ranking.
len(docs)

14

<h3>Query-Decomposition</h3>

In [None]:
# Prompt asking the LLM to break the question into three independent sub-questions.
template = """Tu es un assistant utile qui génère plusieurs sous-questions à partir d'une question initiale.
L'objectif est de décomposer la question initiale en un ensemble de sous-problèmes ou de sous-questions pouvant
être répondus de manière isolée.
Génère des requêtes de recherche multiples liées à : {question}
Sortie (3 requêtes) :"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

model = Ollama(model="mistral")

# Chain: prompt -> LLM -> plain string -> list of sub-questions.
# Blank lines in the LLM output are dropped (and surrounding whitespace trimmed)
# so that empty strings are never passed on as queries.
generate_queries = (
    prompt_perspectives
    | model
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)


14

# Reranking (using bge reranker)

# Reranking (using bge reranker)

In [None]:
# Reranker used to score each (query, passage) pair.
reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)  # use_fp16=True speeds up computation with a slight performance degradation

# NOTE(review): `query` and `unique_docs` are not assigned in any cell visible here;
# presumably the user question and the de-duplicated retrieval results - confirm
# and define them explicitly so this cell runs on a fresh kernel.
# normalize=True maps the scores into 0-1 by applying a sigmoid function.
scores = reranker.compute_score([[query, context.page_content] for context in unique_docs], normalize=True)
print(scores)
# Rank the documents by score, highest first. Sort on the score only: without a
# key, tied scores make the tuple comparison fall through to the documents
# themselves, which fails if they do not define an ordering.
reranked_found_docs = sorted(zip(scores, unique_docs), key=lambda pair: pair[0], reverse=True)
# Drop the scores and keep only the documents.
sorted_found_docs = [doc for _, doc in reranked_found_docs]


# Prompt Engineering and generation

In [None]:
def format_docs(docs):
    """Concatenate the page_content of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Pull the prompt template "mehdixlabetix/rag-law-fr" from the LangChain hub.
prompt = hub.pull("mehdixlabetix/rag-law-fr")


# Fill the template with the reranked passages as context and the user question.
# NOTE(review): `query` is not assigned in any cell visible here - confirm where it is defined.
final_prompt = prompt.invoke({"context":format_docs(sorted_found_docs), "question":query})

print(final_prompt)

#### mistral LLM

#### list of gemini versions (optional)

In [None]:
# Print the name of every model that supports the generateContent method.
for m in genai.list_models():
    if 'generateContent' not in m.supported_generation_methods:
        continue
    print(m.name)

#### Gemini (stream)

In [None]:
def to_markdown(text):
    """Wrap text in a Markdown display object, rendered as a block quote.

    Bullet characters are turned into Markdown list markers, then every line
    (blank lines included) is prefixed with '> '.
    """
    bulleted = text.replace('•', '  *')
    quoted = textwrap.indent(bulleted, '> ', predicate=lambda _: True)
    return Markdown(quoted)

# Rebinds `model` (previously the Ollama LLM) to a Gemini generative model.
model = genai.GenerativeModel('gemini-1.5-pro-latest')

# Non-streaming alternative:
# response = model.generate_content(final_prompt.messages[0].content)

# Send the content of the first prompt message and stream the answer back.
response = model.generate_content(final_prompt.messages[0].content,stream=True)
# Each streamed chunk is printed on its own line as it arrives.
for chunk in response:
  print(chunk.text)

# Query safety evaluation

In [None]:
# Safety ratings of the first candidate, read through the public `candidates`
# accessor instead of the private `_result` attribute.
response.candidates[0].safety_ratings