[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://githubtocolab.com/Satyajeet-code/Generative-AI/blob/main/RAGSession42AI/RAGFusion.ipynb)

In [65]:
!pip -q install langchain faiss-cpu langchain-groq  langchain-community chromadb pypdf

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [66]:
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.vectorstores.chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
import requests
import re
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceBgeEmbeddings

In [67]:
groq=""

In [68]:

loader = PyPDFLoader(r"/content/Applying generative AI with retrieval augmented generation to summarize and extract key clinical infor.pdf")
docs = loader.load_and_split()

In [69]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 100
)

In [70]:

texts = splitter.split_documents(docs)

In [71]:
model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

embedding_function = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cuda'},
    encode_kwargs=encode_kwargs,

)

In [72]:
db = Chroma.from_documents(texts,
                       embedding_function,
                       persist_directory="./chroma_db")

In [81]:
query = "Explain the methodology"

db.similarity_search(query)

[Document(metadata={'page': 1, 'source': '/content/Applying generative AI with retrieval augmented generation to summarize and extract key clinical infor.pdf'}, page_content='This technique combines elements of both retrieval and generation \nmethods [38]. In this approach, the model uses a retrieval system to \nretrieve relevant information from a dataset or knowledge base and then \ngenerates responses or content based on that retrieved information [39]. \nRetrieval systems are efficient in finding relevant information from a \nlarge dataset, while generation models excel at creating coherent and \ncontextually appropriate responses. By incorporating retrieval, the'),
 Document(metadata={'source': 'text_file.txt'}, page_content='^ Nanda, Neel; Chan, Lawrence; Lieberum, Tom; Smith, Jess; Steinhardt, Jacob (2023-01-01). "Progress measures for grokking via mechanistic interpretability". arXiv:2301.05217 [cs.LG].\n^ a b c d e Mitchell, Melanie; Krakauer, David C. (28 March 2023). "The de

In [82]:
retriever = db.as_retriever()

llm =ChatGroq(groq_api_key=groq, model_name="Llama3-8b-8192")

In [83]:
prompt = ChatPromptTemplate.from_template("""
    Answer the following question based only on the provided context.
    <context>
    {context}
    </context>
    <Question>
    {input}
    </Question>""")

document_chain = create_stuff_documents_chain(llm, prompt)

retrieval_chain = create_retrieval_chain(retriever, document_chain)

response = retrieval_chain.invoke({"input": query})

output = response["answer"]
output = output.replace("*", "\n")



In [84]:
output

'According to the provided context, the methodology involves a combination of retrieval and generation methods. Here\'s a step-by-step explanation:\n\n1. The model uses a retrieval system to retrieve relevant information from a dataset or knowledge base.\n2. The retrieved documents are sent to the generative model along with specific prompt instructions.\n3. The generative model produces a concise and informative summary based on the retrieved documents and the prompt instructions.\n\nThis methodology is referred to as "retrieval-augmented generation", where a document retriever is used to retrieve relevant documents for a given query, and then an LLM (Large Language Model) generates an output based on both the query and the context included from the retrieved documents.'

In [130]:
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, PromptTemplate
prompt = ChatPromptTemplate(input_variables=['query'],
                            messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[],template='You are a helpful assistant that generates multiple search queries based on a single input query.')),
                            HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], template='Generate multiple search queries related to: {question} \n OUTPUT (2 queries):'))])


generate_queries = (
    prompt | llm | StrOutputParser() | (lambda x: x.split("\n"))
)


In [131]:
from langchain.load import dumps, loads


def reciprocal_rank_fusion(results: list[list], k=1):
    fused_scores = {}
    for docs in results:
        # Assumes the docs are returned in sorted order of relevance
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            previous_score = fused_scores[doc_str]
            fused_scores[doc_str] += 1 / (rank + k)

    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

In [132]:
ragfusion_chain = generate_queries | retriever.map() | reciprocal_rank_fusion

In [133]:
query

'Explain the methodology'

In [134]:
from langchain.schema.runnable import RunnablePassthrough
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

full_rag_fusion_chain = (
    {
        "context": ragfusion_chain,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm

)

In [135]:
full_rag_fusion_chain.invoke({"question": query})

AIMessage(content='The methodology used in this research involves the application of generative AI with retrieval-augmented generation to summarize and extract key clinical information. The approach combines elements of both retrieval and generation methods, where a retrieval system is used to retrieve relevant information from a dataset or knowledge base and then generates responses or content based on that retrieved information.\n\nThe methodology involves the following steps:\n\n1. Retrieval: The retrieval system uses a search algorithm to retrieve related documents from a dataset or knowledge base.\n2. Generation: The generated model uses the retrieved documents to generate a summary or response.\n3. Post-processing: The generated summary or response is then post-processed to ensure its coherence and relevance to the query.\n\nThe methodology also involves the use of a parameter for the search operation (k) which was set at 20, which is the limitation of the available GPU memory fo

In [136]:
full_rag_fusion_chain.invoke({"question": query}).content

'The methodology described in the provided context is the use of Retrieval-Augmented Generation (RAG) for summarization and extraction of key clinical information. This approach combines the strengths of both retrieval and generation methods.\n\nThe methodology involves the following steps:\n\n1. Retrieval: The model uses a retrieval system to retrieve relevant information from a dataset or knowledge base. This is done by encoding the query and the documents into vectors, and then finding the documents with vectors most similar to the vector of the query.\n2. Generation: The retrieved documents are sent to a generative model along with specific prompt instructions to produce a concise and informative summary.\n3. Processing: The model breaks the nursing notes and structured data into manageable chunks of a fixed size (600 characters) to facilitate subsequent processing and analysis.\n\nThe authors also mention the use of a specific model, Llama 2, which is the leading open-source model