### Import Libraries

In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

from langchain_groq import ChatGroq

### LLMs

In [2]:
from dotenv import load_dotenv
import os

load_dotenv() #Loading all api keys

True

In [3]:
groq_api_key = os.getenv("GROQ_API_KEY")

llm = ChatGroq(model = 'llama3-8b-8192', temperature=0)


### Indexing

In [4]:
## Pdf reader
loader=PyPDFLoader('attention.pdf')
docs=loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
doc_splits=text_splitter.split_documents(docs)

# Add to vectorDB
## Embedding Using Huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",      #sentence-transformers/all-MiniLM-l6-v2
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}

)

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=huggingface_embeddings,
)

retriever = vectorstore.as_retriever()


### RAG Fusion

In [7]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_rag_fusion 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)


question = "what is the use of Layer Normalization?"
generate_queries.invoke({"question":question})

['Here are five alternative versions of the user question to retrieve relevant documents from a vector database:',
 '',
 'What are the benefits of using Layer Normalization in deep learning models?',
 '',
 'How does Layer Normalization contribute to the overall performance of neural networks?',
 '',
 'What is the purpose of normalizing activations at each layer in a neural network, and how does Layer Normalization achieve this?',
 '',
 'Can you provide examples of scenarios where Layer Normalization is particularly useful or necessary in deep learning applications?',
 '',
 'What are the key differences between Layer Normalization and other normalization techniques, such as Batch Normalization or Instance Normalization?']

### Reciprocal Rank Fusion and Retrieval Chain

In [9]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
        and an optional parameter k used in the RRF formula """
    
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results



retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

13

### Generation

In [10]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'According to the provided context, Layer Normalization is used to normalize the outputs of each sub-layer in the model, as well as the embedding layers. This is mentioned in the section "3.1.2. Layer Normalization" of the document.'