### Import Libraries

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

from langchain_groq import ChatGroq

### LLMs

In [None]:
from dotenv import load_dotenv
import os

load_dotenv() #Loading all api keys

In [4]:
groq_api_key = os.getenv("GROQ_API_KEY")

llm = ChatGroq(model = 'llama3-8b-8192', temperature=0)


### Indexing

In [5]:
## Pdf reader
loader=PyPDFLoader('attention.pdf')
docs=loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
doc_splits=text_splitter.split_documents(docs)

# Add to vectorDB
## Embedding Using Huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",      #sentence-transformers/all-MiniLM-l6-v2
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}

)

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=huggingface_embeddings,
)

retriever = vectorstore.as_retriever()


### Multi Query: Different Perspectives

In [44]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_perspectives 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [45]:
question = "what is the use of Layer Normalization?"
generate_queries.invoke({"question":question})

['Here are five alternative versions of the user question to retrieve relevant documents from a vector database:',
 '',
 'What are the benefits of using Layer Normalization in deep learning models?',
 '',
 'How does Layer Normalization contribute to the overall performance of neural networks?',
 '',
 'What is the purpose of normalizing activations at each layer in a neural network, and how does Layer Normalization achieve this?',
 '',
 'Can you provide examples of scenarios where Layer Normalization is particularly useful or necessary in deep learning applications?',
 '',
 'What are the key differences between Layer Normalization and other normalization techniques, such as Batch Normalization or Instance Normalization?']

### Retrieval Chain

In [12]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is the use of positional encoding layer?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)

19

In [13]:
docs[:5]

[Document(page_content='[37] Vinyals & Kaiser, Koo, Petrov, Sutskever, and Hinton. Grammar as a foreign language. In\nAdvances in Neural Information Processing Systems , 2015.\n[38] Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V Le, Mohammad Norouzi, Wolfgang\nMacherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey, et al. Google’s neural machine\ntranslation system: Bridging the gap between human and machine translation. arXiv preprint\narXiv:1609.08144 , 2016.\n[39] Jie Zhou, Ying Cao, Xuguang Wang, Peng Li, and Wei Xu. Deep recurrent models with\nfast-forward connections for neural machine translation. CoRR , abs/1606.04199, 2016.\n[40] Muhua Zhu, Yue Zhang, Wenliang Chen, Min Zhang, and Jingbo Zhu. Fast and accurate\nshift-reduce constituent parsing. In Proceedings of the 51st Annual Meeting of the ACL (Volume\n1: Long Papers) , pages 434–443. ACL, August 2013.\n12', metadata={'page': 11, 'source': 'attention.pdf'}),
 Document(page_content='PEpos.\nWe also experimented with usin

### Generation

In [30]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)


final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'According to the provided context, the positional encoding layer is used to inject information about the relative or absolute position of the tokens in the sequence. This is necessary because the Transformer model contains no recurrence and no convolution, and therefore, it cannot make use of the order of the sequence by itself.\n\nThe positional encoding layer adds "positional encodings" to the input embeddings at the bottoms of the encoder and decoder stacks. These positional encodings have the same dimension as the embeddings, so that the two can be summed. The positional encodings are sine and cosine functions of different frequencies, which allows the model to easily learn to attend by relative positions.'