In [11]:
# LangSmith tracing settings; the next cell copies them into os.environ.
LANGCHAIN_PROJECT = "Zero2LLMOps"
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
LANGCHAIN_TRACING_V2 = "false"
# Placeholder value only -- never commit a real key to the notebook.
LANGCHAIN_API_KEY = "api_key"

In [12]:
import os

# Copy the tracing settings from the config cell into the process environment.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": LANGCHAIN_TRACING_V2,
        "LANGCHAIN_ENDPOINT": LANGCHAIN_ENDPOINT,
        "LANGCHAIN_API_KEY": LANGCHAIN_API_KEY,
        "LANGCHAIN_PROJECT": LANGCHAIN_PROJECT,
    }
)

In [13]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

# Load variables from a local .env file; the model cell below reads
# OPENAI_API_KEY from the environment.
# NOTE(review): this runs after the os.environ assignments above, so with
# python-dotenv's default (no override) those values presumably win over .env -- confirm.
load_dotenv()

True

In [14]:
# Chat model used by the chains in this notebook (aliased as `llm` further down).
model = ChatOpenAI(model="gpt-4o-mini", temperature=0.7, api_key=os.getenv("OPENAI_API_KEY"))

In [15]:
from langchain_community.document_loaders import PyPDFLoader

# Read the paper's PDF (path is relative to the notebook's working directory) into `docs`.
loader = PyPDFLoader(file_path="./transformersPaper.pdf")
docs = loader.load()

In [16]:
from langchain_text_splitters import (
    RecursiveCharacterTextSplitter
)
# Chunk the loaded documents: chunk_size=1000 with chunk_overlap=200 between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)

In [17]:
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings

# Embedding backend for the vector store. A local Ollama model can be used
# instead by swapping in:
#     embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

from langchain_community.vectorstores import FAISS

# Embed the chunks and build the FAISS index that the retriever is created from.
vector_store = FAISS.from_documents(embedding=embeddings, documents=splits)

In [18]:
# Retriever view over the FAISS store, created with default settings.
# NOTE: the name is spelled `retriver` and later cells reference it by exactly
# this spelling, so any rename has to be applied to all of them together.
retriver = vector_store.as_retriever()

In [19]:
from langchain import hub
# Pull the "rlm/rag-prompt" prompt from the LangChain Hub (needs network access).
# The baseline chain below feeds it the keys `context` and `question`.
# NOTE: `prompt` is rebound to a different template in the RAG-Fusion section.
prompt = hub.pull("rlm/rag-prompt")

from langchain_ollama import ChatOllama

# Every chain below refers to the chat model as `llm`; here it is the
# ChatOpenAI instance created earlier.
llm = model

# Alternative local model, left disabled.
# NOTE(review): this snippet calls ChatOpenAI with an Ollama-style model tag;
# ChatOllama (imported above) is presumably what was intended -- confirm.
# llm = ChatOpenAI(
#     model="llama3.1:latest",
#     temperature=0
# )

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Baseline RAG pipeline: the input question is sent to the retriever (whose
# documents are joined by format_docs) and passed through unchanged as
# `question`; the filled prompt goes to the model and the reply is returned
# as a plain string.
rag_chain = (
    RunnableParallel(
        context=retriver | format_docs,
        question=RunnablePassthrough(),
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [20]:
# Sanity check of the baseline chain: its input is the raw question string.
rag_chain.invoke("What is the Author of Transformer paper ??")

'The authors of the Transformer paper are Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Lukasz Kaiser, and Aidan N. Gomez, among others. Their work proposed a novel model architecture relying entirely on attention mechanisms. The paper is titled "Attention is All You Need," published in 2017.'

## **Decomposition**

In [21]:
from langchain.prompts import ChatPromptTemplate

# Decomposition prompt: asks the model for 3 sub-questions derived from {question}.
# NOTE: this is a regular (non-raw) triple-quoted string, so every "\n" below is
# an escape that adds a newline on top of the physical line break.
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [22]:
from langchain_core.output_parsers import StrOutputParser

# Chain: prompt -> model -> plain text -> list of sub-questions (one per line).
# The model may separate the numbered items with blank lines; those are dropped
# (and surrounding whitespace trimmed) so that no empty query reaches the
# retriever later on.
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

# Run
question = "What is the Author of Transformer paper ??"
questions = generate_queries_decomposition.invoke({"question": question})


In [23]:
# Inspect the sub-questions produced by the decomposition chain.
questions

['1. Who are the authors of the original Transformer paper titled "Attention is All You Need"?',
 '2. What contributions did each author make to the Transformer paper?',
 '3. When was the Transformer paper published and in which conference or journal?']

In [24]:
# Prompt used to answer one sub-question. Placeholders: {question} (the current
# sub-question), {q_a_pairs} (question/answer pairs accumulated from earlier
# sub-questions) and {context} (what the retriever returned for the sub-question).
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)


In [25]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Render a pair as 'Question: ...' / 'Answer: ...' with outer whitespace stripped."""
    return f"Question: {question}\nAnswer: {answer}\n\n".strip()

# Build the per-sub-question chain once: it does not depend on the loop
# variable, and giving it its own name keeps the baseline `rag_chain` defined
# earlier intact, so that cell can still be re-run afterwards.
decomposition_chain = (
    {
        # Same formatting as the baseline chain: only the chunk text is put
        # into the prompt, not the repr of the Document objects.
        "context": itemgetter("question") | retriver | format_docs,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | llm
    | StrOutputParser()
)

# Answer the sub-questions in order; each call also receives the Q/A pairs
# produced so far as background. After the loop, `answer` and `q_a_pair` hold
# the values for the last sub-question.
q_a_pairs = ""
for q in questions:
    answer = decomposition_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n" + q_a_pair


In [26]:
# Answer to the last sub-question handled by the loop above.
answer

'The Transformer paper titled "Attention is All You Need" was published in June 2017 at the Neural Information Processing Systems (NeurIPS) conference.'

In [27]:
# Formatted question/answer pair for the last sub-question.
q_a_pair

'Question: 3. When was the Transformer paper published and in which conference or journal?\nAnswer: The Transformer paper titled "Attention is All You Need" was published in June 2017 at the Neural Information Processing Systems (NeurIPS) conference.'

In [29]:
# Everything accumulated by the loop: each Q/A pair preceded by a "---" separator line.
print(q_a_pairs)


---
Question: 1. Who are the authors of the original Transformer paper titled "Attention is All You Need"?
Answer: The authors of the original Transformer paper titled "Attention is All You Need" are:

1. Ashish Vaswani
2. Noam Shazeer
3. Niki Parmar
4. Jakob Uszkoreit
5. Llion Jones
6. Aidan N. Gomez
7. Łukasz Kaiser
8. Illia Polosukhin
---
Question: 2. What contributions did each author make to the Transformer paper?
Answer: The contributions of each author to the Transformer paper titled "Attention is All You Need" are as follows:

1. **Ashish Vaswani**: Designed and implemented the first Transformer models and was involved in nearly every aspect of the work.

2. **Noam Shazeer**: Proposed scaled dot-product attention, multi-head attention, and the parameter-free position representation, contributing to nearly every detail of the project.

3. **Niki Parmar**: Designed, implemented, tuned, and evaluated numerous model variants in the original codebase and tensor2tensor.

4. **Jakob 

## **RAG Fusion**

## Reciprocal Rank Fusion (RRF) Explanation

### Question Rankings

- **Question A**: 
  1. Doc1
  2. Doc4
  3. Doc3
  4. Doc2

- **Question B**: 
  1. Doc3
  2. Doc1
  3. Doc2
  4. Doc4

- **Question C**: 
  1. Doc4
  2. Doc3
  3. Doc1
  4. Doc2

### Rank Positions

- **Doc1**:
  - Question A rank: 1
  - Question B rank: 2
  - Question C rank: 3

- **Doc2**:
  - Question A rank: 4
  - Question B rank: 3
  - Question C rank: 4

- **Doc3**:
  - Question A rank: 3
  - Question B rank: 1
  - Question C rank: 2

- **Doc4**:
  - Question A rank: 2
  - Question B rank: 4
  - Question C rank: 1

### Reciprocal Rank Fusion Calculation

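Each document's score is the sum, over all questions, of `1 / (k + rank)`, where `rank` is the document's 1-based position in that question's ranking. Using `k = 60`: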

#### Doc1
- Reciprocal Rank (Question A): `1 / (60 + 1) = 1 / 61`
- Reciprocal Rank (Question B): `1 / (60 + 2) = 1 / 62`
- Reciprocal Rank (Question C): `1 / (60 + 3) = 1 / 63`
- **RRF(Doc1)**: `1 / 61 + 1 / 62 + 1 / 63 ≈ 0.0484`

#### Doc2
- Reciprocal Rank (Question A): `1 / (60 + 4) = 1 / 64`
- Reciprocal Rank (Question B): `1 / (60 + 3) = 1 / 63`
- Reciprocal Rank (Question C): `1 / (60 + 4) = 1 / 64`
- **RRF(Doc2)**: `1 / 64 + 1 / 63 + 1 / 64 ≈ 0.0471`

#### Doc3
- Reciprocal Rank (Question A): `1 / (60 + 3) = 1 / 63`
- Reciprocal Rank (Question B): `1 / (60 + 1) = 1 / 61`
- Reciprocal Rank (Question C): `1 / (60 + 2) = 1 / 62`
- **RRF(Doc3)**: `1 / 63 + 1 / 61 + 1 / 62 ≈ 0.0484`

#### Doc4
- Reciprocal Rank (Question A): `1 / (60 + 2) = 1 / 62`
- Reciprocal Rank (Question B): `1 / (60 + 4) = 1 / 64`
- Reciprocal Rank (Question C): `1 / (60 + 1) = 1 / 61`
- **RRF(Doc4)**: `1 / 62 + 1 / 64 + 1 / 61 ≈ 0.0481`

### Final Ranking

Based on the RRF scores:

1. **Doc1**: `≈ 0.0484` (tied with Doc3: both hold ranks 1, 2 and 3 once each)
2. **Doc3**: `≈ 0.0484`
3. **Doc4**: `≈ 0.0481`
4. **Doc2**: `≈ 0.0471`

In [None]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion prompt: asks the model for 4 search queries related to {question}.
# NOTE: this is a regular (non-raw) triple-quoted string, so every "\n" below is
# an escape that adds a newline on top of the physical line break.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Prompt -> model -> plain text -> list of search queries (one per line).
# Blank lines in the model's reply are dropped (and surrounding whitespace
# trimmed) so that no empty query is sent to the retriever.
generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

In [None]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one ranking (Reciprocal Rank Fusion).

    Every document receives ``1 / (k + rank)`` from each list it appears in,
    where ``rank`` is its 1-based position in that list; the contributions are
    summed into the document's fused score.

    Args:
        results: One ranked list of documents per query, best match first.
            Each document must be serialisable with ``dumps``; the serialised
            form is the key used to recognise the same document across lists.
        k: Constant added to the rank in the RRF formula (default 60).

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by score, highest
        first. Documents with equal scores keep the order in which they were
        first encountered.
    """
    # Serialised document -> accumulated score.
    fused_scores = {}

    for ranked_docs in results:
        # Ranks start at 1, as in the worked RRF example in this notebook
        # (first place contributes 1 / (k + 1)). This also keeps k=0 valid.
        for rank, doc in enumerate(ranked_docs, start=1):
            doc_key = dumps(doc)
            fused_scores[doc_key] = fused_scores.get(doc_key, 0) + 1 / (k + rank)

    # sorted() is stable, so ties preserve first-seen (dict insertion) order.
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)

    # Turn the serialised keys back into document objects.
    return [(loads(doc_key), score) for doc_key, score in ranked]

# Generate the search queries, run the retriever once per query (`.map()`),
# then fuse the per-query rankings into a single scored list.
retrieval_chain_rag_fusion = generate_queries | retriver.map() | reciprocal_rank_fusion
# Stored under its own name: `docs` already holds the documents loaded from the
# PDF, and overwriting it would break a re-run of the splitting cell.
fused_docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(fused_docs)

In [None]:
from langchain_core.runnables import RunnablePassthrough

# RAG: answer the question from the fused retrieval results.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)


def format_fused_docs(ranked_docs):
    """Join the text of fused ``(document, score)`` results, best match first."""
    return "\n\n".join(doc.page_content for doc, _score in ranked_docs)


final_rag_chain = (
    {
        # The fusion step returns (document, score) tuples; only the chunk text
        # is put into the prompt, not the repr of the tuples.
        "context": retrieval_chain_rag_fusion | format_fused_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})
