In [13]:
import os
from dotenv import load_dotenv
import fitz
from langchain.document_loaders import PyMuPDFLoader
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

In [11]:
# Copy variables from a local .env file (if present) into os.environ.
load_dotenv()

# Fail fast with a readable message when a key is absent. The previous
# `os.environ[k] = os.getenv(k)` re-assignment raised an opaque TypeError for a
# missing key and was a no-op for a present one, because load_dotenv() has
# already placed the values in os.environ.
_required_api_keys = ("GROQ_API_KEY", "GEMINI_API_KEY")
_missing_api_keys = [name for name in _required_api_keys if not os.getenv(name)]
if _missing_api_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_api_keys)
        + ". Define them in your shell or in a .env file next to this notebook."
    )

In [None]:
## 1. Load the PDF document
def load_pdf(path):
    """Load the PDF located at ``path`` and return the loader's result.

    Args:
        path: Location of the PDF file, handed unchanged to ``PyMuPDFLoader``.

    Returns:
        Whatever ``PyMuPDFLoader(path).load()`` returns (presumably a list of
        LangChain documents -- confirm against the loader's documentation).
    """
    return PyMuPDFLoader(path).load()


# Source paper, resolved relative to the notebook's working directory.
pdf_path = "attention-is-all-you-need.pdf"
documents = load_pdf(pdf_path)

In [6]:
## 2. Split the document into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # upper bound on the size of each chunk
    chunk_overlap=100,  # amount shared between neighbouring chunks
)
# `docs` holds the chunked form of `documents` and feeds the vector store.
docs = text_splitter.split_documents(documents)

In [16]:
## 3. Create embeddings
# Embedding model used to vectorise the chunks when the vector store is built.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en",
)

In [19]:
## 4. Store in Chroma DB
# Embed every chunk and write the collection to ./pdf_rag_data.
# No explicit vectorstore.persist() call: with Chroma 0.4+ a store created with
# persist_directory is saved automatically, and the manual persist() method is
# deprecated in the LangChain wrapper (it only emits a deprecation warning).
# NOTE(review): re-running this cell against an existing ./pdf_rag_data
# presumably appends the same chunks again -- confirm, and clear the directory
# before re-ingesting if duplicates show up in retrieval.
vectorstore = Chroma.from_documents(
    docs,
    embeddings,
    persist_directory="./pdf_rag_data",
)

  vectorstore.persist()


In [24]:
## 5. Load Groq LLM
# Chat model that will generate the answers for the QA chain.
llm = ChatGroq(
    max_tokens=512,
    temperature=0.3,
    model="gemma2-9b-it",
)

In [25]:
## 6. Create the RAG QA chain
# Retriever over the Chroma store, configured with k=3 for each query.
top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Question-answering chain that pairs the retriever with the Groq LLM and is
# asked to return the source documents alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=top_k_retriever,
    return_source_documents=True,
)

In [30]:
## 7. Ask Some questions
# One entry per query sent to the RAG chain. Every string needs its own
# trailing comma: without it Python silently concatenates adjacent string
# literals into a single, run-together question.
questions = [
    "What is the paper about?",
    "How Many layers are there in the Transformer model?",
    "What is the role of attention in the Transformer model?",
    "What is the number is Encodes and Decodes in the Transformer model?",
    "What is the main contribution of the paper?",
    "What are the key components of the Transformer architecture?",
]

In [32]:
# Send each question through the RAG chain and print the generated answer.
for q in questions:
    # Use .invoke(); calling the chain object directly is deprecated in LangChain.
    result = rag_chain.invoke({"query": q})
    print(f"\nQ: {q}")
    # The chain's answer text is stored under the "result" key.
    print(f"A: {result['result']}")
    print("---------------------------------------------------------------------")


Q: What is the paper about?
A: This document appears to be an excerpt from a research paper discussing variations of the Transformer architecture. 

The paper focuses on how different architectural choices impact the performance of a machine translation model (specifically English to German).  It presents a table comparing various Transformer models with different parameters and their performance on a benchmark dataset. 


Let me know if you have any other questions about this excerpt. 


---------------------------------------------------------------------

Q: How Many layers are there in the Transformer model?
A: The Transformer model has 6 layers in both the encoder and decoder. 



---------------------------------------------------------------------

Q: What is the role of attention in the Transformer model?
A: The provided text explains that the Transformer model uses attention to process information. 

It states that varying the number of attention heads and the attention key a