# The Apprentice's Quest - The Personal Document Oracle

In [None]:
# Cell 2: Import libraries and set up API Keys
import os
import getpass

os.environ["GROQ_API_KEY"] = getpass.getpass("Enter Your Groq API Key:")


Enter Your Groq API Key:··········


In [None]:
# Cell 3: Download the sample document (if needed)
!wget -O attention_is_all_you_need.pdf https://arxiv.org/pdf/1706.03762.pdf

--2025-06-07 17:15:05--  https://arxiv.org/pdf/1706.03762.pdf
Resolving arxiv.org (arxiv.org)... 151.101.195.42, 151.101.3.42, 151.101.131.42, ...
Connecting to arxiv.org (arxiv.org)|151.101.195.42|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://arxiv.org/pdf/1706.03762 [following]
--2025-06-07 17:15:05--  http://arxiv.org/pdf/1706.03762
Connecting to arxiv.org (arxiv.org)|151.101.195.42|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2215244 (2.1M) [application/pdf]
Saving to: ‘attention_is_all_you_need.pdf’


2025-06-07 17:15:05 (40.8 MB/s) - ‘attention_is_all_you_need.pdf’ saved [2215244/2215244]



In [21]:
# Cell 4: The Core RAG Logic (Hugging Face Edition)
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings  # <-- Import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

# 1. Load the document
print("Loading the document...")
loader = PyPDFLoader("attention_is_all_you_need.pdf")
docs = loader.load()

# 2. Split the document into chunks
print("Splitting the document into chunks...")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)

# 3. Create embeddings using Hugging Face and store in FAISS
print("Downloading and creating Hugging Face embeddings...")
# Use a specific, well-performing model
model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name) # <-- Use HuggingFaceEmbeddings
print("Embeddings model loaded.")

db = FAISS.from_documents(texts, embeddings)
print("Vector store created.")

# 4. Create the retriever
retriever = db.as_retriever(search_kwargs={"k": 3})

# 5. Create the QA chain with Groq LLM
print("Creating the QA chain with Groq...")
llm = ChatGroq(
    model_name="llama3-8b-8192",
    temperature=0
)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)
print("Fully free RAG pipeline is ready!")

Loading the document...
Splitting the document into chunks...
Downloading and creating Hugging Face embeddings...
Embeddings model loaded.
Vector store created.
Creating the QA chain with Groq...
Fully free RAG pipeline is ready!


In [22]:
# Cell 5: Ask a question
question = "What is a transformer and what are its main components?"
result = qa_chain({"query": question})

print("\n--- Answer ---")
print(result["result"])

print("\n--- Sources ---")
for source in result["source_documents"]:
    print(f"- Page {source.metadata['page']}: {source.page_content[:250]}...")


--- Answer ---
A Transformer is a type of neural network architecture primarily used for natural language processing (NLP) tasks, such as machine translation, text summarization, and language modeling. The main components of a Transformer are:

1. Encoder: The encoder is responsible for processing the input sequence (e.g., a sentence or paragraph) and generating a continuous representation of the input.
2. Decoder: The decoder generates the output sequence (e.g., the translated sentence) based on the input sequence and the encoder's output.
3. Multi-Head Attention: This is a key component of the Transformer, allowing the model to attend to different parts of the input sequence simultaneously and weigh their importance.
4. Self-Attention: The Transformer uses self-attention mechanisms in both the encoder and decoder, allowing each position in the sequence to attend to all other positions.
5. Positional Encoding: The Transformer uses positional encoding to provide the model with informa