In [1]:
import os
from dotenv import load_dotenv
# Run python-dotenv's loader before the environment is read on the next line.
load_dotenv()

# Resolves to None when the variable is unset. No cell visible in this notebook
# reads this name afterwards.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [None]:
import hashlib

from langchain.chains import RetrievalQA
from langchain.chat_models import init_chat_model
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from unstructured.partition.pdf import partition_pdf


In [61]:
def extract_elements(file_path):
    """Partition a PDF into a list of elements with ``partition_pdf``.

    Args:
        file_path: Path of the PDF, forwarded as ``filename``.

    Returns:
        Whatever ``partition_pdf`` returns for the file; its length is
        printed as the element count before returning.
    """
    # Fixed partitioning options: "hi_res" strategy, table-structure inference
    # switched on, and "Image" blocks extracted into the element payload.
    partition_options = {
        "strategy": "hi_res",
        "infer_table_structure": True,
        "extract_image_block_types": ["Image"],
        "extract_image_block_to_payload": True,
    }
    parsed = partition_pdf(filename=file_path, **partition_options)
    print(f"✅ Extracted {len(parsed)} elements")
    return parsed


In [62]:
def create_chunks(elements):
    """Flatten the elements into one string and split it into text chunks.

    Args:
        elements: Iterable of objects; each is converted with ``str()`` and
            the results are joined with a single newline.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_text``
        (chunk size 2000, overlap 300). The chunk count is printed first.
    """
    document_text = "\n".join(map(str, elements))
    pieces = RecursiveCharacterTextSplitter(
        # Separator order is passed through unchanged: paragraph break, line
        # break, then sentence-level punctuation.
        separators=["\n\n", "\n", ".", "!", "?", ";"],
        chunk_overlap=300,
        chunk_size=2000,
    ).split_text(document_text)
    print(f"✅ Created {len(pieces)} text chunks")
    return pieces


In [74]:
def create_vectorstore(chunks, collection_name="pdf_chunks"):
    """Embed text chunks with Gemini and store them in a Chroma collection.

    Every chunk is stored under an ID derived from the SHA-256 hash of its
    text. Without explicit IDs, calling this again against the same collection
    name in a live kernel can append a second copy of every chunk, which lets
    duplicates crowd out distinct results at retrieval time. With
    content-derived IDs, a repeated call writes the same IDs again.

    Note: entries written earlier from *different* text are not removed; pass
    a different ``collection_name`` when indexing another document.

    Args:
        chunks: Iterable of text strings to embed.
        collection_name: Name of the Chroma collection to write to.
            Defaults to ``"pdf_chunks"``.

    Returns:
        The vector store returned by ``Chroma.from_texts``.
    """
    # Identical chunks would hash to the same ID, so keep only the first
    # occurrence of each text (dict.fromkeys preserves input order).
    unique_chunks = list(dict.fromkeys(chunks))
    chunk_ids = [
        hashlib.sha256(text.encode("utf-8")).hexdigest() for text in unique_chunks
    ]

    embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
    vectorstore = Chroma.from_texts(
        unique_chunks,
        embedding=embeddings,
        ids=chunk_ids,
        collection_name=collection_name,
    )
    print("✅ Vector store created with Chroma")
    return vectorstore


In [82]:
def create_qa_chain(vectorstore):
    """Assemble a RetrievalQA chain over ``vectorstore`` using Gemini 2.5 Flash.

    Args:
        vectorstore: Object exposing ``as_retriever``; its retriever supplies
            the context passed to the chain.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with the "stuff"
        chain type and verbose logging enabled.
    """
    chat_model = init_chat_model("gemini-2.5-flash", model_provider="google_genai")
    # The retriever is configured with k=5 in its search kwargs.
    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    return RetrievalQA.from_chain_type(
        verbose=True,
        chain_type="stuff",
        retriever=top_k_retriever,
        llm=chat_model,
    )


In [83]:
# Step 1: Partition the paper's PDF into elements
elements = extract_elements(file_path="attention-is-all-you-need-Paper.pdf")


✅ Extracted 173 elements


In [88]:

# Step 2: Split the extracted elements into text chunks
chunks = create_chunks(elements=elements)


✅ Created 18 text chunks


In [89]:

# Step 3: Embed the chunks and load them into the vector store
vectorstore = create_vectorstore(chunks=chunks)


✅ Vector store created with Chroma


In [None]:

# Step 4: Wire the vector store into a retrieval QA chain
qa_chain = create_qa_chain(vectorstore=vectorstore)

In [None]:
# Step 5: Ask questions
query = "What is the main topic discussed in the document?"
# Chain.run() is deprecated in LangChain; invoke() takes the input dict and
# returns a dict whose "result" entry holds the answer text.
response = qa_chain.invoke({"query": query})["result"]
print("💬 Answer:", response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
💬 Answer: The provided document is a list of academic paper citations. Based on the titles and authors, the main topics discussed across these papers revolve around:

*   **Neural Networks and Architectures:** Especially Recurrent Neural Networks (RNNs) and Long Short-Term Memory (LSTM) networks.
*   **Attention Mechanisms:** Various approaches to attention, structured attention, and self-attention, often in the context of sequence-to-sequence models.
*   **Natural Language Processing (NLP) Tasks:** Such as Neural Machine Translation (NMT), language modeling, and abstractive summarization.
*   **Optimization and Training of Neural Networks:** Including methods like Adam and techniques to address issues like long-term dependencies and overfitting (e.g., Dropout).


In [None]:
# Ask for the definitions of the two halves of the architecture.
query = "Define the encoder and decoder"
# Chain.run() is deprecated in LangChain; invoke() takes the input dict and
# returns a dict whose "result" entry holds the answer text.
response = qa_chain.invoke({"query": query})["result"]
print("💬 Answer:", response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
💬 Answer: The Transformer model utilizes an encoder-decoder structure, which includes an encoder stack and a decoder stack.

Here's the definition of each:

*   **Encoder Stack:**
    *   **Function:** Maps an input sequence of symbol representations (x1,...,xn) to a sequence of continuous representations z = (z1,...,zn).
    *   **Structure:** Composed of a stack of N = 6 identical layers.
    *   **Each Layer's Components:**
        1.  A multi-head self-attention mechanism.
        2.  A simple, position-wise fully connected feed-forward network.
    *   **Self-attention in Encoder:** Each position in the encoder can attend to all positions in the previous layer of the encoder.

*   **Decoder Stack:**
    *   **Function:** Given the continuous representations `z` from the encoder, the decoder generates an output sequence (y1,...,ym) of symbols one element at a time. It is auto-regressive, meaning it consumes pr

In [93]:
# Ask about a figure by its caption text.
query = "Define the figure 1: The Transformer- model architecture."
# Chain.run() is deprecated in LangChain; invoke() takes the input dict and
# returns a dict whose "result" entry holds the answer text.
response = qa_chain.invoke({"query": query})["result"]
print("💬 Answer:", response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
💬 Answer: Figure 1 illustrates **The Transformer model architecture**.

It depicts the overall structure of the Transformer, which is composed of an encoder-decoder architecture.
*   The **left half** of the figure shows the **encoder**, which consists of a stack of N identical layers. Each encoder layer has two sub-layers: a multi-head self-attention mechanism and a position-wise fully connected feed-forward network.
*   The **right half** of the figure shows the **decoder**, also composed of a stack of N identical layers. Each decoder layer has three sub-layers: a masked multi-head self-attention mechanism, a multi-head attention mechanism that attends over the encoder's output, and a position-wise fully connected feed-forward network.

Both the encoder and decoder utilize residual connections and layer normalization around their sub-layers. The figure also indicates the use of positional encoding for both input