In [1]:
# Core LlamaIndex
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, Document
from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo

# Node Parsers (Chunking Strategies)
from llama_index.core.node_parser import (
    SentenceSplitter,
    TokenTextSplitter,
    SemanticSplitterNodeParser,
)

# Metadata Extraction
from llama_index.core.extractors import (
    TitleExtractor,
    SummaryExtractor,
)
from llama_index.core.ingestion import IngestionPipeline

# LLM and Embeddings
from llama_index.llms.groq import Groq
from llama_index.embeddings.gemini import GeminiEmbedding

# Utilities
from dotenv import load_dotenv
import os
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("✅ Imports successful!")

✅ Imports successful!



All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  import google.generativeai as gemini


In [2]:
# Load environment variables and configure Settings
load_dotenv()

Settings.llm = Groq(model="Qwen/Qwen3-32B",temperature=0.1)
Settings.embed_model = GeminiEmbedding(
    model_name="models/gemini-embedding-001",title="this is a document",
)

print("✅ Settings configured")

✅ Settings configured


In [3]:
import re
def groqLlmResponse(response):
    return  re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()

In [4]:
# Check data directory structure
data_dir = Path("./data")
sample_docs_dir = data_dir / "sample_docs"
research_papers_dir = data_dir / "research_papers"

print(f"Data directory exists: {data_dir.exists()}")
print(f"Sample docs directory: {sample_docs_dir.exists()}")
print(f"Research papers directory: {research_papers_dir.exists()}")

if research_papers_dir.exists():
    files = list(research_papers_dir.glob("*.pdf"))
    print(f"\nFound {len(files)} PDF files in research_papers/")
    for f in files:
        print(f"  - {f.name}")

Data directory exists: True
Sample docs directory: True
Research papers directory: True

Found 1 PDF files in research_papers/
  - VL_Jepa.pdf


In [5]:
# Create sample documents if no PDFs available
# In practice, you'd load actual PDFs from the data directory

sample_papers = [
    Document(
        text="""
        Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017
        
        Abstract: The dominant sequence transduction models are based on complex recurrent or 
        convolutional neural networks that include an encoder and a decoder. The best performing 
        models also connect the encoder and decoder through an attention mechanism. We propose a 
        new simple network architecture, the Transformer, based solely on attention mechanisms, 
        dispensing with recurrence and convolutions entirely.
        
        Introduction: Recurrent neural networks, long short-term memory and gated recurrent neural 
        networks in particular, have been firmly established as state of the art approaches in 
        sequence modeling and transduction problems. The Transformer is the first transduction model 
        relying entirely on self-attention to compute representations of its input and output without 
        using sequence-aligned RNNs or convolution.
        """,
        metadata={
            "title": "Attention Is All You Need",
            "authors": "Vaswani et al.",
            "year": 2017,
            "category": "transformers",
            "citations": 85000,
            "source": "research_paper"
        }
    ),
    Document(
        text="""
        Title: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
        Authors: Devlin et al.
        Year: 2019
        
        Abstract: We introduce a new language representation model called BERT, which stands for 
        Bidirectional Encoder Representations from Transformers. Unlike recent language representation 
        models, BERT is designed to pre-train deep bidirectional representations from unlabeled text 
        by jointly conditioning on both left and right context in all layers.
        
        Introduction: Language model pre-training has been shown to be effective for improving many 
        natural language processing tasks. Pre-trained language representations can be either context-free 
        or context-based. BERT alleviates the unidirectionality constraint by using a masked language 
        model (MLM) pre-training objective.
        """,
        metadata={
            "title": "BERT",
            "authors": "Devlin et al.",
            "year": 2019,
            "category": "language_models",
            "citations": 65000,
            "source": "research_paper"
        }
    ),
    Document(
        text="""
        Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
        Authors: Lewis et al.
        Year: 2020
        
        Abstract: Large pre-trained language models have been shown to store factual knowledge in their 
        parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, 
        their ability to access and precisely manipulate knowledge is still limited. We explore a general 
        fine-tuning recipe for retrieval-augmented generation (RAG) models which combine parametric and 
        non-parametric memory.
        
        Introduction: Pre-trained neural language models store and retrieve knowledge using their parameters. 
        RAG models combine parametric memory (the LLM) with non-parametric memory (a dense vector index of 
        Wikipedia). This provides the model with access to up-to-date information and allows for more 
        interpretable and modular systems.
        """,
        metadata={
            "title": "RAG",
            "authors": "Lewis et al.",
            "year": 2020,
            "category": "rag",
            "citations": 3500,
            "source": "research_paper"
        }
    ),
]

print(f"✅ Created {len(sample_papers)} sample research papers")
for doc in sample_papers:
    print(f"  - {doc.metadata['title']} ({doc.metadata['year']})")

✅ Created 3 sample research papers
  - Attention Is All You Need (2017)
  - BERT (2019)
  - RAG (2020)


In [6]:
# Add processing metadata
for doc in sample_papers:
    doc.metadata["processed_date"] = datetime.now().isoformat()
    doc.metadata["char_count"] = len(doc.text)
    doc.metadata["word_count"] = len(doc.text.split())

print("Enhanced metadata for first document:")
for key, value in sample_papers[0].metadata.items():
    print(f"  {key}: {value}")

Enhanced metadata for first document:
  title: Attention Is All You Need
  authors: Vaswani et al.
  year: 2017
  category: transformers
  citations: 85000
  source: research_paper
  processed_date: 2026-01-07T21:42:48.468343
  char_count: 1006
  word_count: 123


In [7]:
# SentenceSplitter: Respects sentence boundaries
sentence_splitter = SentenceSplitter(
    chunk_size=1024,     # Target tokens per chunk
    chunk_overlap=200,   # Overlap to preserve context
    separator=" ",       # Split on spaces first
)

sentence_nodes = sentence_splitter.get_nodes_from_documents(sample_papers)

print(f"SentenceSplitter Results:")
print(f"  Input documents: {len(sample_papers)}")
print(f"  Output nodes: {len(sentence_nodes)}")
print(f"  Avg chars per node: {sum(len(n.text) for n in sentence_nodes) / len(sentence_nodes):.0f}")

print(f"\nFirst node preview:")
print(f"  Text (first 200 chars): {sentence_nodes[0].text[:200]}...")
print(f"  Metadata: {sentence_nodes[0].metadata}")

SentenceSplitter Results:
  Input documents: 3
  Output nodes: 3
  Avg chars per node: 936

First node preview:
  Text (first 200 chars): Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017

        Abstract: The dominant sequence transduction models are based on complex recurrent or 
        convolutiona...
  Metadata: {'title': 'Attention Is All You Need', 'authors': 'Vaswani et al.', 'year': 2017, 'category': 'transformers', 'citations': 85000, 'source': 'research_paper', 'processed_date': '2026-01-07T21:42:48.468343', 'char_count': 1006, 'word_count': 123}


In [8]:
# TokenTextSplitter: Precise token count control
token_splitter = TokenTextSplitter(
    chunk_size=512,      # Exact token limit
    chunk_overlap=128,   # 25% overlap
    separator=" ",
)

token_nodes = token_splitter.get_nodes_from_documents(sample_papers)

print(f"TokenTextSplitter Results:")
print(f"  Input documents: {len(sample_papers)}")
print(f"  Output nodes: {len(token_nodes)}")
print(f"  Avg chars per node: {sum(len(n.text) for n in token_nodes) / len(token_nodes):.0f}")

# Compare with sentence splitter
print(f"\nComparison:")
print(f"  SentenceSplitter: {len(sentence_nodes)} nodes")
print(f"  TokenTextSplitter: {len(token_nodes)} nodes")
print(f"  Difference: {abs(len(sentence_nodes) - len(token_nodes))} nodes")

TokenTextSplitter Results:
  Input documents: 3
  Output nodes: 3
  Avg chars per node: 936

Comparison:
  SentenceSplitter: 3 nodes
  TokenTextSplitter: 3 nodes
  Difference: 0 nodes


In [9]:
print(f"\nFirst node preview:")
print(f"  Text (first 200 chars): {token_nodes[1].text[:200]}...")
print(f"  Metadata: {token_nodes[1].metadata}")


First node preview:
  Text (first 200 chars): Title: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
        Authors: Devlin et al.
        Year: 2019

        Abstract: We introduce a new language representation ...
  Metadata: {'title': 'BERT', 'authors': 'Devlin et al.', 'year': 2019, 'category': 'language_models', 'citations': 65000, 'source': 'research_paper', 'processed_date': '2026-01-07T21:42:48.472254', 'char_count': 895, 'word_count': 102}


In [11]:
# SemanticSplitterNodeParser: Chunk by meaning, not just size
semantic_splitter = SemanticSplitterNodeParser(
    buffer_size=1,              # Sentences to group for comparison
    breakpoint_percentile_threshold=95,  # Sensitivity to semantic breaks
    embed_model=Settings.embed_model,
)

print("Creating semantic chunks (this will call embedding API)...")
semantic_nodes = semantic_splitter.get_nodes_from_documents(sample_papers)

print(f"\nSemanticSplitterNodeParser Results:")
print(f"  Input documents: {len(sample_papers)}")
print(f"  Output nodes: {len(semantic_nodes)}")
print(f"  Avg chars per node: {sum(len(n.text) for n in semantic_nodes) / len(semantic_nodes):.0f}")
print(f"  Min chars: {min(len(n.text) for n in semantic_nodes)}")
print(f"  Max chars: {max(len(n.text) for n in semantic_nodes)}")

Creating semantic chunks (this will call embedding API)...

SemanticSplitterNodeParser Results:
  Input documents: 3
  Output nodes: 6
  Avg chars per node: 477
  Min chars: 220
  Max chars: 740


In [15]:
print(f"\nFirst node preview:")
print(f"  Text (first 200 chars): {semantic_nodes[1].text[:200]}...")
print(f"  Metadata: {semantic_nodes[1].metadata}")


First node preview:
  Text (first 200 chars): The best performing 
        models also connect the encoder and decoder through an attention mechanism. We propose a 
        new simple network architecture, the Transformer, based solely on attenti...
  Metadata: {'title': 'Attention Is All You Need', 'authors': 'Vaswani et al.', 'year': 2017, 'category': 'transformers', 'citations': 85000, 'source': 'research_paper', 'processed_date': '2026-01-07T21:42:48.468343', 'char_count': 1006, 'word_count': 123}


In [13]:
import pandas as pd

# Compare chunking strategies
strategies = [
    {"name": "Sentence", "nodes": sentence_nodes},
    {"name": "Token", "nodes": token_nodes},
    {"name": "Semantic", "nodes": semantic_nodes},
]

comparison_data = []
for strat in strategies:
    nodes = strat["nodes"]
    comparison_data.append({
        "Strategy": strat["name"],
        "Num Nodes": len(nodes),
        "Avg Chars": int(sum(len(n.text) for n in nodes) / len(nodes)),
        "Min Chars": min(len(n.text) for n in nodes),
        "Max Chars": max(len(n.text) for n in nodes),
        "Std Dev": int(pd.Series([len(n.text) for n in nodes]).std()),
    })

df = pd.DataFrame(comparison_data)
print("\nChunking Strategy Comparison:")
print(df.to_string(index=False))


Chunking Strategy Comparison:
Strategy  Num Nodes  Avg Chars  Min Chars  Max Chars  Std Dev
Sentence          3        936        877        988       55
   Token          3        936        877        988       55
Semantic          6        477        220        740      228


In [16]:
# Enrich nodes with custom metadata
for i, node in enumerate(sentence_nodes):
    # Add node-specific metadata
    node.metadata["node_index"] = i
    node.metadata["chunk_strategy"] = "sentence"
    
    # Derive metadata from content
    text_lower = node.text.lower()
    node.metadata["has_abstract"] = "abstract" in text_lower
    node.metadata["has_introduction"] = "introduction" in text_lower
    node.metadata["mentions_transformer"] = "transformer" in text_lower

print("Enhanced node metadata example:")
print(f"Node 0 metadata: {sentence_nodes[0].metadata}")

Enhanced node metadata example:
Node 0 metadata: {'title': 'Attention Is All You Need', 'authors': 'Vaswani et al.', 'year': 2017, 'category': 'transformers', 'citations': 85000, 'source': 'research_paper', 'processed_date': '2026-01-07T21:42:48.468343', 'char_count': 1006, 'word_count': 123, 'node_index': 0, 'chunk_strategy': 'sentence', 'has_abstract': True, 'has_introduction': True, 'mentions_transformer': True}


In [18]:
sentence_nodes

[TextNode(id_='4a0dce17-ef81-4299-baf8-59beb11cd73f', embedding=None, metadata={'title': 'Attention Is All You Need', 'authors': 'Vaswani et al.', 'year': 2017, 'category': 'transformers', 'citations': 85000, 'source': 'research_paper', 'processed_date': '2026-01-07T21:42:48.468343', 'char_count': 1006, 'word_count': 123, 'node_index': 0, 'chunk_strategy': 'sentence', 'has_abstract': True, 'has_introduction': True, 'mentions_transformer': True}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='329dffd5-441c-46bc-930d-8d1c742f8383', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'title': 'Attention Is All You Need', 'authors': 'Vaswani et al.', 'year': 2017, 'category': 'transformers', 'citations': 85000, 'source': 'research_paper', 'processed_date': '2026-01-07T21:42:48.468343', 'char_count': 1006, 'word_count': 123}, hash='ae44231a79eb163ab22855889ec29bd4f39427604a63ba770b47fa5512fa91e0')}, metadat

In [20]:
# Use LLM to extract metadata
from llama_index.core.extractors import SummaryExtractor, TitleExtractor

# Create extractors
title_extractor = TitleExtractor(
    llm=Settings.llm,
    nodes=5,  # Look at first 5 nodes for title
)

summary_extractor = SummaryExtractor(
    llm=Settings.llm,
    summaries=["self"],  # Summarize each node
)

print("Extracting metadata with LLM (this may take a moment)...")

# Apply to a subset of nodes (to save API calls)
sample_nodes_for_extraction = sentence_nodes[:2]

# Extract summaries
nodes_with_summaries = summary_extractor.process_nodes(sample_nodes_for_extraction)

print(f"\n✅ Extracted summaries for {len(nodes_with_summaries)} nodes")
print(f"\nNode 0 with LLM-generated summary:")
print(f"  Original text (first 150 chars): {nodes_with_summaries[0].text[:150]}...")
if "section_summary" in nodes_with_summaries[0].metadata:
    print(f"  Summary: {groqLlmResponse(nodes_with_summaries[0].metadata['section_summary'])}")

Extracting metadata with LLM (this may take a moment)...


100%|██████████| 2/2 [00:01<00:00,  1.32it/s]


✅ Extracted summaries for 2 nodes

Node 0 with LLM-generated summary:
  Original text (first 150 chars): Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017

        Abstract: The dominant sequence transduction models are b...
  Summary: **Summary:**  
The section highlights the 2017 paper *"Attention Is All You Need"* by Vaswani et al., which introduced the **Transformer** architecture. Key topics include:  
1. **Transformer Innovation**: A model that replaces traditional recurrent (RNNs, LSTMs, GRUs) and convolutional networks (CNNs) with **self-attention mechanisms**, enabling direct computation of input/output representations without sequential processing.  
2. **Impact on NLP**: The Transformer became a foundational architecture in natural language processing (NLP), revolutionizing tasks like machine translation and text generation.  
3. **Key Concepts**: Focus on *self-attention* for capturing dependencies, eliminating recurrence/convolution, and 




In [21]:
nodes_with_summaries[0].metadata

{'title': 'Attention Is All You Need',
 'authors': 'Vaswani et al.',
 'year': 2017,
 'category': 'transformers',
 'citations': 85000,
 'source': 'research_paper',
 'processed_date': '2026-01-07T21:42:48.468343',
 'char_count': 1006,
 'word_count': 123,
 'node_index': 0,
 'chunk_strategy': 'sentence',
 'has_abstract': True,
 'has_introduction': True,
 'mentions_transformer': True,
 'section_summary': '<think>\nOkay, let\'s see. The user wants a summary of the key topics and entities from the provided section about the "Attention Is All You Need" paper. First, I need to parse the content given. The title is "Attention Is All You Need" by Vaswani et al. from 2017. The category is transformers, and it\'s a research paper with a lot of citations. The abstract mentions that traditional models use recurrent or convolutional networks with encoder-decoder structures and attention mechanisms. The Transformer is introduced as a new architecture that uses only attention, removing recurrence and co

In [22]:
# Extract summaries
nodes_with_summaries = title_extractor.process_nodes(sample_nodes_for_extraction)

print(f"\n✅ Extracted summaries for {len(nodes_with_summaries)} nodes")
print(f"\nNode 0 with LLM-generated title:")
print(f"  Original text (first 150 chars): {nodes_with_summaries[0].text[:150]}...")
print(f"  Extracted title: {nodes_with_summaries[0].metadata['title']}")

100%|██████████| 2/2 [00:06<00:00,  3.00s/it]


✅ Extracted summaries for 2 nodes

Node 0 with LLM-generated title:
  Original text (first 150 chars): Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017

        Abstract: The dominant sequence transduction models are b...
  Extracted title: Attention Is All You Need





In [23]:
# Inspect node relationships
print("Node Relationships:")
for i, node in enumerate(sentence_nodes[:3]):
    print(f"\nNode {i}:")
    print(f"  ID: {node.node_id}")
    print(f"  Relationships: {list(node.relationships.keys())}")
    
    # Check for source document
    if NodeRelationship.SOURCE in node.relationships:
        source_info = node.relationships[NodeRelationship.SOURCE]
        print(f"  Source Document ID: {source_info.node_id}")
    
    # Check for previous/next nodes
    if NodeRelationship.PREVIOUS in node.relationships:
        print(f"  Has PREVIOUS node")
    if NodeRelationship.NEXT in node.relationships:
        print(f"  Has NEXT node")

Node Relationships:

Node 0:
  ID: 4a0dce17-ef81-4299-baf8-59beb11cd73f
  Relationships: [<NodeRelationship.SOURCE: '1'>]
  Source Document ID: 329dffd5-441c-46bc-930d-8d1c742f8383

Node 1:
  ID: 7bdafe77-1128-45ac-b157-6e60f3f2f709
  Relationships: [<NodeRelationship.SOURCE: '1'>]
  Source Document ID: 1b080091-10da-4a0a-9f6d-3be2a7d265e7

Node 2:
  ID: df4f8c21-537c-4814-b870-fa44107f41dd
  Relationships: [<NodeRelationship.SOURCE: '1'>]
  Source Document ID: 05807c2d-40da-4d34-95e4-056698f26a1d


In [24]:
# Inspect node relationships
print("Node Relationships:")
for i, node in enumerate(semantic_nodes[:]):
    print(f"\nNode {i}:")
    print(f"  ID: {node.node_id}")
    print(f"  Relationships: {list(node.relationships.keys())}")
    
    # Check for source document
    if NodeRelationship.SOURCE in node.relationships:
        source_info = node.relationships[NodeRelationship.SOURCE]
        print(f"  Source Document ID: {source_info.node_id}")
    
    # Check for previous/next nodes
    if NodeRelationship.PREVIOUS in node.relationships:
        print(f"  Has PREVIOUS node")
    if NodeRelationship.NEXT in node.relationships:
        print(f"  Has NEXT node")

Node Relationships:

Node 0:
  ID: b015e802-8907-4f87-b314-72199ece202e
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.NEXT: '3'>]
  Source Document ID: 329dffd5-441c-46bc-930d-8d1c742f8383
  Has NEXT node

Node 1:
  ID: 11b6da02-2629-46a1-bf65-fb2f763de60f
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.PREVIOUS: '2'>]
  Source Document ID: 329dffd5-441c-46bc-930d-8d1c742f8383
  Has PREVIOUS node

Node 2:
  ID: 25630aa5-c2bd-4d99-b204-6027ba6d9379
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.NEXT: '3'>]
  Source Document ID: 1b080091-10da-4a0a-9f6d-3be2a7d265e7
  Has NEXT node

Node 3:
  ID: b894074a-e601-49fc-b852-c3e578a2a198
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.PREVIOUS: '2'>]
  Source Document ID: 1b080091-10da-4a0a-9f6d-3be2a7d265e7
  Has PREVIOUS node

Node 4:
  ID: e7850622-3958-41a8-a402-dc4936960e44
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.NEXT: '3'>]
  So

In [25]:
# Create custom parent-child relationships
# Example: Create a summary node that links to detail nodes

summary_node = TextNode(
    text="Summary: Research papers on transformers, BERT, and RAG",
    metadata={"type": "summary", "level": "0"},
)

# Link detail nodes as children
for node in sentence_nodes[:3]:
    node.relationships[NodeRelationship.PARENT] = RelatedNodeInfo(node_id=summary_node.node_id,)
    node.metadata["level"] = "1"

print("Created hierarchical relationship:")
print(f"  Summary Node ID: {summary_node.node_id}")
print(f"  Child nodes: {len([n for n in sentence_nodes[:3] if NodeRelationship.PARENT in n.relationships])}")

Created hierarchical relationship:
  Summary Node ID: f453e00d-fb00-4898-8abe-76d19ff17898
  Child nodes: 3


In [26]:
# Build ingestion pipeline
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=1024, chunk_overlap=200),
        Settings.embed_model,  # Generate embeddings
    ],
)

print("Running ingestion pipeline...")
nodes = pipeline.run(documents=sample_papers, show_progress=True)

print(f"\n✅ Pipeline complete!")
print(f"  Processed {len(sample_papers)} documents")
print(f"  Generated {len(nodes)} nodes")
print(f"  Nodes have embeddings: {nodes[0].embedding is not None}")

Running ingestion pipeline...


Parsing nodes:   0%|          | 0/3 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/3 [00:00<?, ?it/s]


✅ Pipeline complete!
  Processed 3 documents
  Generated 3 nodes
  Nodes have embeddings: True


In [27]:
# Create index from our processed nodes
index = VectorStoreIndex(nodes=nodes)

query_engine = index.as_query_engine(
    similarity_top_k=3,
    response_mode="compact"
)

print("✅ Index created from processed nodes")
print(f"  Total nodes indexed: {len(nodes)}")

✅ Index created from processed nodes
  Total nodes indexed: 3


In [28]:
# Query about transformers
query = "What is the Transformer architecture?"
response = query_engine.query(query)
clearResponse = response.response

print(f"Query: {query}\n")
print("Response:")
print(groqLlmResponse(clearResponse))
print("\n" + "="*80)

# Examine retrieved sources
print("\nRetrieved Sources:")
for i, source_node in enumerate(response.source_nodes, 1):
    print(f"\nSource {i}:")
    print(f"  Score: {source_node.score:.4f}")
    print(f"  Title: {source_node.metadata.get('title', 'N/A')}")
    print(f"  Year: {source_node.metadata.get('year', 'N/A')}")
    print(f"  Category: {source_node.metadata.get('category', 'N/A')}")
    print(f"  Text preview: {source_node.text[:150]}...")

Query: What is the Transformer architecture?

Response:
The Transformer architecture is a neural network design that relies entirely on attention mechanisms to process and transduce sequences. It eliminates the need for recurrence (e.g., RNNs) or convolutional layers by using self-attention to compute representations of input and output sequences. This approach enables parallel processing and captures dependencies between elements regardless of their positional distance, offering a simpler and more efficient alternative to traditional sequence modeling methods.


Retrieved Sources:

Source 1:
  Score: 0.8455
  Title: Attention Is All You Need
  Year: 2017
  Category: transformers
  Text preview: Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017

        Abstract: The dominant sequence transduction models are b...

Source 2:
  Score: 0.7982
  Title: BERT
  Year: 2019
  Category: language_models
  Text preview: Title: BERT: Pre-training of Deep Bidirecti

In [29]:
# Query about RAG
query2 = "Explain retrieval-augmented generation"
response2 = query_engine.query(query2)
clearResponse2 = response2.response

print(f"Query: {query2}\n")
print("Response:")
print(groqLlmResponse(clearResponse2))
print("\n" + "="*80)

print("\nTop Source:")
top_source = response2.source_nodes[0]
print(f"  Title: {top_source.metadata.get('title')}")
print(f"  Authors: {top_source.metadata.get('authors')}")
print(f"  Citations: {top_source.metadata.get('citations')}")

Query: Explain retrieval-augmented generation

Response:
Retrieval-augmented generation (RAG) is a framework that enhances the capabilities of language models by integrating external knowledge sources during the generation process. It combines two key components: a parametric model (a large language model) and a non-parametric memory (such as a database or knowledge repository). The parametric model generates text based on its training, while the non-parametric memory provides access to up-to-date or task-specific information that may not be fully captured in the model’s internal parameters. 

When processing a query, the system first retrieves relevant information from the external memory using techniques like dense vector indexing. This retrieved data is then combined with the model’s own knowledge to produce more accurate, contextually grounded, and interpretable outputs. This approach addresses limitations in traditional models’ ability to dynamically access or update factual knowl

In [30]:
# Test different chunk sizes
chunk_sizes = [256, 512, 1024, 2048]
test_query = "What are the benefits of attention mechanisms?"

results = []

for chunk_size in chunk_sizes:
    # Create splitter
    splitter = SentenceSplitter(
        chunk_size=chunk_size,
        chunk_overlap=int(chunk_size * 0.2)  # 20% overlap
    )
    
    # Process and index
    temp_nodes = splitter.get_nodes_from_documents(sample_papers)
    temp_index = VectorStoreIndex.from_documents(
        sample_papers,
        transformations=[splitter],
        show_progress=False
    )
    
    # Query
    temp_engine = temp_index.as_query_engine(similarity_top_k=2)
    temp_response = temp_engine.query(test_query)
    
    results.append({
        "Chunk Size": chunk_size,
        "Num Nodes": len(temp_nodes),
        "Top Score": f"{temp_response.source_nodes[0].score:.4f}",
        "Response Len": len(str(temp_response)),
    })

df_results = pd.DataFrame(results)
print("\nChunk Size Impact on Retrieval:")
print(df_results.to_string(index=False))


Chunk Size Impact on Retrieval:
 Chunk Size  Num Nodes Top Score  Response Len
        256          3    0.8143          2356
        512          3    0.8143          2953
       1024          3    0.8143          3019
       2048          3    0.8143          3531
