In [2]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, Document
from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo

#Node parser (Chunking strategies)
from llama_index.core.node_parser import (
    SentenceSplitter,
    TokenTextSplitter,
    SemanticSplitterNodeParser
)

#metadata extractions
from llama_index.core.extractors import (
    TitleExtractor,
    SummaryExtractor
)
from llama_index.core.ingestion import IngestionPipeline

#LLM and Embeddings
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding

#utilities
from dotenv import load_dotenv
import os
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("âœ… Imports successful!")


âœ… Imports successful!


In [28]:

# Load environment variables and configure the global LlamaIndex Settings.
from llama_index.llms.google_genai.base import GoogleGenAI
from dotenv import load_dotenv

# Reads variables from a local .env file into the process environment
# (presumably the Google API key the clients below pick up -- confirm).
load_dotenv()

Settings.llm = GoogleGenAI(model="gemini-3-flash-preview", temperature=0.1)

# GoogleGenAIEmbedding selects the model via `model_name` and takes the output
# size through `embedding_config` (google-genai's `output_dimensionality`).
# The previous `model=` / `dimensions=` keywords are not constructor
# parameters, so the requested model and size were not being applied.
Settings.embed_model = GoogleGenAIEmbedding(
    model_name="gemini-embedding-001",
    embedding_config={"output_dimensionality": 1536},
)

print("âœ… Settings configured")

âœ… Settings configured


---

## 2. Loading Documents from Multiple Sources

### 2.1 Local File Loading with SimpleDirectoryReader


In [4]:
# Locate the folders this notebook reads from and report which of them exist.
data_dir = Path("./data")
sample_docs_dir = data_dir.joinpath("sample_docs")
research_papers_dir = data_dir.joinpath("research_papers")

for label, folder in (
    ("Data directory exists", data_dir),
    ("Sample docs directory", sample_docs_dir),
    ("Research papers directory", research_papers_dir),
):
    print(f"{label}: {folder.exists()}")

# The PDF listing only happens when the folder is present; `files` is left
# undefined otherwise.
if research_papers_dir.exists():
    files = [*research_papers_dir.glob("*.pdf")]
    print(f"\nFound {len(files)} PDF files in research_papers/")
    for f in files:
        print(f"  - {f.name}")

Data directory exists: True
Sample docs directory: True
Research papers directory: True

Found 1 PDF files in research_papers/
  - Piyush_Agrawal_AI_resume.pdf


### SimpleDirectoryReader Features

**Key Parameters:**
- `input_dir`: Directory path
- `required_exts`: Filter by extensions (e.g., `[".pdf", ".txt"]`)
- `recursive`: Scan subdirectories
- `filename_as_id`: Use filename as document ID
- `file_metadata`: Custom metadata function
- `exclude_hidden`: Skip hidden files

In [5]:
# Build three in-memory sample "research paper" Documents, each with
# hand-written metadata (title, authors, year, category, citations, source).
# NOTE: these are created unconditionally -- nothing in this cell checks
# whether PDFs exist in the data directory. In practice, you'd load actual
# PDFs from the data directory instead.
# The triple-quoted texts keep their leading indentation, so that whitespace
# is part of each Document's text (and of any length computed from it).

sample_papers = [
    Document(
        text="""
        Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017
        
        Abstract: The dominant sequence transduction models are based on complex recurrent or 
        convolutional neural networks that include an encoder and a decoder. The best performing 
        models also connect the encoder and decoder through an attention mechanism. We propose a 
        new simple network architecture, the Transformer, based solely on attention mechanisms, 
        dispensing with recurrence and convolutions entirely.
        
        Introduction: Recurrent neural networks, long short-term memory and gated recurrent neural 
        networks in particular, have been firmly established as state of the art approaches in 
        sequence modeling and transduction problems. The Transformer is the first transduction model 
        relying entirely on self-attention to compute representations of its input and output without 
        using sequence-aligned RNNs or convolution.
        """,
        metadata={
            "title": "Attention Is All You Need",
            "authors": "Vaswani et al.",
            "year": 2017,
            "category": "transformers",
            "citations": 85000,
            "source": "research_paper"
        }
    ),
    Document(
        text="""
        Title: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
        Authors: Devlin et al.
        Year: 2019
        
        Abstract: We introduce a new language representation model called BERT, which stands for 
        Bidirectional Encoder Representations from Transformers. Unlike recent language representation 
        models, BERT is designed to pre-train deep bidirectional representations from unlabeled text 
        by jointly conditioning on both left and right context in all layers.
        
        Introduction: Language model pre-training has been shown to be effective for improving many 
        natural language processing tasks. Pre-trained language representations can be either context-free 
        or context-based. BERT alleviates the unidirectionality constraint by using a masked language 
        model (MLM) pre-training objective.
        """,
        metadata={
            "title": "BERT",
            "authors": "Devlin et al.",
            "year": 2019,
            "category": "language_models",
            "citations": 65000,
            "source": "research_paper"
        }
    ),
    Document(
        text="""
        Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
        Authors: Lewis et al.
        Year: 2020
        
        Abstract: Large pre-trained language models have been shown to store factual knowledge in their 
        parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, 
        their ability to access and precisely manipulate knowledge is still limited. We explore a general 
        fine-tuning recipe for retrieval-augmented generation (RAG) models which combine parametric and 
        non-parametric memory.
        
        Introduction: Pre-trained neural language models store and retrieve knowledge using their parameters. 
        RAG models combine parametric memory (the LLM) with non-parametric memory (a dense vector index of 
        Wikipedia). This provides the model with access to up-to-date information and allows for more 
        interpretable and modular systems.
        """,
        metadata={
            "title": "RAG",
            "authors": "Lewis et al.",
            "year": 2020,
            "category": "rag",
            "citations": 3500,
            "source": "research_paper"
        }
    ),
]

# Confirm what was created: one line per document with its title and year.
print(f"âœ… Created {len(sample_papers)} sample research papers")
for doc in sample_papers:
    print(f"  - {doc.metadata['title']} ({doc.metadata['year']})")

âœ… Created 3 sample research papers
  - Attention Is All You Need (2017)
  - BERT (2019)
  - RAG (2020)


In [6]:
# Add processing metadata to every sample document (mutates doc.metadata).
# A single timestamp is taken for the whole batch so all documents processed
# in this run carry the same processed_date; calling datetime.now() per
# document produced slightly different values within one batch.
processed_at = datetime.now().isoformat()

for doc in sample_papers:
    doc.metadata["processed_date"] = processed_at
    doc.metadata["char_count"] = len(doc.text)
    doc.metadata["word_count"] = len(doc.text.split())  # whitespace-delimited tokens

print("Enhanced metadata for first document:")
for key, value in sample_papers[0].metadata.items():
    print(f"{key}: {value}")

Enhanced metadata for first document:
title: Attention Is All You Need
authors: Vaswani et al.
year: 2017
category: transformers
citations: 85000
source: research_paper
processed_date: 2026-01-11T20:14:06.403379
char_count: 1006
word_count: 123


---

## 3. Chunking Strategies

### Why Chunking Matters

Chunking is **critical** for RAG quality:

1. **Context Window Limits**: LLMs have token limits
2. **Embedding Quality**: Smaller chunks = more focused embeddings
3. **Retrieval Precision**: Granular chunks improve relevance
4. **Cost Optimization**: Smaller chunks = fewer tokens to LLM

### 3.1 Sentence-Based Chunking

In [7]:
# SentenceSplitter: chunking that respects sentence boundaries.
sentence_splitter = SentenceSplitter(
    separator=" ",       # word-level separator
    chunk_overlap=200,   # overlap between neighbouring chunks to preserve context
    chunk_size=1024,     # target tokens per chunk
)

sentence_nodes = sentence_splitter.get_nodes_from_documents(sample_papers)

# Report how many nodes came out and how long they are on average.
print("SentenceSplitter Results:")
print(f"  Input documents: {len(sample_papers)}")
print(f"  Output nodes: {len(sentence_nodes)}")
print(f"  Avg chars per node: {sum(len(node.text) for node in sentence_nodes) / len(sentence_nodes):.0f}")

# Peek at the first node: leading text plus the metadata it carries.
print("\nFirst node preview:")
print(f"  Text (first 200 chars): {sentence_nodes[0].text[:200]}...")
print(f"  Metadata: {sentence_nodes[0].metadata}")

SentenceSplitter Results:
  Input documents: 3
  Output nodes: 3
  Avg chars per node: 936

First node preview:
  Text (first 200 chars): Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017

        Abstract: The dominant sequence transduction models are based on complex recurrent or 
        convolutiona...
  Metadata: {'title': 'Attention Is All You Need', 'authors': 'Vaswani et al.', 'year': 2017, 'category': 'transformers', 'citations': 85000, 'source': 'research_paper', 'processed_date': '2026-01-11T20:14:06.403379', 'char_count': 1006, 'word_count': 123}


### 🎯 ML Engineering Note: Chunk Size Selection

**Chunk Size Trade-offs:**

| Size | Pros | Cons | Use Case |
|------|------|------|----------|
| **Small (256-512)** | Precise retrieval, lower cost | May lose context | Q&A, factoid extraction |
| **Medium (512-1024)** | Balanced context/precision, good default | Neither the most precise nor the richest context | General RAG, document QA |
| **Large (1024-2048)** | Rich context | Diluted relevance, higher cost | Summarization, broad queries |

**Overlap Guidelines:**
- 10-20% of chunk size (typical)
- Higher overlap (20-30%) for dense, technical content
- Lower overlap (5-10%) for structured documents

### 3.2 Token-Based Chunking

In [8]:
# TokenTextSplitter: chunking driven by a token budget.
token_splitter = TokenTextSplitter(
    separator=" ",
    chunk_overlap=128,
    chunk_size=512,
)

token_nodes = token_splitter.get_nodes_from_documents(sample_papers)

# Same summary as for the sentence splitter, for a side-by-side read.
print("TokenTextSplitter Results:")
print(f"  Input documents: {len(sample_papers)}")
print(f"  Output nodes: {len(token_nodes)}")
print(f"  Avg chars per node: {sum(len(node.text) for node in token_nodes) / len(token_nodes):.0f}")

# Node counts of both strategies and the absolute gap between them.
print("\nComparison:")
print(f"  SentenceSplitter: {len(sentence_nodes)} nodes")
print(f"  TokenTextSplitter: {len(token_nodes)} nodes")
print(f"  Difference: {abs(len(token_nodes) - len(sentence_nodes))} nodes")

TokenTextSplitter Results:
  Input documents: 3
  Output nodes: 3
  Avg chars per node: 936

Comparison:
  SentenceSplitter: 3 nodes
  TokenTextSplitter: 3 nodes
  Difference: 0 nodes


In [9]:
# Preview one token-based node. The heading is derived from the index so the
# label and the node shown cannot drift apart (it used to say "First node"
# while displaying token_nodes[1]).
preview_idx = 1
preview_node = token_nodes[preview_idx]

print(f"\nNode {preview_idx} preview:")
print(f"  Text (first 200 chars): {preview_node.text[:200]}...")
print(f"  Metadata: {preview_node.metadata}")


First node preview:
  Text (first 200 chars): Title: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
        Authors: Devlin et al.
        Year: 2019

        Abstract: We introduce a new language representation ...
  Metadata: {'title': 'BERT', 'authors': 'Devlin et al.', 'year': 2019, 'category': 'language_models', 'citations': 65000, 'source': 'research_paper', 'processed_date': '2026-01-11T20:14:06.403405', 'char_count': 895, 'word_count': 102}


### 3.3 Semantic Chunking

In [10]:
# SemanticSplitterNodeParser: chunk by meaning rather than by size alone.
# (The variable name's spelling is kept as-is because it is a notebook-level name.)
sementic_splitter = SemanticSplitterNodeParser(
    embed_model=Settings.embed_model,
    breakpoint_percentile_threshold=95,
    buffer_size=1,
)

print("Creating semantic chunks (this will call embedding API)...")
semantic_nodes = sementic_splitter.get_nodes_from_documents(sample_papers)

# Size statistics of the resulting chunks, in characters.
print("\nSemanticSplitterNodeParser Results:")
print(f"  Input documents: {len(sample_papers)}")
print(f"  Output nodes: {len(semantic_nodes)}")
print(f"  Avg chars per node: {sum(len(node.text) for node in semantic_nodes) / len(semantic_nodes):.0f}")
print(f"  Min chars: {min(len(node.text) for node in semantic_nodes)}")
print(f"  Max chars: {max(len(node.text) for node in semantic_nodes)}")

Creating semantic chunks (this will call embedding API)...

SemanticSplitterNodeParser Results:
  Input documents: 3
  Output nodes: 6
  Avg chars per node: 477
  Min chars: 220
  Max chars: 740


### 3.4 Comparing Chunking Strategies

In [11]:
import pandas as pd


def _chunk_length_stats(name, nodes):
    """Build one comparison-table row describing the character lengths of ``nodes``.

    Args:
        name: Label shown in the "Strategy" column.
        nodes: Sequence of objects exposing a ``.text`` string.

    Returns:
        dict with the keys "Strategy", "Num Nodes", "Avg Chars", "Min Chars",
        "Max Chars" and "Std Dev" (all statistics truncated to int).
        An empty ``nodes`` yields zeros instead of raising.
    """
    lengths = [len(n.text) for n in nodes]
    if not lengths:
        # Avoid ZeroDivisionError / min() of an empty sequence.
        return {
            "Strategy": name,
            "Num Nodes": 0,
            "Avg Chars": 0,
            "Min Chars": 0,
            "Max Chars": 0,
            "Std Dev": 0,
        }

    # pandas' default is the sample standard deviation, which is NaN for a
    # single value; int(NaN) would raise, so report 0 in that case.
    spread = pd.Series(lengths).std()
    return {
        "Strategy": name,
        "Num Nodes": len(lengths),
        "Avg Chars": int(sum(lengths) / len(lengths)),
        "Min Chars": min(lengths),
        "Max Chars": max(lengths),
        "Std Dev": 0 if pd.isna(spread) else int(spread),
    }


# Compare chunking strategies
strategies = [
    {"name": "Sentence", "nodes": sentence_nodes},
    {"name": "Token", "nodes": token_nodes},
    {"name": "Semantic", "nodes": semantic_nodes},
]

comparison_data = [
    _chunk_length_stats(strat["name"], strat["nodes"]) for strat in strategies
]

df = pd.DataFrame(comparison_data)
print("\nChunking Strategy Comparison:")
print(df.to_string(index=False))


Chunking Strategy Comparison:
Strategy  Num Nodes  Avg Chars  Min Chars  Max Chars  Std Dev
Sentence          3        936        877        988       55
   Token          3        936        877        988       55
Semantic          6        477        220        740      228


### 4.2 LLM-Based Metadata Extraction

In [25]:
import asyncio
from llama_index.core.extractors import SummaryExtractor, TitleExtractor
from llama_index.core.settings import Settings

async def extract_metadata_async(sentence_nodes, max_nodes=2):
    """Generate LLM summaries for the first few nodes and print a short preview.

    Args:
        sentence_nodes: Sequence of nodes to draw from.
        max_nodes: How many leading nodes to summarize (kept small to save
            API calls). Defaults to 2, the previously hard-coded value.

    Returns:
        The nodes returned by ``SummaryExtractor.aprocess_nodes`` for the
        selected subset, or an empty list when there is nothing to summarize.
    """
    # Apply to a subset of nodes (to save API calls)
    sample_nodes_for_extraction = sentence_nodes[:max_nodes]
    if not sample_nodes_for_extraction:
        # Nothing to send to the LLM; also avoids indexing [0] on an empty result.
        print("No nodes to summarize")
        return []

    summary_extractor = SummaryExtractor(
        llm=Settings.llm,
        summaries=["self"],  # Summarize each node
    )

    print("Extracting metadata with LLM (this may take a moment)...")

    nodes_with_summaries = await summary_extractor.aprocess_nodes(
        sample_nodes_for_extraction
    )

    print(f"\nâœ… Extracted summaries for {len(nodes_with_summaries)} nodes")
    if nodes_with_summaries:
        first_node = nodes_with_summaries[0]
        print("\nNode 0 with LLM-generated summary:")
        print(f"  Original text (first 150 chars): {first_node.text[:150]}...")

        # The summary is only printed when the extractor actually stored one.
        if "section_summary" in first_node.metadata:
            print(f"  Summary: {first_node.metadata['section_summary']}")

    return nodes_with_summaries


In [29]:
# ðŸ”¥ ASYNC title extraction
nodes_with_title = await title_extractor.aprocess_nodes(
    sample_nodes_for_extraction
)

print(f"\nâœ… Extracted summaries for {len(nodes_with_summaries)} nodes")
print(f"\nNode 0 with LLM-generated summary:")
print(f"  Original text (first 150 chars): {nodes_with_summaries[0].text[:150]}...")

# Safely access extracted title
if "title" in nodes_with_title[0].metadata:
    print(f"Extracted title: {nodes_with_title[0].metadata['title']}")
else:
    print("No title extracted")


  0%|          | 0/2 [00:00<?, ?it/s]


ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash\nPlease retry in 4.89002899s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerDayPerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-2.5-flash'}, 'quotaValue': '20'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '4s'}]}}

In [14]:
nodes_with_summaries[0].metadata

{'title': 'Attention Is All You Need',
 'authors': 'Vaswani et al.',
 'year': 2017,
 'category': 'transformers',
 'citations': 85000,
 'source': 'research_paper',
 'processed_date': '2026-01-11T20:14:06.403379',
 'char_count': 1006,
 'word_count': 123,
 'section_summary': 'The research paper "Attention Is All You Need" by Vaswani et al. (2017) introduces the **Transformer** model, a novel neural network architecture for sequence transduction. The key innovation of the Transformer is its exclusive reliance on **attention mechanisms**, particularly **self-attention**, entirely dispensing with traditional recurrent neural networks (RNNs, LSTMs, GRUs) and convolutional neural networks (CNNs) that were previously dominant. This foundational work in the "transformers" category aims to compute representations of input and output without sequence-aligned RNNs or convolutions.',
 'document_title': 'Title: Attention Is All You Need (Vaswani et al., 2017): The Transformer, a Pure Attention Model 

---

## 5. Node Relationships

### Understanding Node Relationships

In [15]:
# Show which relationship links the first three sentence-based nodes carry.
print("Node Relationships:")
for i, node in enumerate(sentence_nodes[:3]):
    links = node.relationships
    print(f"\nNode {i}:")
    print(f"  ID: {node.node_id}")
    print(f"  Relationships: {list(links.keys())}")

    # Link back to the document the node was cut from, when present
    if NodeRelationship.SOURCE in links:
        source_info = links[NodeRelationship.SOURCE]
        print(f"  Source Document ID: {source_info.node_id}")

    # Neighbouring chunks, when present
    for relation, label in (
        (NodeRelationship.PREVIOUS, "PREVIOUS"),
        (NodeRelationship.NEXT, "NEXT"),
    ):
        if relation in links:
            print(f"  Has {label} node")

Node Relationships:

Node 0:
  ID: 72d5041b-a9a8-43f4-9f0e-fb836a880973
  Relationships: [<NodeRelationship.SOURCE: '1'>]
  Source Document ID: 23aa00b0-7606-437c-b808-8b57b5ca73dd

Node 1:
  ID: e96f93dc-0d0b-4e29-98eb-8a580aa250f5
  Relationships: [<NodeRelationship.SOURCE: '1'>]
  Source Document ID: 2b3f03a5-4719-458c-ba34-47cbd4cb8cb3

Node 2:
  ID: 0796f718-645f-4b1f-86f2-7534d6161aba
  Relationships: [<NodeRelationship.SOURCE: '1'>]
  Source Document ID: eef4b82a-2469-4e10-ba02-70a31656780f


In [16]:
# Same inspection for the semantic chunks -- every node this time.
print("Node Relationships:")
for i, node in enumerate(semantic_nodes):
    links = node.relationships
    print(f"\nNode {i}:")
    print(f"  ID: {node.node_id}")
    print(f"  Relationships: {list(links.keys())}")

    # Originating document, if the link was recorded
    if NodeRelationship.SOURCE in links:
        source_info = links[NodeRelationship.SOURCE]
        print(f"  Source Document ID: {source_info.node_id}")

    # Sibling chunks from the same document
    if NodeRelationship.PREVIOUS in links:
        print("  Has PREVIOUS node")
    if NodeRelationship.NEXT in links:
        print("  Has NEXT node")

Node Relationships:

Node 0:
  ID: 6547da38-74e5-4693-9e26-fed7baee5a6a
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.NEXT: '3'>]
  Source Document ID: 23aa00b0-7606-437c-b808-8b57b5ca73dd
  Has NEXT node

Node 1:
  ID: f9e401aa-55d2-4cf9-af4b-d55f6174e03b
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.PREVIOUS: '2'>]
  Source Document ID: 23aa00b0-7606-437c-b808-8b57b5ca73dd
  Has PREVIOUS node

Node 2:
  ID: 86e44d95-f5a9-4af9-acc3-fd4b3ad528bc
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.NEXT: '3'>]
  Source Document ID: 2b3f03a5-4719-458c-ba34-47cbd4cb8cb3
  Has NEXT node

Node 3:
  ID: 67c1d8a3-e458-4882-b7f7-a7ccf65472ce
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.PREVIOUS: '2'>]
  Source Document ID: 2b3f03a5-4719-458c-ba34-47cbd4cb8cb3
  Has PREVIOUS node

Node 4:
  ID: a924f165-f25e-4ae8-843f-03b77c5ea97b
  Relationships: [<NodeRelationship.SOURCE: '1'>, <NodeRelationship.NEXT: '3'>]
  So

---

## 6. Ingestion Pipeline

### Creating a Complete Ingestion Pipeline

In [17]:
# Build the ingestion pipeline: split documents into chunks, then run the
# configured embedding model over them.
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_overlap=200, chunk_size=1024),
        Settings.embed_model,
    ],
)

print("Running ingestion pipeline...")
nodes = pipeline.run(show_progress=True, documents=sample_papers)

# Summarize the run; the last line checks the first node for an embedding.
print("\nâœ… Pipeline complete!")
print(f"  Processed {len(sample_papers)} documents")
print(f"  Generated {len(nodes)} nodes")
print(f"  Nodes have embeddings: {nodes[0].embedding is not None}")

Running ingestion pipeline...


Parsing nodes:   0%|          | 0/3 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/3 [00:00<?, ?it/s]


âœ… Pipeline complete!
  Processed 3 documents
  Generated 3 nodes
  Nodes have embeddings: True


---

## 7. Building an Index with Optimized Chunks

### Using Our Processed Nodes

In [18]:
# Index the nodes produced by the ingestion pipeline and expose a query
# engine that retrieves the top 3 matches per question.
index = VectorStoreIndex(nodes=nodes)

query_engine = index.as_query_engine(response_mode="compact", similarity_top_k=3)

print("âœ… Index created from processed nodes")
print(f"  Total nodes indexed: {len(nodes)}")

âœ… Index created from processed nodes
  Total nodes indexed: 3


### Querying with Rich Metadata

In [21]:
import nest_asyncio
nest_asyncio.apply()  # Ensure this runs first in notebook

import asyncio

async def run_query(query="What is the Transformer architecture?"):
    """Run ``query`` against the notebook's ``query_engine`` and print the result.

    Prints the question, the response text, and for each retrieved source its
    score, selected metadata (title, year, category) and a 150-character
    text preview.

    Args:
        query: Question to ask. Defaults to the question this cell originally
            hard-coded, so ``run_query()`` behaves as before.

    Returns:
        The response object returned by ``query_engine.aquery``.
    """
    response = await query_engine.aquery(query)

    print(f"Query: {query}\n")
    print("Response:")
    print(response.response)  # Use .response for string content
    print("\n" + "=" * 80)

    # Examine retrieved sources
    print("\nRetrieved Sources:")
    for i, source_node in enumerate(response.source_nodes, 1):
        # A missing score (None) cannot be formatted with :.4f; show N/A instead.
        score = source_node.score
        score_text = "N/A" if score is None else f"{score:.4f}"

        print(f"\nSource {i}:")
        print(f"  Score: {score_text}")
        print(f"  Title: {source_node.metadata.get('title', 'N/A')}")
        print(f"  Year: {source_node.metadata.get('year', 'N/A')}")
        print(f"  Category: {source_node.metadata.get('category', 'N/A')}")
        print(f"  Text preview: {source_node.text[:150]}...")

    return response

# Run the async function and keep its response object at notebook level.
# asyncio.run() is invoked from inside the notebook here; this cell calls
# nest_asyncio.apply() first for that reason, so keep that call ahead of this line.
response = asyncio.run(run_query())


Query: What is the Transformer architecture?

Response:
The Transformer is a network architecture that relies entirely on attention mechanisms to compute representations of its input and output. It was introduced as a simpler model that dispenses with recurrence and convolutions entirely, making it the first transduction model to rely solely on self-attention.


Retrieved Sources:

Source 1:
  Score: 0.6615
  Title: Attention Is All You Need
  Year: 2017
  Category: transformers
  Text preview: Title: Attention Is All You Need
        Authors: Vaswani et al.
        Year: 2017

        Abstract: The dominant sequence transduction models are b...

Source 2:
  Score: 0.5488
  Title: BERT
  Year: 2019
  Category: language_models
  Text preview: Title: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
        Authors: Devlin et al.
        Year: 2019

        Abs...

Source 3:
  Score: 0.5003
  Title: RAG
  Year: 2020
  Category: rag
  Text preview: Title: Re

---

## 8. Chunking Best Practices

### Experiment: Impact of Chunk Size on Retrieval

In [22]:
# Test different chunk sizes: for each size, chunk the sample papers, index the
# chunks, run the same query, and record node count, best score and answer length.
chunk_sizes = [256, 512, 1024, 2048]
test_query = "What are the benefits of attention mechanisms?"

results = []

for chunk_size in chunk_sizes:
    # Create splitter
    splitter = SentenceSplitter(
        chunk_size=chunk_size,
        chunk_overlap=int(chunk_size * 0.2)  # 20% overlap
    )

    # Chunk once and index exactly those nodes. Previously the documents were
    # split here and then split a second time inside from_documents(), so the
    # reported node count described a different set of nodes than the index.
    temp_nodes = splitter.get_nodes_from_documents(sample_papers)
    temp_index = VectorStoreIndex(nodes=temp_nodes)

    # Query
    temp_engine = temp_index.as_query_engine(similarity_top_k=2)
    temp_response = temp_engine.query(test_query)

    # Guard against an empty retrieval or a missing score before formatting.
    retrieved = temp_response.source_nodes
    top_score = retrieved[0].score if retrieved else None

    results.append({
        "Chunk Size": chunk_size,
        "Num Nodes": len(temp_nodes),
        "Top Score": "N/A" if top_score is None else f"{top_score:.4f}",
        "Response Len": len(str(temp_response)),
    })

df_results = pd.DataFrame(results)
print("\nChunk Size Impact on Retrieval:")
print(df_results.to_string(index=False))


Chunk Size Impact on Retrieval:
 Chunk Size  Num Nodes Top Score  Response Len
        256          3    0.6242           227
        512          3    0.6242           371
       1024          3    0.6242           371
       2048          3    0.6242           301
