In [1]:
import logging
import sys
import os
import time
from typing import Dict, Any, List, Optional

# Configure logging: INFO and above, each record stamped with time, logger name
# and level. basicConfig does nothing when the root logger already has handlers,
# so re-running this cell is harmless.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Make modules in the current working directory importable (the local imports
# below rely on this). Guarded so that re-running the cell does not keep
# appending duplicate entries to sys.path.
_project_dir = os.path.abspath('.')
if _project_dir not in sys.path:
    sys.path.append(_project_dir)

# Import the hierarchical RAG components
from document_processor import HierarchicalDocumentProcessor, get_document_stats
from embedding import EmbeddingGenerator
from vector_store import HierarchicalVectorStore
from retrieval import HierarchicalRetrievalPipeline
from llm_interface import LLMInterface
from config import (
    validate_config,
    TOP_K_ROOT,
    TOP_K_LEAF,
    KB_PATH
)

2025-05-22 23:42:03,252 - config - INFO - Loading environment variables from: /home/olande/Desktop/Rag_Techniques/.env


In [2]:
# Initialize the HierarchicalRAG
class HierarchicalRAG:
    """Main class for the Hierarchical RAG system.

    Builds the document processor, embedding generator, vector store,
    retrieval pipeline and LLM interface once, then exposes two operations:
    ``index_documents`` to populate the vector store and ``query`` to answer
    a question from the indexed content.
    """

    def __init__(self, top_k_root: int = TOP_K_ROOT, top_k_leaf: int = TOP_K_LEAF):
        """
        Initialize the Hierarchical RAG system.

        Args:
            top_k_root: Passed to ``HierarchicalRetrievalPipeline`` as ``top_k_root``.
            top_k_leaf: Passed to ``HierarchicalRetrievalPipeline`` as ``top_k_leaf``.

        Raises:
            ValueError: If ``validate_config()`` reports an invalid configuration.
        """
        # Validate configuration before any component is constructed
        if not validate_config():
            raise ValueError("Invalid configuration")

        self.document_processor = HierarchicalDocumentProcessor()
        self.embedding_generator = EmbeddingGenerator()
        self.vector_store = HierarchicalVectorStore()
        self.retrieval_pipeline = HierarchicalRetrievalPipeline(
            top_k_root=top_k_root,
            top_k_leaf=top_k_leaf
        )
        self.llm_interface = LLMInterface()

        logger.info(
            "Initialized HierarchicalRAG with top_k_root=%s, top_k_leaf=%s",
            top_k_root,
            top_k_leaf,
        )

    @staticmethod
    def _namespace_vector_count(stats: Dict[str, Any], namespace: str) -> int:
        """Return the ``vector_count`` reported for ``namespace``, or 0 when it is missing."""
        # ``or`` (rather than a .get default) also covers entries that are present but None.
        namespaces = stats.get("namespaces") or {}
        namespace_stats = namespaces.get(namespace) or {}
        return namespace_stats.get("vector_count") or 0

    def index_documents(self, force_reindex: bool = False) -> Dict[str, Any]:
        """
        Process and index documents for hierarchical retrieval.

        Args:
            force_reindex: When True, existing vectors (if any) are deleted and
                everything is indexed again. When False, indexing is skipped if
                both the root and the leaf namespace already hold vectors.

        Returns:
            A dict with ``"status"``, ``"root_count"`` and ``"leaf_count"``.
            ``"status"`` is ``"already_indexed"`` when indexing was skipped (the
            counts then come from the index statistics) or ``"indexed"`` after a
            run (the counts are the values returned by the upsert calls, and an
            ``"indexing_time"`` entry in seconds is included).
        """
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way time.time() differences can.
        start_time = time.perf_counter()

        # Check if documents are already indexed
        stats = self.vector_store.get_index_stats()
        root_count = self._namespace_vector_count(stats, "root")
        leaf_count = self._namespace_vector_count(stats, "leaf")

        if root_count > 0 and leaf_count > 0 and not force_reindex:
            logger.info(
                "Documents already indexed: %s root chunks, %s leaf chunks",
                root_count,
                leaf_count,
            )
            logger.info("Use force_reindex=True to reindex documents")
            return {
                "status": "already_indexed",
                "root_count": root_count,
                "leaf_count": leaf_count
            }

        # If forcing reindex, delete existing vectors.
        # NOTE(review): when only one namespace holds vectors and force_reindex is
        # False, execution falls through to indexing WITHOUT deleting anything.
        if force_reindex and (root_count > 0 or leaf_count > 0):
            logger.info("Forcing reindex, deleting existing vectors...")
            self.vector_store.delete_all()

        # Process documents into hierarchical chunks
        logger.info("Processing documents into hierarchical chunks...")
        root_chunks, leaf_chunks = self.document_processor.process_documents()

        # Generate embeddings for root chunks
        logger.info("Generating embeddings for root chunks...")
        root_chunks_with_embeddings = self.embedding_generator.generate_embeddings_for_chunks(
            root_chunks, "root"
        )

        # Generate embeddings for leaf chunks
        logger.info("Generating embeddings for leaf chunks...")
        leaf_chunks_with_embeddings = self.embedding_generator.generate_embeddings_for_chunks(
            leaf_chunks, "leaf"
        )

        # Index root chunks
        logger.info("Indexing root chunks...")
        root_upsert_count = self.vector_store.upsert_root_chunks(root_chunks_with_embeddings)

        # Index leaf chunks
        logger.info("Indexing leaf chunks...")
        leaf_upsert_count = self.vector_store.upsert_leaf_chunks(leaf_chunks_with_embeddings)

        # Elapsed time covers the stats check, processing, embedding and upserts
        indexing_time = time.perf_counter() - start_time

        # Return indexing statistics
        indexing_stats = {
            "status": "indexed",
            "root_count": root_upsert_count,
            "leaf_count": leaf_upsert_count,
            "indexing_time": indexing_time
        }

        logger.info("Indexing completed in %.2f seconds", indexing_time)
        logger.info(
            "Indexed %s root chunks and %s leaf chunks",
            root_upsert_count,
            leaf_upsert_count,
        )

        return indexing_stats

    def query(
        self,
        query_text: str,
        root_filter: Optional[Dict[str, Any]] = None,
        leaf_filter: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Process a query through the hierarchical RAG pipeline.

        Args:
            query_text: The question to answer.
            root_filter: Optional filter forwarded unchanged to the retrieval
                pipeline as ``root_filter``.
            leaf_filter: Optional filter forwarded unchanged to the retrieval
                pipeline as ``leaf_filter``.

        Returns:
            A dict with the keys ``"query"``, ``"answer"`` (the ``"text"`` entry
            of the LLM response), ``"context"``, ``"root_chunks"``,
            ``"leaf_chunks"`` (all three taken from the retrieval result) and
            ``"query_time"`` (seconds spent on retrieval plus generation).
        """
        # Monotonic clock: see index_documents for the rationale.
        start_time = time.perf_counter()

        # Retrieve relevant chunks using hierarchical retrieval
        logger.info("Retrieving relevant chunks for query: %s", query_text)
        retrieval_result = self.retrieval_pipeline.run(
            query=query_text,
            root_filter=root_filter,
            leaf_filter=leaf_filter
        )

        # Generate response using the LLM, grounded on the retrieved context
        logger.info("Generating response...")
        llm_response = self.llm_interface.generate_hierarchical_rag_response(
            query=query_text,
            context=retrieval_result["context"]
        )

        # Calculate query time
        query_time = time.perf_counter() - start_time

        # Combine results
        result = {
            "query": query_text,
            "answer": llm_response["text"],
            "context": retrieval_result["context"],
            "root_chunks": retrieval_result["root_chunks"],
            "leaf_chunks": retrieval_result["leaf_chunks"],
            "query_time": query_time
        }

        logger.info("Query processed in %.2f seconds", query_time)
        return result

# Initialize the system: build the shared pipeline instance used by the cells
# below, passing explicit top_k_root=3 and top_k_leaf=5 instead of relying on
# the TOP_K_ROOT / TOP_K_LEAF defaults.
rag = HierarchicalRAG(top_k_root=3, top_k_leaf=5)
print("Hierarchical RAG system initialized!")

2025-05-22 23:42:04,243 - config - INFO - All required configuration variables are present.
2025-05-22 23:42:04,670 - document_processor - INFO - Initialized HierarchicalDocumentProcessor with knowledge base path: /home/olande/Desktop/Rag_Techniques/HRAG/books
2025-05-22 23:42:04,672 - document_processor - INFO - Leaf chunk size: 1000, Root chunk size: 4000
2025-05-22 23:42:04,701 - embedding - INFO - Initialized EmbeddingGenerator with model: models/text-embedding-004
2025-05-22 23:42:04,702 - embedding - INFO - Embedding dimensions: 768
2025-05-22 23:42:06,534 - vector_store - INFO - Pinecone index already exists: hrag-gemini-768
2025-05-22 23:42:08,016 - embedding - INFO - Initialized EmbeddingGenerator with model: models/text-embedding-004
2025-05-22 23:42:08,018 - embedding - INFO - Embedding dimensions: 768
2025-05-22 23:42:08,437 - vector_store - INFO - Pinecone index already exists: hrag-gemini-768
2025-05-22 23:42:08,760 - retrieval - INFO - Initialized HierarchicalRetriever w

Hierarchical RAG system initialized!


In [3]:
# Summarise the knowledge base: its path, file count, total size and file types.
doc_stats = get_document_stats()
print(
    "Knowledge Base Statistics:",
    f"Path: {KB_PATH}",
    f"Total Files: {doc_stats['total_files']}",
    f"Total Size: {doc_stats['total_size_mb']} MB",
    f"File Types: {doc_stats['file_types']}",
    sep="\n",
)

Knowledge Base Statistics:
Path: /home/olande/Desktop/Rag_Techniques/HRAG/books
Total Files: 3
Total Size: 0.28 MB
File Types: {'.txt': 3}


In [4]:
# Populate the vector store.
# force_reindex=False leaves documents that are already indexed untouched;
# set it to True to reindex them.
index_stats = rag.index_documents(force_reindex=False)

print(
    f"Indexing Status: {index_stats['status']}",
    f"Root Chunks: {index_stats['root_count']}",
    f"Leaf Chunks: {index_stats['leaf_count']}",
    sep="\n",
)

# The timing entry is optional in the returned stats, so only report it when present.
if 'indexing_time' in index_stats:
    print(f"Indexing Time: {index_stats['indexing_time']:.2f} seconds")

2025-05-22 23:42:10,812 - __main__ - INFO - Documents already indexed: 40 root chunks, 106 leaf chunks
2025-05-22 23:42:10,815 - __main__ - INFO - Use force_reindex=True to reindex documents


Indexing Status: already_indexed
Root Chunks: 40
Leaf Chunks: 106


In [5]:
# Ask questions...
def ask_question(query: str, rag_system: Optional["HierarchicalRAG"] = None) -> Dict[str, Any]:
    """Run ``query`` through a RAG pipeline, print the answer, and return the result.

    Args:
        query: Question text to send to the pipeline.
        rag_system: Object exposing ``query(text)``. When omitted, the
            notebook's module-level ``rag`` instance is used, so existing
            ``ask_question(query)`` calls behave exactly as before.

    Returns:
        The dict returned by the pipeline's ``query`` call; its ``"answer"``
        and ``"query_time"`` entries are printed along the way.
    """
    # Resolve the default at call time so the function follows whichever
    # ``rag`` instance is currently bound in the notebook.
    system = rag if rag_system is None else rag_system

    print(f"Query: {query}")
    result = system.query(query)
    print("\nAnswer:")
    print(result["answer"])
    print(f"\nQuery Time: {result['query_time']:.2f} seconds")
    return result

# Query about my biography.
# The returned dict is kept in `result` so the following cells can inspect it.
query = "Who studies statistics and programming in Kenyatta University?"
result = ask_question(query)

2025-05-22 23:42:10,840 - __main__ - INFO - Retrieving relevant chunks for query: Who studies statistics and programming in Kenyatta University?
2025-05-22 23:42:10,842 - retrieval - INFO - Retrieving chunks for query: Who studies statistics and programming in Kenyatta University?


Query: Who studies statistics and programming in Kenyatta University?


2025-05-22 23:42:13,988 - vector_store - INFO - Root query returned 3 results
2025-05-22 23:42:14,219 - vector_store - INFO - Leaf query returned 6 results
2025-05-22 23:42:14,223 - retrieval - INFO - Retrieved 3 root chunks and 6 leaf chunks
2025-05-22 23:42:14,226 - __main__ - INFO - Generating response...
2025-05-22 23:42:14,232 - llm_interface - INFO - Generating response for prompt: 
            Question: Who studies statistics and ...
2025-05-22 23:42:18,191 - llm_interface - INFO - Generated response: **Who Studies Statistics and Programming in Kenyat...
2025-05-22 23:42:18,193 - __main__ - INFO - Query processed in 7.35 seconds



Answer:
**Who Studies Statistics and Programming in Kenyatta University?**

Based on the hierarchical context provided, the answer is as follows:

* **Chris Olande** is a student of Statistics and Programming at Kenyatta University. This information can be found in **Section 1**, where it is stated that "Chris Olande is a statistics and programming student at Kenyatta University with a strong background in data science, machine learning, and artificial intelligence."
* **Chris Olande's academic background** in statistics is complemented by a robust understanding of programming, especially in Python. His technical repertoire includes proficiency in libraries such as NumPy, pandas, PyTorch, and Hugging Face Transformers. This information can be found in **Section 1**, **Passage 2**, where it is stated that "At Kenyatta University, he has distinguished himself through rigorous coursework and hands-on projects. His technical repertoire includes proficiency in libraries such as NumPy, pand

In [6]:
# Show the context stored under result["context"] for the most recent question
print("Retrieved Context:", result["context"], sep="\n")

Retrieved Context:


[Section 1] (Source: /home/olande/Desktop/Rag_Techniques/HRAG/books/olande.txt)

Summary: Here is a concise summary of Chris Olande's professional profile:

Chris Olande is a statistics and programming student at Kenyatta University with a strong background in data science, machine learning, and artificial intelligence. He has a proven track record of applying statistical rigor and cutting-edge machine learning techniques to real-world problems, particularly in education and AI. His expertise includes proficiency in Python, NumPy, pandas, PyTorch, and Hugging Face Transformers. He has worked on projects such as sentiment analysis, natural language processing, and retrieval-augmented generation systems. Chris is a leader in education and community engagement, and is committed to nurturing curiosity and STEM literacy among younger learners. He is a methodical, results-oriented, and collaborative individual with a strong work ethic and a passion for lifelong learning.

In [7]:
# Inspect the root-level hits: id, score, source and (when present) a summary preview.
root_hits = result['root_chunks']
print(f"Retrieved {len(root_hits)} root chunks:")
for position, chunk in enumerate(root_hits, start=1):
    print(f"\nRoot Chunk {position}:")
    print(f"ID: {chunk['id']}")
    print(f"Score: {chunk['score']:.4f}")
    print(f"Source: {chunk['metadata'].get('source', 'unknown')}")
    if 'summary' not in chunk:
        continue
    # Preview only: cut the summary to its first 200 characters.
    print(f"Summary: {chunk['summary'][:200]}...")

Retrieved 3 root chunks:

Root Chunk 1:
ID: root_0_0
Score: 0.6778
Source: /home/olande/Desktop/Rag_Techniques/HRAG/books/olande.txt
Summary: Here is a concise summary of Chris Olande's professional profile:

Chris Olande is a statistics and programming student at Kenyatta University with a strong background in data science, machine learnin...

Root Chunk 2:
ID: root_2_22
Score: 0.3226
Source: /home/olande/Desktop/Rag_Techniques/HRAG/books/romeo_and_juliet.txt
Summary: Here is a concise summary:

Project Gutenberg is a digital library of free eBooks, founded by Professor Michael S. Hart, that can be accessed at www.gutenberg.org. Donations are accepted to support th...

Root Chunk 3:
ID: root_2_21
Score: 0.3037
Source: /home/olande/Desktop/Rag_Techniques/HRAG/books/romeo_and_juliet.txt
Summary: Here is a concise summary of the text:

**Terms of Use for Project Gutenberg Electronic Works**

* Permission is required to charge a fee or distribute works on different terms than specified.
*

In [8]:
# Inspect the leaf-level hits. The header reports how many were retrieved in
# total, while the loop lists at most the first five to keep the output short.
leaf_hits = result['leaf_chunks']
print(f"Retrieved {len(leaf_hits)} leaf chunks:")
for position, chunk in enumerate(leaf_hits[:5], start=1):
    print(f"\nLeaf Chunk {position}:")
    print(f"ID: {chunk['id']}")
    print(f"Score: {chunk['score']:.4f}")
    print(f"Parent ID: {chunk['metadata'].get('parent_id', 'unknown')}")
    # Preview only: first 100 characters of the chunk text.
    preview = chunk['text'][:100]
    print(f"Text: {preview}...")

Retrieved 6 leaf chunks:

Leaf Chunk 1:
ID: leaf_0_0_1
Score: 0.6304
Parent ID: root_0_0
Text: This dual commitment—to technology and to people—makes Chris a well-rounded and impactful contributo...

Leaf Chunk 2:
ID: leaf_0_0_0
Score: 0.6118
Parent ID: root_0_0
Text: Chris Olande: A Professional Profile

Chris Olande is a dynamic and intellectually curious student o...

Leaf Chunk 3:
ID: leaf_2_21_2
Score: 0.3051
Parent ID: root_2_21
Text: International donations are gratefully accepted, but we cannot make
any statements concerning tax tr...

Leaf Chunk 4:
ID: leaf_2_22_0
Score: 0.2813
Parent ID: root_2_22
Text: Please check the Project Gutenberg web pages for current donation
methods and addresses. Donations a...

Leaf Chunk 5:
ID: leaf_2_21_1
Score: 0.2692
Parent ID: root_2_21
Text: 1.F.6. INDEMNITY - You agree to indemnify and hold the Foundation, the
trademark owner, any agent or...


In [9]:
# Try another query, this time about the Declaration of Independence.
# Rebinds `query` and `result`, replacing the values from the previous question.
query = "Who wrote the declaration of independence?"
result = ask_question(query)

2025-05-22 23:42:18,290 - __main__ - INFO - Retrieving relevant chunks for query: Who wrote the declaration of independence?
2025-05-22 23:42:18,292 - retrieval - INFO - Retrieving chunks for query: Who wrote the declaration of independence?


Query: Who wrote the declaration of independence?


2025-05-22 23:42:19,012 - vector_store - INFO - Root query returned 3 results
2025-05-22 23:42:19,255 - vector_store - INFO - Leaf query returned 9 results
2025-05-22 23:42:19,257 - retrieval - INFO - Retrieved 3 root chunks and 9 leaf chunks
2025-05-22 23:42:19,259 - __main__ - INFO - Generating response...
2025-05-22 23:42:19,262 - llm_interface - INFO - Generating response for prompt: 
            Question: Who wrote the declaration o...
2025-05-22 23:42:27,086 - llm_interface - INFO - Generated response: Who wrote the Declaration of Independence?

Accord...
2025-05-22 23:42:27,091 - __main__ - INFO - Query processed in 8.80 seconds



Answer:
Who wrote the Declaration of Independence?

According to [Section 1] and [Section 2], the Declaration of Independence was written by **Thomas Jefferson**. 

The passage in [Section 1] specifically states: "Author: Thomas Jefferson".

The passage in [Section 2] also mentions that the title of the eBook is "The Declaration of Independence" and the author is "Thomas Jefferson".

Therefore, the answer to the question is that the Declaration of Independence was written by **Thomas Jefferson**.

Query Time: 8.80 seconds


In [10]:
# Try a more specific query on the same topic (the Declaration of Independence).
# `query` and `result` are rebound again.
query = "What are the unalienable rights mentioned in the declaration of independence?"
result = ask_question(query)

2025-05-22 23:42:27,114 - __main__ - INFO - Retrieving relevant chunks for query: What are the unalienable rights mentioned in the declaration of independence?
2025-05-22 23:42:27,116 - retrieval - INFO - Retrieving chunks for query: What are the unalienable rights mentioned in the declaration of independence?


Query: What are the unalienable rights mentioned in the declaration of independence?


2025-05-22 23:42:27,870 - vector_store - INFO - Root query returned 3 results
2025-05-22 23:42:28,101 - vector_store - INFO - Leaf query returned 9 results
2025-05-22 23:42:28,102 - retrieval - INFO - Retrieved 3 root chunks and 9 leaf chunks
2025-05-22 23:42:28,104 - __main__ - INFO - Generating response...
2025-05-22 23:42:28,106 - llm_interface - INFO - Generating response for prompt: 
            Question: What are the unalienable ri...
2025-05-22 23:42:42,506 - llm_interface - INFO - Generated response: **Unalienable Rights Mentioned in the Declaration ...
2025-05-22 23:42:42,508 - __main__ - INFO - Query processed in 15.39 seconds



Answer:
**Unalienable Rights Mentioned in the Declaration of Independence**

The Declaration of Independence, written by Thomas Jefferson in 1776, mentions the following unalienable rights:

* **Life**: The right to life is considered a fundamental and inalienable right, essential to human existence.
* **Liberty**: The right to liberty refers to the freedom to make choices and live one's life as one sees fit, without undue interference from others.
* **Pursuit of Happiness**: The right to pursue happiness is a fundamental aspect of human nature, allowing individuals to strive for their own well-being and fulfillment.

These rights are mentioned in the following passage:

"We hold these truths to be self-evident, that all men are created equal, that they are endowed by their Creator with certain unalienable Rights, that among these are Life, Liberty, and the pursuit of Happiness."

These rights are considered unalienable, meaning they cannot be taken away or denied by any government or

In [11]:
# Build a second pipeline with different retrieval parameters
# (top_k_root=2, top_k_leaf=3).
custom_rag = HierarchicalRAG(top_k_root=2, top_k_leaf=3)

# Run one question through the pipeline configured above.
query = "What grievances were listed in the declaration of independence?"
custom_result = custom_rag.query(query)

# Report the answer, the elapsed time and how many chunks were retrieved.
root_total = len(custom_result['root_chunks'])
leaf_total = len(custom_result['leaf_chunks'])
print(
    f"Query: {query}",
    "\nAnswer:",
    custom_result["answer"],
    f"\nQuery Time: {custom_result['query_time']:.2f} seconds",
    f"\nRetrieved {root_total} root chunks and {leaf_total} leaf chunks",
    sep="\n",
)

2025-05-22 23:42:42,529 - config - INFO - All required configuration variables are present.
2025-05-22 23:42:42,580 - document_processor - INFO - Initialized HierarchicalDocumentProcessor with knowledge base path: /home/olande/Desktop/Rag_Techniques/HRAG/books
2025-05-22 23:42:42,581 - document_processor - INFO - Leaf chunk size: 1000, Root chunk size: 4000
2025-05-22 23:42:42,591 - embedding - INFO - Initialized EmbeddingGenerator with model: models/text-embedding-004
2025-05-22 23:42:42,593 - embedding - INFO - Embedding dimensions: 768
2025-05-22 23:42:43,006 - vector_store - INFO - Pinecone index already exists: hrag-gemini-768
2025-05-22 23:42:43,308 - embedding - INFO - Initialized EmbeddingGenerator with model: models/text-embedding-004
2025-05-22 23:42:43,309 - embedding - INFO - Embedding dimensions: 768
2025-05-22 23:42:43,851 - vector_store - INFO - Pinecone index already exists: hrag-gemini-768
2025-05-22 23:42:45,177 - retrieval - INFO - Initialized HierarchicalRetriever w

Query: What grievances were listed in the declaration of independence?

Answer:
**Grievances listed in the Declaration of Independence**

The Declaration of Independence, written by Thomas Jefferson in 1776, lists 27 grievances against King George III. These grievances can be summarized as follows:

**Passage 1**

* Refused to pass laws for the public good
* Imposed taxes without consent
* Abolished trial by jury
* Quartered large numbers of soldiers in the colonies
* Interfered with trade and commerce
* Transported colonists to be tried for crimes in Britain

**Passage 2**

* Quartered large bodies of armed troops among the colonists
* Protected them from punishment for murders committed on the inhabitants of the colonies
* Cut off trade with all parts of the world
* Imposed taxes without consent
* Denied the benefits of trial by jury
* Transported colonists to be tried for pretended offenses
* Abolished the free system of English laws in a neighboring province
* Established an arbitr

In [12]:
# Ask a question about Romeo and Juliet through ask_question (which uses the shared `rag`).
# `result` is rebound here, so the inspection cells that follow describe this query.
query = "How did Romeo die?"
result = ask_question(query)

2025-05-22 23:42:54,160 - __main__ - INFO - Retrieving relevant chunks for query: How did Romeo die?
2025-05-22 23:42:54,163 - retrieval - INFO - Retrieving chunks for query: How did Romeo die?


Query: How did Romeo die?


2025-05-22 23:42:54,854 - vector_store - INFO - Root query returned 3 results
2025-05-22 23:42:55,090 - vector_store - INFO - Leaf query returned 9 results
2025-05-22 23:42:55,092 - retrieval - INFO - Retrieved 3 root chunks and 9 leaf chunks
2025-05-22 23:42:55,093 - __main__ - INFO - Generating response...
2025-05-22 23:42:55,094 - llm_interface - INFO - Generating response for prompt: 
            Question: How did Romeo die?

       ...
2025-05-22 23:43:03,650 - llm_interface - INFO - Generated response: **Romeo's Death**

Romeo's death occurs in the Cap...
2025-05-22 23:43:03,654 - __main__ - INFO - Query processed in 9.49 seconds



Answer:
**Romeo's Death**

Romeo's death occurs in the Capulet family tomb, where he has gone to be with Juliet's body. He has obtained a poison from an apothecary in Mantua and intends to use it to end his life. 

Upon entering the tomb, Romeo finds Paris's body and is grief-stricken. He then lays Paris in the tomb and begins to lament his own fate, comparing it to a "lightning before death" and a "lantern, slaught'rd youth." 

Romeo then decides to die alongside Juliet, whom he believes is already dead. He takes the poison and drinks it, saying "O true apothecary! / Thy drugs are quick. / Thus with a kiss I die" (Passage 3). 

Romeo's body is found by Friar Lawrence, who is shocked to see him dead. Friar Lawrence enters the tomb and finds Juliet, who has also died from a dagger wound. 

The Prince and the Watch enter the scene, and they discover the bodies of Romeo, Paris, and Juliet. The Prince orders an investigation into the murder, and the scene ends with the Prince calling for 

In [13]:
# Inspect the root-level hits for the latest `result`: id, score, source and,
# when the chunk carries one, a summary preview.
root_hits = result['root_chunks']
print(f"Retrieved {len(root_hits)} root chunks:")
for position, chunk in enumerate(root_hits, start=1):
    print(f"\nRoot Chunk {position}:")
    print(f"ID: {chunk['id']}")
    print(f"Score: {chunk['score']:.4f}")
    print(f"Source: {chunk['metadata'].get('source', 'unknown')}")
    if 'summary' not in chunk:
        continue
    # Preview only: cut the summary to its first 300 characters.
    print(f"Summary: {chunk['summary'][:300]}...")

Retrieved 3 root chunks:

Root Chunk 1:
ID: root_2_17
Score: 0.6651
Source: /home/olande/Desktop/Rag_Techniques/HRAG/books/romeo_and_juliet.txt
Summary: Here is a concise summary of the scene:

Romeo, desperate to be with Juliet, decides to buy a poison from an apothecary in Mantua. He pays the apothecary with 40 ducats and obtains the poison, which he intends to use to end his life. Meanwhile, Friar Lawrence is unaware of Romeo's plan and is trying...

Root Chunk 2:
ID: root_2_18
Score: 0.6616
Source: /home/olande/Desktop/Rag_Techniques/HRAG/books/romeo_and_juliet.txt
Summary: Here is a concise summary of the scene:

Romeo and Paris fight and Paris is killed. Romeo, grief-stricken, enters the Capulet family tomb where Juliet lies, and he drinks a poison that kills him. Juliet wakes up and finds Romeo dead, and in a fit of grief, she kills herself with his dagger. The Prin...

Root Chunk 3:
ID: root_2_10
Score: 0.6598
Source: /home/olande/Desktop/Rag_Techniques/HRAG/books/romeo_and_jul

In [16]:
# Inspect the leaf-level hits for the latest `result`. The header reports how
# many were retrieved in total; at most the first five are listed for brevity.
leaf_hits = result['leaf_chunks']
print(f"Retrieved {len(leaf_hits)} leaf chunks:")
for position, chunk in enumerate(leaf_hits[:5], start=1):
    print(f"\nLeaf Chunk {position}:")
    print(f"ID: {chunk['id']}")
    print(f"Score: {chunk['score']:.4f}")
    print(f"Parent ID: {chunk['metadata'].get('parent_id', 'unknown')}")
    # Preview only: first 200 characters of the chunk text.
    preview = chunk['text'][:200]
    print(f"Text: {preview}...")

Retrieved 9 leaf chunks:

Leaf Chunk 1:
ID: leaf_2_10_1
Score: 0.6453
Parent ID: root_2_10
Text: PRINCE.
Romeo slew him, he slew Mercutio.
Who now the price of his dear blood doth owe?

MONTAGUE.
Not Romeo, Prince, he was Mercutio’s friend;
His fault concludes but what the law should end,
The lif...

Leaf Chunk 2:
ID: leaf_2_17_2
Score: 0.6405
Parent ID: root_2_17
Text: PARIS.
I do defy thy conjuration,
And apprehend thee for a felon here.

ROMEO.
Wilt thou provoke me? Then have at thee, boy!

 [_They fight._]

PAGE.
O lord, they fight! I will go call the watch.

 [_...

Leaf Chunk 3:
ID: leaf_2_18_1
Score: 0.6349
Parent ID: root_2_18
Text: [_Enters the monument._]

Romeo! O, pale! Who else? What, Paris too?
And steep’d in blood? Ah what an unkind hour
Is guilty of this lamentable chance?
The lady stirs.

 [_Juliet wakes and stirs._]

JU...

Leaf Chunk 4:
ID: leaf_2_10_0
Score: 0.6282
Parent ID: root_2_10
Text: ROMEO.
I thought all for the best.

MERCUTIO.
Help me into some house, Benv