In [1]:
import os

from meditations_rag.services.loader import DocumentLoaderService
from meditations_rag.services.chunker import ChunkerService
from meditations_rag.services.metadata import MetadataExtractorService
from meditations_rag.core.chunk_embeding import get_chunk_embedding_model
from meditations_rag.core.llm import create_llm
from meditations_rag.pipelines.ingest import IngestPipeline

[32m2025-11-18 16:49:22[0m | [1mINFO    [0m | [1mLogging initialized for meditations-rag v0.1.0[0m
[32m2025-11-18 16:49:22[0m | [1mINFO    [0m | [1mEnvironment: development[0m
[32m2025-11-18 16:49:22[0m | [1mINFO    [0m | [1mEnvironment: development[0m


In [2]:
# Path of the PDF to ingest. Set the MEDITATIONS_PDF_PATH environment variable
# to point at a different copy; when it is unset, the original local path is
# used, so existing behaviour is unchanged.
source_file = os.environ.get(
    "MEDITATIONS_PDF_PATH",
    "C:\\Users\\mrudh\\Documents\\Projects\\ProfileProject\\Agentic-RAG\\meditations-rag\\data\\Marcus-Aurelius-Meditations.pdf",
)

# Backward-compatible alias: the ingest cell still refers to the original
# (misspelled) name, so it must keep resolving to the same path.
soure_file = source_file

In [3]:
# Tunables passed to the metadata extractor (kept together so they are easy to find).
METADATA_BATCH_SIZE = 5
METADATA_MAX_CONCURRENT = 3

# Document loader, plus a chunker that is handed the chunk-embedding model.
loader = DocumentLoaderService()
chunk_embedding_model = get_chunk_embedding_model()
chunk_service = ChunkerService(embed_model=chunk_embedding_model)

# Metadata extractor driven by the project LLM.
llm = create_llm()
metadata_extractor = MetadataExtractorService(
    llm=llm,
    batch_size=METADATA_BATCH_SIZE,
    max_concurrent=METADATA_MAX_CONCURRENT,
)

# Wire the three services into a single ingestion pipeline.
ingest_pipeline = IngestPipeline(
    loader=loader,
    chunk_service=chunk_service,
    metadata_extractor=metadata_extractor,
)

[32m2025-11-18 16:49:27[0m | [1mINFO    [0m | [1mUsing Local LLM embedding model: local-embedding[0m


In [4]:
# nest_asyncio patches asyncio so an already-running event loop (such as the
# notebook kernel's) can be re-entered.
# NOTE(review): presumably required because the pipeline drives the event loop
# internally; a plain top-level `await` works in Jupyter without it -- confirm.
import nest_asyncio
nest_asyncio.apply()

# Ingest and extract metadata
# `soure_file` (sic) is the PDF path defined in an earlier cell. The awaited
# result is bound to `chunks`, which later cells index and read `.node_id` /
# `.metadata` from.
chunks = await ingest_pipeline.ingest(file_path=soure_file)

[32m2025-11-18 16:49:29[0m | [1mINFO    [0m | [1mStarting ingestion for file: C:\Users\mrudh\Documents\Projects\ProfileProject\Agentic-RAG\meditations-rag\data\Marcus-Aurelius-Meditations.pdf[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mLoaded 128 documents[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mStarting document chunking...[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mprocessing 2 in 1 batches[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mProcessing batch 1/1 with 2 documents[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mLoaded 128 documents[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mStarting document chunking...[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mprocessing 2 in 1 batches[0m
[32m2025-11-18 16:49:30[0m | [1mINFO    [0m | [1mProcessing batch 1/1 with 2 documents[0m
2025-11-18 16:49:33,933 - INFO - HTTP Request: POST http://localhost:8081/v1/embeddings "HTTP/1.1 200 OK"

In [7]:
# Examine a chunk with enriched metadata
chunk = chunks[2]
meta = chunk.metadata  # look the metadata dict up once instead of on every line

# A key may be missing or explicitly None; normalise both cases to an empty
# list so len(), slicing and join() below cannot raise.
questions = meta.get('questions') or []
keywords = meta.get('keywords') or []
entities = meta.get('entities') or []

print(f"Chunk ID: {chunk.node_id}")
print("\nOriginal Metadata:")
print(f"  File: {meta.get('file_path')}")
print(f"  Source: {meta.get('source')}")
print(f"  Total Pages: {meta.get('total_pages')}")

print("\nExtracted Metadata:")
print(f"  Topic: {meta.get('topic')}")
print(f"  Questions ({len(questions)}):")
# Show only the first three questions to keep the cell output short.
for i, q in enumerate(questions[:3], 1):
    print(f"    {i}. {q}")
# map(str, ...) keeps join() from failing if an item is not a string.
print(f"  Keywords: {', '.join(map(str, keywords[:10]))}")
print(f"  Entities: {', '.join(map(str, entities))}")
# NOTE(review): this printed None in the recorded run, so the extractor may not
# be setting 'metadata_extracted_at' (or uses a different key) -- confirm.
print(f"\n  Extracted at: {meta.get('metadata_extracted_at')}")

Chunk ID: d7b7e7ee-6f36-4d81-8807-aeb0c2e85d59

Original Metadata:
  File: C:\Users\mrudh\Documents\Projects\ProfileProject\Agentic-RAG\meditations-rag\data\Marcus-Aurelius-Meditations.pdf
  Source: 17
  Total Pages: 128

Extracted Metadata:
  Topic: Gratitude and Appreciation
  Questions (5):
    1. Who were the key individuals Marcus Aurelius knew and valued, such as Apollonius, Rusticus, and Maximus?
    2. What personal indulgences did Marcus Aurelius avoid, including relations with Benedicta and Theodotus?
    3. What does Marcus Aurelius mean by the 'life according to nature' that he meditated upon?
  Keywords: life according to nature, Apollonius, Rusticus, Maximus, inward motions, suggestions of the gods, Benedicta, Theodotus, mother, help the poor
  Entities: Apollonius, Rusticus, Maximus, Benedicta, Theodotus

  Extracted at: None


In [9]:
# Dump the chunk's raw instance attributes (id_, embedding, metadata, ...).
vars(chunk)

{'id_': 'd7b7e7ee-6f36-4d81-8807-aeb0c2e85d59',
 'embedding': None,
 'metadata': {'total_pages': 128,
  'file_path': 'C:\\Users\\mrudh\\Documents\\Projects\\ProfileProject\\Agentic-RAG\\meditations-rag\\data\\Marcus-Aurelius-Meditations.pdf',
  'source': '17',
  'questions': ['Who were the key individuals Marcus Aurelius knew and valued, such as Apollonius, Rusticus, and Maximus?',
   'What personal indulgences did Marcus Aurelius avoid, including relations with Benedicta and Theodotus?',
   "What does Marcus Aurelius mean by the 'life according to nature' that he meditated upon?",
   'Why did Marcus Aurelius consider himself the sole cause for not yet living according to nature despite divine suggestions?',
   'How should one prioritize family and upbringing over personal ambitions and dignities, as exemplified by Marcus?'],
  'keywords': ['life according to nature',
   'Apollonius',
   'Rusticus',
   'Maximus',
   'inward motions',
   'suggestions of the gods',
   'Benedicta',
   'Th

In [None]:
# Optional: view the complete metadata as JSON (uncomment the two lines below to run)
# import json
# print(json.dumps(chunk.metadata, indent=2, default=str))