In [1]:
import os

from meditations_rag.services.loader import DocumentLoaderService
from meditations_rag.services.chunker import ChunkerService
from meditations_rag.services.metadata import MetadataExtractorService
from meditations_rag.core.chunk_embeding import get_chunk_embedding_model
from meditations_rag.core.llm import create_llm
from meditations_rag.pipelines.ingest import IngestPipeline
from meditations_rag.core.embedding import create_embedding
from meditations_rag.services.vector_store.qdrant_store import QdrantVectorStore
from meditations_rag.services.vector_store.embedding_service import VectorEmbeddingService

In [3]:
# Path of the PDF to ingest. Set the MEDITATIONS_SOURCE_FILE environment
# variable to point the notebook at a copy stored elsewhere; when it is unset
# the original hard-coded location is used, so existing runs are unaffected.
source_file = os.environ.get(
    "MEDITATIONS_SOURCE_FILE",
    "C:\\Users\\mrudh\\Documents\\Projects\\ProfileProject\\Agentic-RAG\\meditations-rag\\data\\Marcus-Aurelius-Meditations.pdf",
)

# The ingestion cell reads the path through this (misspelled) name, so it
# stays bound to the same value.
soure_file = source_file

In [4]:
# Values handed to MetadataExtractorService as `batch_size` and
# `max_concurrent`. Named here so they can be tuned in one place.
# NOTE(review): presumably chunks per LLM request and parallel requests
# respectively -- confirm against MetadataExtractorService.
METADATA_BATCH_SIZE = 5
METADATA_MAX_CONCURRENT = 3

# Initialize services. The construction order is kept as-is because several
# of these constructors log (and the Qdrant store issues an HTTP request)
# when they are created.
loader = DocumentLoaderService()
chunk_embedding_model = get_chunk_embedding_model()
chunk_service = ChunkerService(embed_model=chunk_embedding_model)
embedding_base = create_embedding()
embedding_service = VectorEmbeddingService(embedding_provider=embedding_base)
vector_store = QdrantVectorStore()

# Create metadata extractor backed by the configured LLM
llm = create_llm()
metadata_extractor = MetadataExtractorService(
    llm=llm,
    batch_size=METADATA_BATCH_SIZE,
    max_concurrent=METADATA_MAX_CONCURRENT,
)

# Create pipeline with metadata extraction, wiring together every service
# built above
ingest_pipeline = IngestPipeline(
    loader=loader,
    chunk_service=chunk_service,
    metadata_extractor=metadata_extractor,
    embedding_service=embedding_service,
    vector_store=vector_store,
)

[32m2025-11-19 03:52:20[0m | [1mINFO    [0m | [1mUsing Local LLM embedding model: local-embedding[0m
[32m2025-11-19 03:52:21[0m | [1mINFO    [0m | [1mInitialized LocalEmbedding with model: local-embedding[0m
2025-11-19 03:52:22,219 - INFO - HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"
[32m2025-11-19 03:52:22[0m | [1mINFO    [0m | [1mInitialized Qdrant client: host=localhost, port=6333, main_collection=meditations, question_collection=meditations_questions[0m
[32m2025-11-19 03:52:22[0m | [1mINFO    [0m | [1mInitialized LocalLLM with model: openrouter/sherlock-think-alpha[0m


In [5]:
import nest_asyncio
nest_asyncio.apply()

# Ingest and extract metadata
chuunk_results, question_results = await ingest_pipeline.ingest(file_path=soure_file)

[32m2025-11-19 03:53:08[0m | [1mINFO    [0m | [1mStarting ingestion for file: C:\Users\mrudh\Documents\Projects\ProfileProject\Agentic-RAG\meditations-rag\data\Marcus-Aurelius-Meditations.pdf[0m
[32m2025-11-19 03:53:10[0m | [1mINFO    [0m | [1mLoaded 128 documents[0m
[32m2025-11-19 03:53:10[0m | [1mINFO    [0m | [1mStarting document chunking...[0m
[32m2025-11-19 03:53:10[0m | [1mINFO    [0m | [1mprocessing 2 in 1 batches[0m
[32m2025-11-19 03:53:10[0m | [1mINFO    [0m | [1mProcessing batch 1/1 with 2 documents[0m
2025-11-19 03:53:26,070 - INFO - HTTP Request: POST http://localhost:8081/v1/embeddings "HTTP/1.1 200 OK"
2025-11-19 03:53:27,061 - INFO - HTTP Request: POST http://localhost:8081/v1/embeddings "HTTP/1.1 200 OK"
[32m2025-11-19 03:53:27[0m | [1mINFO    [0m | [1mCompleted batch 1/1: Generated 5 chunks[0m
[32m2025-11-19 03:53:27[0m | [1mINFO    [0m | [1mChunking completed: 5 chunks created with 0 failures.[0m
[32m2025-11-19 03:53:27[0m |

TextNode(id_='5b961a97-39c5-4c5b-8536-4387d376d5f0', embedding=None, metadata={'total_pages': 128, 'file_path': 'C:\\Users\\mrudh\\Documents\\Projects\\ProfileProject\\Agentic-RAG\\meditations-rag\\data\\Marcus-Aurelius-Meditations.pdf', 'source': '16', 'questions': ['Who is the author of the Meditations referenced in this passage?', "What specific book of Marcus Aurelius' Meditations is introduced here?", "Who is credited with the translation denoted as 'tr. Casaubon v.'?", "What does the title 'Meditations of Marcus Aurelius' signify in Stoic literature?", "How can recognizing the structure starting with the First Book guide one's study of Stoic philosophy?"], 'keywords': ['Meditations', 'Marcus Aurelius', 'First Book', 'Casaubon', 'tr. Casaubon', 'translation', 'Stoic philosophy', 'Book 1', 'Roman emperor', 'personal reflections', 'philosophical journal'], 'topic': 'Gratitude and Appreciation', 'entities': ['Marcus Aurelius', 'Casaubon'], 'philosophical_concepts': [], 'stoic_practic