In [1]:
import os

from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.readers.file import PyMuPDFReader
from openai import OpenAI

from rag_app.config.settings import settings
from rag_app.core.vector_client import VectorClient
from rag_app.embeddings import get_chunk_embeddings, get_embed_model
from rag_app.services.ingest import IngestionService

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Source PDF to ingest. The location can be overridden with the
# RAG_INGEST_PDF environment variable so the notebook is not tied to one
# machine's directory layout; when the variable is unset the original
# path is used unchanged.
PDF_PATH = os.environ.get(
    "RAG_INGEST_PDF",
    "C:/Users/mrudh/Documents/Data/CWMG-KS-Vol-001-I.pdf",
)

# Components handed to the ingestion service. The two embedding factories
# are called in this order on purpose: each one logs on initialisation, and
# keeping the order keeps the cell output stable.
reader = PyMuPDFReader()
chunk_embed_model = get_chunk_embeddings()
embedding_client = get_embed_model()

# Completion client pointed at the base URL configured under
# settings.local_models.
# NOTE(review): "test" is presumably a placeholder key for a local
# endpoint that does not check credentials - confirm before reuse.
client = OpenAI(base_url=settings.local_models.completion_base_url, api_key="test")

# Vector store client; every connection parameter comes from settings.pinecone.
vector_client = VectorClient(
    api_key=settings.pinecone.api_key,
    environment=settings.pinecone.environment,
    index_name=settings.pinecone.index_name,
    dimension=settings.pinecone.dimension,
    metric=settings.pinecone.metric,
    cloud=settings.pinecone.cloud,
    region=settings.pinecone.region,
)

# Splitter that uses the chunk embedding model to decide node boundaries.
node_parser = SemanticSplitterNodeParser(embed_model=chunk_embed_model)

ingest_service = IngestionService(
    reader=reader,
    node_parser=node_parser,
    client=client,
    vector_client=vector_client,
    embedding_client=embedding_client,
)

# Run the ingestion; the result is kept in `nodes` for inspection in later cells.
nodes = ingest_service.ingest(file_path=PDF_PATH)

[32m2025-10-20 18:39:26[0m | [1mINFO    [0m | [36mrag_app.embeddings[0m:[36mget_chunk_embeddings[0m:[36m23[0m - [1mUsing llama-index Local Embedding Model[0m
[32m2025-10-20 18:39:26[0m | [1mINFO    [0m | [36mrag_app.embeddings[0m:[36mget_embed_model[0m:[36m39[0m - [1mInitializing Local Models Embedding Model[0m
[32m2025-10-20 18:39:30[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_ingest_file[0m:[36m25[0m - [1mLoading documents from C:/Users/mrudh/Documents/Data/CWMG-KS-Vol-001-I.pdf[0m
[32m2025-10-20 18:39:33[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_ingest_file[0m:[36m27[0m - [1mLoaded 457 documents.[0m
[32m2025-10-20 18:39:33[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_preprocess_documents[0m:[36m31[0m - [1mPreprocessing documents...[0m
[32m2025-10-20 18:39:33[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_preprocess_documents[0m:[36m33[0m - [1mPreprocessed docu

In [3]:
# Sanity check: display the content of the first item returned by the ingest call.
first_node = nodes[0]
first_node.get_content()

'[ xxiii ] \nand in abundance, thus bringing down the prices of the white \nfarmer. The Indian trader lived cheaply, spent little on equipment \nor staff, and, could easily undersell the British and the Dutch. \nThe whites, therefore, feared that they would be swamped by \nthe Indians, if the Indians were allowed to enter the country freely \nand establish themselves on land, or trade as they pleased. \n'